Merge ath-next from git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
author    Kalle Valo <kvalo@codeaurora.org>
          Wed, 23 May 2018 07:42:08 +0000 (10:42 +0300)
committer Kalle Valo <kvalo@codeaurora.org>
          Wed, 23 May 2018 07:42:08 +0000 (10:42 +0300)
ath.git patches for 4.18. Major changes:

ath10k

* add quiet mode support for QCA6174/QCA9377

wil6210

* disable WIL6210_TRACING kconfig option by default
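
The wil6210 item amounts to flipping a Kconfig default. A minimal sketch of the resulting entry, assuming the surrounding lines from the in-tree drivers/net/wireless/ath/wil6210/Kconfig (illustrative, not quoted from the commit):

    config WIL6210_TRACING
            bool "wil6210 tracing support"
            depends on WIL6210
            depends on EVENT_TRACING
            # was "default y"; tracing is now off unless explicitly enabled
            default n

With this default, configurations that do not opt in no longer build the wil6210 tracepoints; users who want them must set CONFIG_WIL6210_TRACING=y explicitly.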

2312 files changed:
Documentation/ABI/testing/sysfs-class-cxl
Documentation/admin-guide/pm/intel_pstate.rst
Documentation/admin-guide/pm/sleep-states.rst
Documentation/bpf/README.rst [new file with mode: 0644]
Documentation/bpf/bpf_design_QA.rst [new file with mode: 0644]
Documentation/bpf/bpf_design_QA.txt [deleted file]
Documentation/bpf/bpf_devel_QA.rst [new file with mode: 0644]
Documentation/bpf/bpf_devel_QA.txt [deleted file]
Documentation/device-mapper/thin-provisioning.txt
Documentation/devicetree/bindings/ata/ahci-platform.txt
Documentation/devicetree/bindings/display/panel/panel-common.txt
Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
Documentation/devicetree/bindings/input/atmel,maxtouch.txt
Documentation/devicetree/bindings/net/can/rcar_canfd.txt
Documentation/devicetree/bindings/net/dsa/dsa.txt
Documentation/devicetree/bindings/net/dwmac-sun8i.txt
Documentation/devicetree/bindings/net/marvell-pp2.txt
Documentation/devicetree/bindings/net/meson-dwmac.txt
Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
Documentation/devicetree/bindings/net/mscc-miim.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/mscc-ocelot.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/renesas,ravb.txt
Documentation/devicetree/bindings/net/sh_eth.txt
Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt
Documentation/devicetree/bindings/serial/mvebu-uart.txt
Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
Documentation/devicetree/bindings/usb/usb-xhci.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/devicetree/overlay-notes.txt
Documentation/doc-guide/parse-headers.rst
Documentation/driver-api/firmware/request_firmware.rst
Documentation/driver-api/infrastructure.rst
Documentation/driver-api/usb/typec.rst
Documentation/filesystems/nfs/nfsroot.txt
Documentation/i2c/dev-interface
Documentation/ioctl/ioctl-number.txt
Documentation/media/uapi/rc/keytable.c.rst
Documentation/media/uapi/v4l/v4l2grab.c.rst
Documentation/networking/af_xdp.rst [new file with mode: 0644]
Documentation/networking/bonding.txt
Documentation/networking/filter.txt
Documentation/networking/index.rst
Documentation/networking/ip-sysctl.txt
Documentation/networking/netdev-features.txt
Documentation/power/suspend-and-cpuhotplug.txt
Documentation/process/magic-number.rst
Documentation/sphinx/parse-headers.pl
Documentation/sysctl/net.txt
Documentation/trace/ftrace.rst
Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/arm/psci.txt [new file with mode: 0644]
Documentation/virtual/kvm/cpuid.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/arm/boot/compressed/Makefile
arch/arm/boot/compressed/head.S
arch/arm/boot/dts/bcm-cygnus.dtsi
arch/arm/boot/dts/da850-lcdk.dts
arch/arm/boot/dts/da850.dtsi
arch/arm/boot/dts/dm8148-evm.dts
arch/arm/boot/dts/dm8148-t410.dts
arch/arm/boot/dts/dm8168-evm.dts
arch/arm/boot/dts/dra62x-j5eco-evm.dts
arch/arm/boot/dts/gemini-nas4220b.dts
arch/arm/boot/dts/imx35.dtsi
arch/arm/boot/dts/imx51-zii-rdu1.dts
arch/arm/boot/dts/imx53.dtsi
arch/arm/boot/dts/imx7s.dtsi
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/omap4.dtsi
arch/arm/boot/dts/r8a7790-lager.dts
arch/arm/boot/dts/r8a7790.dtsi
arch/arm/boot/dts/r8a7791-koelsch.dts
arch/arm/boot/dts/r8a7791-porter.dts
arch/arm/boot/dts/r8a7791.dtsi
arch/arm/boot/dts/r8a7793-gose.dts
arch/arm/boot/dts/r8a7793.dtsi
arch/arm/boot/dts/tegra20.dtsi
arch/arm/configs/gemini_defconfig
arch/arm/configs/socfpga_defconfig
arch/arm/include/asm/assembler.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/include/uapi/asm/kvm.h
arch/arm/include/uapi/asm/siginfo.h [deleted file]
arch/arm/kernel/machine_kexec.c
arch/arm/kernel/traps.c
arch/arm/kvm/guest.c
arch/arm/lib/getuser.S
arch/arm/mach-davinci/board-da830-evm.c
arch/arm/mach-davinci/board-da850-evm.c
arch/arm/mach-davinci/board-dm355-evm.c
arch/arm/mach-davinci/board-dm644x-evm.c
arch/arm/mach-davinci/board-dm646x-evm.c
arch/arm/mach-davinci/board-omapl138-hawk.c
arch/arm/mach-davinci/dm646x.c
arch/arm/mach-keystone/pm_domain.c
arch/arm/mach-omap1/ams-delta-fiq.c
arch/arm/mach-omap2/Makefile
arch/arm/mach-omap2/pm-asm-offsets.c
arch/arm/mach-omap2/powerdomain.c
arch/arm/mach-omap2/sleep33xx.S
arch/arm/mach-omap2/sleep43xx.S
arch/arm/mach-s3c24xx/mach-jive.c
arch/arm/net/bpf_jit_32.c
arch/arm/probes/kprobes/opt-arm.c
arch/arm/vfp/vfpmodule.c
arch/arm64/Makefile
arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
arch/arm64/boot/dts/arm/juno-motherboard.dtsi
arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi
arch/arm64/boot/dts/exynos/exynos5433.dtsi
arch/arm64/boot/dts/marvell/armada-cp110.dtsi
arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/module.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/module-plts.c
arch/arm64/kernel/module.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/traps.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/Makefile
arch/arm64/mm/flush.c
arch/arm64/mm/init.c
arch/arm64/mm/kasan_init.c
arch/arm64/net/bpf_jit_comp.c
arch/hexagon/include/asm/io.h
arch/hexagon/lib/checksum.c
arch/mips/boot/compressed/uart-16550.c
arch/mips/boot/dts/xilfpga/Makefile
arch/mips/generic/Platform
arch/mips/kernel/ptrace.c
arch/mips/kernel/ptrace32.c
arch/mips/kvm/mips.c
arch/mips/mm/c-r4k.c
arch/mips/net/ebpf_jit.c
arch/parisc/Makefile
arch/parisc/kernel/drivers.c
arch/parisc/kernel/pci.c
arch/parisc/kernel/smp.c
arch/parisc/kernel/time.c
arch/parisc/kernel/traps.c
arch/parisc/mm/init.c
arch/powerpc/include/asm/ftrace.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/powernv.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/mce_power.c
arch/powerpc/kernel/smp.c
arch/powerpc/kvm/booke.c
arch/powerpc/mm/mem.c
arch/powerpc/net/Makefile
arch/powerpc/net/bpf_jit64.h
arch/powerpc/net/bpf_jit_asm64.S [deleted file]
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/platforms/cell/spufs/sched.c
arch/powerpc/platforms/powernv/memtrace.c
arch/powerpc/platforms/powernv/npu-dma.c
arch/powerpc/platforms/powernv/opal-nvram.c
arch/powerpc/platforms/powernv/opal-rtc.c
arch/riscv/Kconfig
arch/riscv/include/asm/Kbuild
arch/riscv/kernel/vdso/Makefile
arch/s390/configs/debug_defconfig
arch/s390/configs/performance_defconfig
arch/s390/crypto/crc32be-vx.S
arch/s390/crypto/crc32le-vx.S
arch/s390/include/asm/nospec-insn.h [new file with mode: 0644]
arch/s390/include/asm/purgatory.h
arch/s390/include/asm/thread_info.h
arch/s390/kernel/Makefile
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/base.S
arch/s390/kernel/entry.S
arch/s390/kernel/irq.c
arch/s390/kernel/mcount.S
arch/s390/kernel/module.c
arch/s390/kernel/nospec-branch.c
arch/s390/kernel/nospec-sysfs.c [new file with mode: 0644]
arch/s390/kernel/perf_cpum_cf_events.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/process.c
arch/s390/kernel/reipl.S
arch/s390/kernel/swsusp.S
arch/s390/kernel/uprobes.c
arch/s390/lib/mem.S
arch/s390/net/Makefile
arch/s390/net/bpf_jit.S [deleted file]
arch/s390/net/bpf_jit.h
arch/s390/net/bpf_jit_comp.c
arch/sh/Kconfig
arch/sh/kernel/cpu/sh2/probe.c
arch/sh/kernel/setup.c
arch/sh/mm/consistent.c
arch/sh/mm/init.c
arch/sh/mm/numa.c
arch/sparc/include/uapi/asm/oradax.h
arch/sparc/kernel/vio.c
arch/sparc/net/Makefile
arch/sparc/net/bpf_jit_64.h
arch/sparc/net/bpf_jit_asm_64.S [deleted file]
arch/sparc/net/bpf_jit_comp_64.c
arch/x86/Kconfig
arch/x86/boot/compressed/eboot.c
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/pgtable_64.c
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vdso/vdso32/vdso-fakesections.c [deleted file]
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/events/msr.c
arch/x86/include/asm/asm.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/ftrace.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/jailhouse_para.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/pkeys.h
arch/x86/include/asm/processor.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/include/uapi/asm/msgbuf.h
arch/x86/include/uapi/asm/shmbuf.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/head64.c
arch/x86/kernel/jailhouse.c
arch/x86/kernel/kexec-bzimage64.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kvm.c
arch/x86/kernel/machine_kexec_32.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/pci-nommu.c [deleted file]
arch/x86/kernel/process_64.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tsc.c
arch/x86/kernel/uprobes.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/mm/dump_pagetables.c
arch/x86/mm/pageattr.c
arch/x86/mm/pkeys.c
arch/x86/mm/pti.c
arch/x86/net/Makefile
arch/x86/net/bpf_jit.S [deleted file]
arch/x86/net/bpf_jit_comp.c
arch/x86/net/bpf_jit_comp32.c [new file with mode: 0644]
arch/x86/power/hibernate_64.c
arch/x86/xen/enlighten_hvm.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu.c
arch/x86/xen/mmu_pv.c
block/bfq-iosched.c
block/blk-cgroup.c
block/blk-core.c
block/blk-mq.c
block/blk-mq.h
block/genhd.c
block/partition-generic.c
crypto/api.c
crypto/drbg.c
drivers/acpi/acpi_video.c
drivers/acpi/acpi_watchdog.c
drivers/acpi/acpica/acnamesp.h
drivers/acpi/acpica/exconfig.c
drivers/acpi/acpica/nsinit.c
drivers/acpi/button.c
drivers/acpi/scan.c
drivers/acpi/sleep.c
drivers/amba/bus.c
drivers/android/binder.c
drivers/ata/ahci.c
drivers/ata/ahci.h
drivers/ata/ahci_mvebu.c
drivers/ata/ahci_qoriq.c
drivers/ata/ahci_xgene.c
drivers/ata/libahci.c
drivers/ata/libahci_platform.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/sata_highbank.c
drivers/ata/sata_sil24.c
drivers/atm/firestream.c
drivers/atm/zatm.c
drivers/base/dma-coherent.c
drivers/base/dma-mapping.c
drivers/base/firmware_loader/fallback.c
drivers/base/firmware_loader/fallback.h
drivers/block/loop.c
drivers/block/loop.h
drivers/block/rbd.c
drivers/block/swim.c
drivers/block/swim3.c
drivers/bluetooth/Kconfig
drivers/bluetooth/btbcm.c
drivers/bluetooth/btbcm.h
drivers/bluetooth/btqca.c
drivers/bluetooth/btqca.h
drivers/bluetooth/btqcomsmd.c
drivers/bluetooth/btusb.c
drivers/bluetooth/hci_bcm.c
drivers/bluetooth/hci_ldisc.c
drivers/bluetooth/hci_qca.c
drivers/bus/Kconfig
drivers/cdrom/cdrom.c
drivers/char/agp/uninorth-agp.c
drivers/char/random.c
drivers/char/virtio_console.c
drivers/clk/Kconfig
drivers/clk/clk-cs2000-cp.c
drivers/clk/clk-mux.c
drivers/clk/clk-stm32mp1.c
drivers/clk/clk.c
drivers/clk/imx/clk-imx6ul.c
drivers/clk/meson/clk-regmap.c
drivers/clk/meson/gxbb-aoclk.h
drivers/clk/meson/meson8b.c
drivers/clocksource/timer-imx-tpm.c
drivers/connector/cn_proc.c
drivers/cpufreq/Kconfig.arm
drivers/cpufreq/brcmstb-avs-cpufreq.c
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/powernv-cpufreq.c
drivers/dax/device.c
drivers/dma/qcom/bam_dma.c
drivers/firmware/arm_scmi/clock.c
drivers/firmware/arm_scmi/driver.c
drivers/firmware/efi/libstub/arm64-stub.c
drivers/fpga/altera-ps-spi.c
drivers/gpio/gpio-aspeed.c
drivers/gpio/gpio-pci-idio-16.c
drivers/gpio/gpio-pcie-idio-24.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdkfd/Kconfig
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
drivers/gpu/drm/amd/display/dc/core/dc_surface.c
drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
drivers/gpu/drm/amd/display/include/dal_asic_id.h
drivers/gpu/drm/amd/display/modules/color/color_gamma.c
drivers/gpu/drm/amd/include/atomfirmware.h
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
drivers/gpu/drm/bridge/Kconfig
drivers/gpu/drm/bridge/dumb-vga-dac.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_dp_dual_mode_helper.c
drivers/gpu/drm/drm_drv.c
drivers/gpu/drm/drm_dumb_buffers.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_file.c
drivers/gpu/drm/exynos/exynos_drm_fb.c
drivers/gpu/drm/exynos/exynos_hdmi.c
drivers/gpu/drm/exynos/exynos_mixer.c
drivers/gpu/drm/exynos/regs-mixer.h
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/display.c
drivers/gpu/drm/i915/gvt/dmabuf.c
drivers/gpu/drm/i915/gvt/fb_decoder.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/gvt/gtt.h
drivers/gpu/drm/i915/gvt/handlers.c
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_userptr.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_csr.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_fbdev.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
drivers/gpu/drm/msm/disp/mdp_format.c
drivers/gpu/drm/msm/disp/mdp_kms.h
drivers/gpu/drm/msm/dsi/dsi_host.c
drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
drivers/gpu/drm/msm/msm_fb.c
drivers/gpu/drm/msm/msm_fbdev.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_kms.h
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_bo.h
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/nouveau/nv50_display.c
drivers/gpu/drm/omapdrm/dss/dispc.c
drivers/gpu/drm/omapdrm/dss/hdmi4.c
drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
drivers/gpu/drm/omapdrm/dss/hdmi5.c
drivers/gpu/drm/omapdrm/omap_connector.c
drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
drivers/gpu/drm/omapdrm/tcm-sita.c
drivers/gpu/drm/qxl/qxl_cmd.c
drivers/gpu/drm/qxl/qxl_drv.h
drivers/gpu/drm/qxl/qxl_ioctl.c
drivers/gpu/drm/qxl/qxl_release.c
drivers/gpu/drm/sun4i/sun4i_lvds.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
drivers/gpu/drm/vc4/vc4_bo.c
drivers/gpu/drm/vc4/vc4_crtc.c
drivers/gpu/drm/vc4/vc4_dpi.c
drivers/gpu/drm/vc4/vc4_drv.c
drivers/gpu/drm/vc4/vc4_plane.c
drivers/gpu/drm/vc4/vc4_validate_shaders.c
drivers/gpu/drm/virtio/virtgpu_vq.c
drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
drivers/hid/Kconfig
drivers/hid/hid-ids.h
drivers/hid/hid-lenovo.c
drivers/hid/i2c-hid/i2c-hid.c
drivers/hid/intel-ish-hid/ishtp-hid-client.c
drivers/hid/intel-ish-hid/ishtp/bus.c
drivers/hid/wacom_sys.c
drivers/hwmon/Kconfig
drivers/hwmon/k10temp.c
drivers/hwmon/nct6683.c
drivers/hwmon/scmi-hwmon.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-pmcmsp.c
drivers/i2c/busses/i2c-sprd.c
drivers/i2c/busses/i2c-viperboard.c
drivers/i2c/i2c-core-acpi.c
drivers/i2c/i2c-dev.c
drivers/infiniband/Kconfig
drivers/infiniband/core/cache.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/iwpm_util.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/roce_gid_mgmt.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_ioctl.c
drivers/infiniband/core/uverbs_std_types_flow_action.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/resource.c
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/platform.c
drivers/infiniband/hw/hfi1/qsfp.c
drivers/infiniband/hw/hfi1/ruc.c
drivers/infiniband/hw/hfi1/ud.c
drivers/infiniband/hw/hns/hns_roce_hem.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/Kconfig
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/nes/nes_nic.c
drivers/infiniband/sw/rxe/rxe_opcode.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
drivers/infiniband/ulp/srp/Kconfig
drivers/infiniband/ulp/srpt/Kconfig
drivers/input/evdev.c
drivers/input/input-leds.c
drivers/input/mouse/alps.c
drivers/input/rmi4/rmi_spi.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/iommu/amd_iommu.c
drivers/iommu/dma-iommu.c
drivers/iommu/dmar.c
drivers/iommu/intel_irq_remapping.c
drivers/iommu/rockchip-iommu.c
drivers/irqchip/qcom-irq-combiner.c
drivers/md/bcache/alloc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/debug.c
drivers/md/bcache/io.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/bcache/writeback.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-background-tracker.c
drivers/md/dm-integrity.c
drivers/md/dm-raid1.c
drivers/md/dm.c
drivers/media/i2c/saa7115.c
drivers/media/i2c/saa711x_regs.h
drivers/media/i2c/tda7432.c
drivers/media/i2c/tvp5150.c
drivers/media/i2c/tvp5150_reg.h
drivers/media/i2c/tvp7002.c
drivers/media/i2c/tvp7002_reg.h
drivers/media/media-devnode.c
drivers/media/pci/bt8xx/bttv-audio-hook.c
drivers/media/pci/bt8xx/bttv-audio-hook.h
drivers/media/pci/bt8xx/bttv-cards.c
drivers/media/pci/bt8xx/bttv-driver.c
drivers/media/pci/bt8xx/bttv-i2c.c
drivers/media/pci/cx23885/cx23885-input.c
drivers/media/pci/cx88/cx88-alsa.c
drivers/media/pci/cx88/cx88-blackbird.c
drivers/media/pci/cx88/cx88-core.c
drivers/media/pci/cx88/cx88-i2c.c
drivers/media/pci/cx88/cx88-video.c
drivers/media/radio/radio-aimslab.c
drivers/media/radio/radio-aztech.c
drivers/media/radio/radio-gemtek.c
drivers/media/radio/radio-maxiradio.c
drivers/media/radio/radio-rtrack2.c
drivers/media/radio/radio-sf16fmi.c
drivers/media/radio/radio-terratec.c
drivers/media/radio/radio-trust.c
drivers/media/radio/radio-typhoon.c
drivers/media/radio/radio-zoltrix.c
drivers/media/rc/keymaps/rc-avermedia-m135a.c
drivers/media/rc/keymaps/rc-encore-enltv-fm53.c
drivers/media/rc/keymaps/rc-encore-enltv2.c
drivers/media/rc/keymaps/rc-kaiomy.c
drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c
drivers/media/rc/keymaps/rc-pixelview-new.c
drivers/media/tuners/tea5761.c
drivers/media/tuners/tea5767.c
drivers/media/tuners/tuner-xc2028-types.h
drivers/media/tuners/tuner-xc2028.c
drivers/media/tuners/tuner-xc2028.h
drivers/media/usb/em28xx/em28xx-camera.c
drivers/media/usb/em28xx/em28xx-cards.c
drivers/media/usb/em28xx/em28xx-core.c
drivers/media/usb/em28xx/em28xx-dvb.c
drivers/media/usb/em28xx/em28xx-i2c.c
drivers/media/usb/em28xx/em28xx-input.c
drivers/media/usb/em28xx/em28xx-video.c
drivers/media/usb/em28xx/em28xx.h
drivers/media/usb/gspca/zc3xx-reg.h
drivers/media/usb/tm6000/tm6000-cards.c
drivers/media/usb/tm6000/tm6000-core.c
drivers/media/usb/tm6000/tm6000-i2c.c
drivers/media/usb/tm6000/tm6000-regs.h
drivers/media/usb/tm6000/tm6000-usb-isoc.h
drivers/media/usb/tm6000/tm6000-video.c
drivers/media/usb/tm6000/tm6000.h
drivers/media/v4l2-core/v4l2-dev.c
drivers/media/v4l2-core/v4l2-ioctl.c
drivers/media/v4l2-core/videobuf-core.c
drivers/media/v4l2-core/videobuf-dma-contig.c
drivers/media/v4l2-core/videobuf-dma-sg.c
drivers/media/v4l2-core/videobuf-vmalloc.c
drivers/memory/emif-asm-offsets.c
drivers/message/fusion/mptsas.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/pci.c
drivers/misc/cxl/sysfs.c
drivers/misc/eeprom/at24.c
drivers/mtd/chips/cfi_cmdset_0001.c
drivers/mtd/chips/cfi_cmdset_0002.c
drivers/mtd/nand/core.c
drivers/mtd/nand/onenand/omap2.c
drivers/mtd/nand/raw/marvell_nand.c
drivers/mtd/nand/raw/nand_base.c
drivers/mtd/nand/raw/tango_nand.c
drivers/mtd/spi-nor/cadence-quadspi.c
drivers/net/Kconfig
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_options.c
drivers/net/can/dev.c
drivers/net/can/flexcan.c
drivers/net/can/spi/hi311x.c
drivers/net/can/usb/kvaser_usb.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/bcm_sf2_cfp.c
drivers/net/dsa/dsa_loop.c
drivers/net/dsa/lan9303-core.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global1.c
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/dsa/mv88e6xxx/port.h
drivers/net/dsa/mv88e6xxx/serdes.c
drivers/net/dsa/mv88e6xxx/serdes.h
drivers/net/dsa/qca8k.c
drivers/net/ethernet/3com/3c59x.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/amd/amd8111e.c
drivers/net/ethernet/amd/xgbe/xgbe-common.h
drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
drivers/net/ethernet/amd/xgbe/xgbe-main.c
drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
drivers/net/ethernet/amd/xgbe/xgbe-pci.c
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
drivers/net/ethernet/amd/xgbe/xgbe.h
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.h
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bnxt/Makefile
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c [new file with mode: 0644]
drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h [new file with mode: 0644]
drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
drivers/net/ethernet/cavium/liquidio/lio_core.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
drivers/net/ethernet/cavium/liquidio/liquidio_common.h
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_device.h
drivers/net/ethernet/cavium/liquidio/octeon_network.h
drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
drivers/net/ethernet/chelsio/cxgb4/l2t.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/srq.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
drivers/net/ethernet/ethoc.c
drivers/net/ethernet/freescale/Kconfig
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fman/fman_port.c
drivers/net/ethernet/freescale/ucc_geth_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hnae3.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/huawei/hinic/hinic_main.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/e100.c
drivers/net/ethernet/intel/e1000/Makefile
drivers/net/ethernet/intel/e1000/e1000.h
drivers/net/ethernet/intel/e1000/e1000_ethtool.c
drivers/net/ethernet/intel/e1000/e1000_hw.c
drivers/net/ethernet/intel/e1000/e1000_hw.h
drivers/net/ethernet/intel/e1000/e1000_main.c
drivers/net/ethernet/intel/e1000/e1000_osdep.h
drivers/net/ethernet/intel/e1000/e1000_param.c
drivers/net/ethernet/intel/e1000e/80003es2lan.c
drivers/net/ethernet/intel/e1000e/80003es2lan.h
drivers/net/ethernet/intel/e1000e/82571.c
drivers/net/ethernet/intel/e1000e/82571.h
drivers/net/ethernet/intel/e1000e/Makefile
drivers/net/ethernet/intel/e1000e/defines.h
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/intel/e1000e/ethtool.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/ich8lan.h
drivers/net/ethernet/intel/e1000e/mac.c
drivers/net/ethernet/intel/e1000e/mac.h
drivers/net/ethernet/intel/e1000e/manage.c
drivers/net/ethernet/intel/e1000e/manage.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/e1000e/nvm.c
drivers/net/ethernet/intel/e1000e/nvm.h
drivers/net/ethernet/intel/e1000e/param.c
drivers/net/ethernet/intel/e1000e/phy.c
drivers/net/ethernet/intel/e1000e/phy.h
drivers/net/ethernet/intel/e1000e/ptp.c
drivers/net/ethernet/intel/e1000e/regs.h
drivers/net/ethernet/intel/fm10k/Makefile
drivers/net/ethernet/intel/fm10k/fm10k.h
drivers/net/ethernet/intel/fm10k/fm10k_common.c
drivers/net/ethernet/intel/fm10k/fm10k_common.h
drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
drivers/net/ethernet/intel/fm10k/fm10k_iov.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.h
drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
drivers/net/ethernet/intel/fm10k/fm10k_type.h
drivers/net/ethernet/intel/fm10k/fm10k_vf.c
drivers/net/ethernet/intel/fm10k/fm10k_vf.h
drivers/net/ethernet/intel/i40e/Makefile
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq.c
drivers/net/ethernet/intel/i40e/i40e_adminq.h
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_alloc.h
drivers/net/ethernet/intel/i40e/i40e_client.c
drivers/net/ethernet/intel/i40e/i40e_client.h
drivers/net/ethernet/intel/i40e/i40e_common.c
drivers/net/ethernet/intel/i40e/i40e_dcb.c
drivers/net/ethernet/intel/i40e/i40e_dcb.h
drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_devids.h
drivers/net/ethernet/intel/i40e/i40e_diag.c
drivers/net/ethernet/intel/i40e/i40e_diag.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_hmc.c
drivers/net/ethernet/intel/i40e/i40e_hmc.h
drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/i40e/i40e_osdep.h
drivers/net/ethernet/intel/i40e/i40e_prototype.h
drivers/net/ethernet/intel/i40e/i40e_ptp.c
drivers/net/ethernet/intel/i40e/i40e_register.h
drivers/net/ethernet/intel/i40e/i40e_status.h
drivers/net/ethernet/intel/i40e/i40e_trace.h
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
drivers/net/ethernet/intel/i40evf/Makefile
drivers/net/ethernet/intel/i40evf/i40e_adminq.c
drivers/net/ethernet/intel/i40evf/i40e_adminq.h
drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40evf/i40e_alloc.h
drivers/net/ethernet/intel/i40evf/i40e_common.c
drivers/net/ethernet/intel/i40evf/i40e_devids.h
drivers/net/ethernet/intel/i40evf/i40e_hmc.h
drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
drivers/net/ethernet/intel/i40evf/i40e_osdep.h
drivers/net/ethernet/intel/i40evf/i40e_prototype.h
drivers/net/ethernet/intel/i40evf/i40e_register.h
drivers/net/ethernet/intel/i40evf/i40e_status.h
drivers/net/ethernet/intel/i40evf/i40e_trace.h
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40e_type.h
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_client.c
drivers/net/ethernet/intel/i40evf/i40evf_client.h
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
drivers/net/ethernet/intel/ice/ice_common.c
drivers/net/ethernet/intel/ice/ice_controlq.c
drivers/net/ethernet/intel/ice/ice_hw_autogen.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_nvm.c
drivers/net/ethernet/intel/ice/ice_sched.c
drivers/net/ethernet/intel/igb/Makefile
drivers/net/ethernet/intel/igb/e1000_82575.c
drivers/net/ethernet/intel/igb/e1000_82575.h
drivers/net/ethernet/intel/igb/e1000_defines.h
drivers/net/ethernet/intel/igb/e1000_hw.h
drivers/net/ethernet/intel/igb/e1000_i210.c
drivers/net/ethernet/intel/igb/e1000_i210.h
drivers/net/ethernet/intel/igb/e1000_mac.c
drivers/net/ethernet/intel/igb/e1000_mac.h
drivers/net/ethernet/intel/igb/e1000_mbx.c
drivers/net/ethernet/intel/igb/e1000_mbx.h
drivers/net/ethernet/intel/igb/e1000_nvm.c
drivers/net/ethernet/intel/igb/e1000_nvm.h
drivers/net/ethernet/intel/igb/e1000_phy.c
drivers/net/ethernet/intel/igb/e1000_phy.h
drivers/net/ethernet/intel/igb/e1000_regs.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/intel/igb/igb_hwmon.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igbvf/Makefile
drivers/net/ethernet/intel/igbvf/defines.h
drivers/net/ethernet/intel/igbvf/ethtool.c
drivers/net/ethernet/intel/igbvf/igbvf.h
drivers/net/ethernet/intel/igbvf/mbx.c
drivers/net/ethernet/intel/igbvf/mbx.h
drivers/net/ethernet/intel/igbvf/netdev.c
drivers/net/ethernet/intel/igbvf/regs.h
drivers/net/ethernet/intel/igbvf/vf.c
drivers/net/ethernet/intel/igbvf/vf.h
drivers/net/ethernet/intel/ixgb/Makefile
drivers/net/ethernet/intel/ixgb/ixgb.h
drivers/net/ethernet/intel/ixgb/ixgb_ee.c
drivers/net/ethernet/intel/ixgb/ixgb_ee.h
drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
drivers/net/ethernet/intel/ixgb/ixgb_hw.c
drivers/net/ethernet/intel/ixgb/ixgb_hw.h
drivers/net/ethernet/intel/ixgb/ixgb_ids.h
drivers/net/ethernet/intel/ixgb/ixgb_main.c
drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
drivers/net/ethernet/intel/ixgb/ixgb_param.c
drivers/net/ethernet/intel/ixgbe/Makefile
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
drivers/net/ethernet/intel/ixgbevf/Makefile
drivers/net/ethernet/intel/ixgbevf/defines.h
drivers/net/ethernet/intel/ixgbevf/ethtool.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/intel/ixgbevf/mbx.c
drivers/net/ethernet/intel/ixgbevf/mbx.h
drivers/net/ethernet/intel/ixgbevf/regs.h
drivers/net/ethernet/intel/ixgbevf/vf.c
drivers/net/ethernet/intel/ixgbevf/vf.h
drivers/net/ethernet/marvell/Kconfig
drivers/net/ethernet/marvell/mvmdio.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlx5/core/wq.h
drivers/net/ethernet/mellanox/mlxsw/cmd.h
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/resources.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/switchx2.c
drivers/net/ethernet/mscc/Kconfig [new file with mode: 0644]
drivers/net/ethernet/mscc/Makefile [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot.c [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_ana.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_board.c [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_dev.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_dev_gmii.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_hsio.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_io.c [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_qs.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_qsys.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_regs.c [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_rew.h [new file with mode: 0644]
drivers/net/ethernet/mscc/ocelot_sys.h [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
drivers/net/ethernet/netronome/nfp/bpf/fw.h
drivers/net/ethernet/netronome/nfp/bpf/jit.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/netronome/nfp/bpf/offload.c
drivers/net/ethernet/netronome/nfp/bpf/verifier.c
drivers/net/ethernet/netronome/nfp/flower/action.c
drivers/net/ethernet/netronome/nfp/flower/cmsg.h
drivers/net/ethernet/netronome/nfp/flower/main.c
drivers/net/ethernet/netronome/nfp/flower/main.h
drivers/net/ethernet/netronome/nfp/flower/metadata.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/nfp_app.c
drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
drivers/net/ethernet/netronome/nfp/nfp_asm.h
drivers/net/ethernet/netronome/nfp/nfp_devlink.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_main.h
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_fcoe.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_ll2.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_roce.c
drivers/net/ethernet/qlogic/qed/qed_sp.h
drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_fp.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qede/qede_rdma.c
drivers/net/ethernet/qualcomm/emac/emac-mac.c
drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
drivers/net/ethernet/realtek/8139too.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/renesas/sh_eth.h
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/efx.h
drivers/net/ethernet/sfc/farch.c
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/stmicro/stmmac/Makefile
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac4.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
drivers/net/ethernet/stmicro/stmmac/dwmac5.c
drivers/net/ethernet/stmicro/stmmac/dwmac5.h
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/hwif.c
drivers/net/ethernet/stmicro/stmmac/hwif.h
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c [new file with mode: 0644]
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/ti/Kconfig
drivers/net/ethernet/ti/cpsw-phy-sel.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpts.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/ethernet/ti/davinci_mdio.c
drivers/net/hamradio/mkiss.c
drivers/net/hippi/rrunner.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ieee802154/atusb.c
drivers/net/ieee802154/mcr20a.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macvlan.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/bcm-phy-lib.c
drivers/net/phy/broadcom.c
drivers/net/phy/dp83tc811.c [new file with mode: 0644]
drivers/net/phy/marvell.c
drivers/net/phy/mdio-gpio.c
drivers/net/phy/mdio-mscc-miim.c [new file with mode: 0644]
drivers/net/phy/micrel.c
drivers/net/phy/microchip_t1.c [new file with mode: 0644]
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/sfp-bus.c
drivers/net/phy/smsc.c
drivers/net/ppp/pppoe.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/Kconfig
drivers/net/usb/lan78xx.c
drivers/net/usb/qmi_wwan.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vmxnet3/vmxnet3_ethtool.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/wireless/ath/ath6kl/main.c
drivers/net/wireless/ath/wil6210/debugfs.c
drivers/net/wireless/ath/wil6210/wmi.c
drivers/net/wireless/broadcom/b43/dma.c
drivers/net/wireless/broadcom/b43legacy/dma.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
drivers/net/wireless/intel/ipw2x00/ipw2100.c
drivers/net/wireless/intel/ipw2x00/ipw2100.h
drivers/net/wireless/intel/ipw2x00/ipw2200.c
drivers/net/wireless/intel/iwlwifi/cfg/1000.c
drivers/net/wireless/intel/iwlwifi/cfg/2000.c
drivers/net/wireless/intel/iwlwifi/cfg/22000.c
drivers/net/wireless/intel/iwlwifi/cfg/5000.c
drivers/net/wireless/intel/iwlwifi/cfg/6000.c
drivers/net/wireless/intel/iwlwifi/cfg/7000.c
drivers/net/wireless/intel/iwlwifi/cfg/8000.c
drivers/net/wireless/intel/iwlwifi/cfg/9000.c
drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
drivers/net/wireless/intel/iwlwifi/fw/file.h
drivers/net/wireless/intel/iwlwifi/fw/img.h
drivers/net/wireless/intel/iwlwifi/fw/paging.c
drivers/net/wireless/intel/iwlwifi/iwl-config.h
drivers/net/wireless/intel/iwlwifi/iwl-csr.h
drivers/net/wireless/intel/iwlwifi/iwl-drv.c
drivers/net/wireless/intel/iwlwifi/iwl-eeprom-read.c
drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
drivers/net/wireless/intel/iwlwifi/iwl-trans.h
drivers/net/wireless/intel/iwlwifi/mvm/d3.c
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/ops.c
drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/wireless/intel/iwlwifi/mvm/utils.c
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/rx.c
drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/intel/iwlwifi/pcie/tx.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/mwifiex/cfg80211.c
drivers/net/wireless/marvell/mwifiex/cmdevt.c
drivers/net/wireless/marvell/mwifiex/main.c
drivers/net/wireless/marvell/mwifiex/main.h
drivers/net/wireless/marvell/mwifiex/uap_event.c
drivers/net/wireless/mediatek/mt76/agg-rx.c
drivers/net/wireless/mediatek/mt76/mac80211.c
drivers/net/wireless/mediatek/mt76/mt76.h
drivers/net/wireless/mediatek/mt76/mt76x2.h
drivers/net/wireless/mediatek/mt76/mt76x2_init.c
drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
drivers/net/wireless/mediatek/mt76/tx.c
drivers/net/wireless/quantenna/qtnfmac/core.c
drivers/net/wireless/quantenna/qtnfmac/event.c
drivers/net/wireless/ralink/rt2x00/rt2800.h
drivers/net/wireless/ralink/rt2x00/rt2800lib.c
drivers/net/wireless/ralink/rt2x00/rt2800lib.h
drivers/net/wireless/ralink/rt2x00/rt2800mmio.c
drivers/net/wireless/ralink/rt2x00/rt2800pci.c
drivers/net/wireless/ralink/rt2x00/rt2800soc.c
drivers/net/wireless/ralink/rt2x00/rt2800usb.c
drivers/net/wireless/ralink/rt2x00/rt2x00.h
drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8192e2ant.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
drivers/net/wireless/realtek/rtlwifi/wifi.h
drivers/net/wireless/rsi/rsi_91x_coex.c
drivers/net/wireless/rsi/rsi_91x_core.c
drivers/net/wireless/rsi/rsi_91x_mac80211.c
drivers/net/wireless/rsi/rsi_91x_usb.c
drivers/net/wireless/ti/wlcore/sdio.c
drivers/nvdimm/Kconfig
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/of_pmem.c
drivers/nvme/host/Kconfig
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/target/Kconfig
drivers/nvme/target/loop.c
drivers/of/fdt.c
drivers/of/of_mdio.c
drivers/of/overlay.c
drivers/parisc/ccio-dma.c
drivers/pci/dwc/pcie-kirin.c
drivers/pci/host/pci-aardvark.c
drivers/pci/pci-driver.c
drivers/pci/pci.c
drivers/phy/marvell/phy-mvebu-cp110-comphy.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/intel/pinctrl-sunrisepoint.c
drivers/pinctrl/meson/pinctrl-meson-axg.c
drivers/platform/x86/Kconfig
drivers/platform/x86/asus-wireless.c
drivers/ptp/ptp_pch.c
drivers/rapidio/devices/rio_mport_cdev.c
drivers/remoteproc/qcom_q6v5_pil.c
drivers/remoteproc/remoteproc_core.c
drivers/reset/reset-uniphier.c
drivers/rpmsg/rpmsg_char.c
drivers/rtc/rtc-opal.c
drivers/s390/block/dasd_alias.c
drivers/s390/cio/chsc.c
drivers/s390/cio/qdio_setup.c
drivers/s390/cio/vfio_ccw_cp.c
drivers/s390/cio/vfio_ccw_fsm.c
drivers/s390/net/lcs.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_core_mpc.h
drivers/s390/net/qeth_core_sys.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/sbus/char/oradax.c
drivers/scsi/aacraid/commsup.c
drivers/scsi/fnic/fnic_trace.c
drivers/scsi/isci/port_config.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/qedf/qedf_fip.c
drivers/scsi/qedf/qedf_main.c
drivers/scsi/qedi/qedi_iscsi.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/sd.c
drivers/scsi/sd_zbc.c
drivers/scsi/storvsc_drv.c
drivers/scsi/ufs/ufshcd.c
drivers/scsi/vmw_pvscsi.c
drivers/slimbus/messaging.c
drivers/soc/bcm/raspberrypi-power.c
drivers/spi/spi-bcm-qspi.c
drivers/spi/spi-bcm2835aux.c
drivers/spi/spi-cadence.c
drivers/spi/spi-imx.c
drivers/spi/spi-pxa2xx.h
drivers/spi/spi-sh-msiof.c
drivers/staging/media/imx/imx-media-csi.c
drivers/staging/wilc1000/host_interface.c
drivers/target/target_core_iblock.c
drivers/target/target_core_pscsi.c
drivers/tee/tee_core.c
drivers/tee/tee_shm.c
drivers/thermal/int340x_thermal/int3403_thermal.c
drivers/thermal/samsung/exynos_tmu.c
drivers/tty/n_gsm.c
drivers/tty/serial/earlycon.c
drivers/tty/serial/imx.c
drivers/tty/serial/mvebu-uart.c
drivers/tty/serial/qcom_geni_serial.c
drivers/tty/serial/xilinx_uartps.c
drivers/tty/tty_io.c
drivers/tty/tty_ldisc.c
drivers/uio/uio_hv_generic.c
drivers/usb/Kconfig
drivers/usb/core/config.c
drivers/usb/core/hcd.c
drivers/usb/core/hub.c
drivers/usb/core/phy.c
drivers/usb/core/phy.h
drivers/usb/core/quirks.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/gadget.c
drivers/usb/dwc2/hcd.c
drivers/usb/dwc2/pci.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/f_phonet.c
drivers/usb/host/ehci-mem.c
drivers/usb/host/ehci-sched.c
drivers/usb/host/xhci-dbgtty.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-plat.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/musb/musb_dsps.c
drivers/usb/musb/musb_gadget.c
drivers/usb/musb/musb_host.c
drivers/usb/musb/musb_host.h
drivers/usb/musb/musb_virthub.c
drivers/usb/serial/Kconfig
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/option.c
drivers/usb/serial/usb-serial-simple.c
drivers/usb/serial/visor.c
drivers/usb/typec/tcpm.c
drivers/usb/typec/tps6598x.c
drivers/usb/typec/ucsi/Makefile
drivers/usb/typec/ucsi/ucsi.c
drivers/usb/usbip/stub.h
drivers/usb/usbip/stub_dev.c
drivers/usb/usbip/stub_main.c
drivers/usb/usbip/usbip_common.h
drivers/usb/usbip/usbip_event.c
drivers/usb/usbip/vhci_hcd.c
drivers/vhost/net.c
drivers/virt/vboxguest/vboxguest_core.c
drivers/virt/vboxguest/vboxguest_core.h
drivers/virt/vboxguest/vboxguest_linux.c
drivers/virt/vboxguest/vboxguest_utils.c
drivers/watchdog/aspeed_wdt.c
drivers/watchdog/renesas_wdt.c
drivers/watchdog/sch311x_wdt.c
drivers/watchdog/w83977f_wdt.c
drivers/watchdog/wafer5823wdt.c
fs/afs/addr_list.c
fs/afs/callback.c
fs/afs/cmservice.c
fs/afs/dir.c
fs/afs/file.c
fs/afs/flock.c
fs/afs/fsclient.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/rotate.c
fs/afs/rxrpc.c
fs/afs/security.c
fs/afs/server.c
fs/afs/server_list.c
fs/afs/super.c
fs/afs/write.c
fs/autofs4/root.c
fs/binfmt_elf.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/print-tree.c
fs/btrfs/print-tree.h
fs/btrfs/props.c
fs/btrfs/qgroup.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/ceph/file.c
fs/ceph/xattr.c
fs/cifs/Kconfig
fs/cifs/cifs_debug.h
fs/cifs/cifsfs.c
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smbdirect.c
fs/cifs/transport.c
fs/ext4/balloc.c
fs/ext4/extents.c
fs/ext4/super.c
fs/fs-writeback.c
fs/hfsplus/super.c
fs/jbd2/transaction.c
fs/ocfs2/refcounttree.c
fs/proc/Kconfig
fs/proc/base.c
fs/proc/kcore.c
fs/proc/loadavg.c
fs/proc/task_mmu.c
fs/proc/vmcore.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/xfs_file.c
include/asm-generic/vmlinux.lds.h
include/drm/drm_hdcp.h
include/dt-bindings/clock/stm32mp1-clks.h
include/kvm/arm_psci.h
include/kvm/arm_vgic.h
include/linux/avf/virtchnl.h
include/linux/backing-dev-defs.h
include/linux/backing-dev.h
include/linux/blk-mq.h
include/linux/blkdev.h
include/linux/bpf.h
include/linux/bpf_trace.h
include/linux/bpf_types.h
include/linux/bpf_verifier.h
include/linux/brcmphy.h
include/linux/btf.h
include/linux/ceph/osd_client.h
include/linux/clk-provider.h
include/linux/compiler-clang.h
include/linux/coresight-pmu.h
include/linux/crash_dump.h
include/linux/device.h
include/linux/efi.h
include/linux/ethtool.h
include/linux/filter.h
include/linux/fsnotify_backend.h
include/linux/genhd.h
include/linux/hrtimer.h
include/linux/if_bridge.h
include/linux/if_macvlan.h
include/linux/kcore.h
include/linux/kthread.h
include/linux/kvm_host.h
include/linux/mlx4/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/mlx5_ifc_fpga.h
include/linux/mm.h
include/linux/mmc/sdio_ids.h
include/linux/mtd/flashchip.h
include/linux/mtd/map.h
include/linux/mtd/rawnand.h
include/linux/net_dim.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/netfilter/nf_osf.h [new file with mode: 0644]
include/linux/netfilter_bridge/ebtables.h
include/linux/oom.h
include/linux/percpu-rwsem.h
include/linux/phy.h
include/linux/phy/phy.h
include/linux/platform_data/b53.h
include/linux/platform_data/mv88e6xxx.h [new file with mode: 0644]
include/linux/qed/qed_if.h
include/linux/qed/qed_ll2_if.h
include/linux/rbtree_augmented.h
include/linux/rbtree_latch.h
include/linux/remoteproc.h
include/linux/rhashtable.h
include/linux/rwsem.h
include/linux/sched.h
include/linux/sched/signal.h
include/linux/serial_core.h
include/linux/skb_array.h
include/linux/skbuff.h
include/linux/sock_diag.h
include/linux/socket.h
include/linux/stringhash.h
include/linux/tcp.h
include/linux/thread_info.h
include/linux/ti-emif-sram.h
include/linux/timekeeper_internal.h
include/linux/timekeeping.h
include/linux/timekeeping32.h
include/linux/timer.h
include/linux/tnum.h
include/linux/tty.h
include/linux/u64_stats_sync.h
include/linux/udp.h
include/linux/usb/composite.h
include/linux/vbox_utils.h
include/linux/virtio.h
include/linux/wait_bit.h
include/media/i2c/tvp7002.h
include/media/videobuf-core.h
include/media/videobuf-dma-sg.h
include/media/videobuf-vmalloc.h
include/net/addrconf.h
include/net/bluetooth/hci_core.h
include/net/bonding.h
include/net/devlink.h
include/net/dsa.h
include/net/erspan.h
include/net/flow_dissector.h
include/net/ife.h
include/net/inet_connection_sock.h
include/net/inet_sock.h
include/net/inet_timewait_sock.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip6_route.h
include/net/ip_tunnels.h
include/net/ip_vs.h
include/net/ipv6.h
include/net/llc_conn.h
include/net/mac80211.h
include/net/neighbour.h
include/net/netfilter/ipv4/nf_nat_masquerade.h
include/net/netfilter/ipv6/nf_nat_masquerade.h
include/net/netfilter/nf_flow_table.h
include/net/netfilter/nf_nat.h
include/net/netfilter/nf_nat_l3proto.h
include/net/netfilter/nf_nat_l4proto.h
include/net/netfilter/nf_nat_redirect.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_core.h
include/net/netfilter/nfnetlink_log.h
include/net/netfilter/nft_meta.h [deleted file]
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/net/sctp/constants.h
include/net/sctp/sctp.h
include/net/sctp/sm.h
include/net/sctp/structs.h
include/net/sock.h
include/net/switchdev.h
include/net/tcp.h
include/net/tipc.h
include/net/tls.h
include/net/udp.h
include/net/xdp.h
include/net/xdp_sock.h [new file with mode: 0644]
include/net/xfrm.h
include/scsi/scsi_dbg.h
include/soc/bcm2835/raspberrypi-firmware.h
include/sound/control.h
include/trace/events/afs.h
include/trace/events/bpf.h [deleted file]
include/trace/events/fib6.h
include/trace/events/initcall.h
include/trace/events/rxrpc.h
include/trace/events/sunrpc.h
include/trace/events/ufs.h
include/trace/events/workqueue.h
include/trace/events/xen.h
include/uapi/linux/bpf.h
include/uapi/linux/btf.h
include/uapi/linux/cn_proc.h
include/uapi/linux/devlink.h
include/uapi/linux/elf.h
include/uapi/linux/if_infiniband.h
include/uapi/linux/if_xdp.h [new file with mode: 0644]
include/uapi/linux/kvm.h
include/uapi/linux/netfilter/nf_conntrack_tcp.h
include/uapi/linux/netfilter/nf_nat.h
include/uapi/linux/netfilter/nf_osf.h [new file with mode: 0644]
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter/nfnetlink_conntrack.h
include/uapi/linux/netfilter/xt_osf.h
include/uapi/linux/netfilter_bridge/ebtables.h
include/uapi/linux/netfilter_ipv6/ip6t_srh.h
include/uapi/linux/nl80211.h
include/uapi/linux/perf_event.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/random.h
include/uapi/linux/rds.h
include/uapi/linux/snmp.h
include/uapi/linux/sysctl.h
include/uapi/linux/tcp.h
include/uapi/linux/time.h
include/uapi/linux/tipc.h
include/uapi/linux/tls.h
include/uapi/linux/udp.h
include/uapi/linux/virtio_balloon.h
include/uapi/linux/vmcore.h [new file with mode: 0644]
include/uapi/rdma/cxgb3-abi.h
include/uapi/rdma/cxgb4-abi.h
include/uapi/rdma/hns-abi.h
include/uapi/rdma/ib_user_cm.h
include/uapi/rdma/ib_user_ioctl_verbs.h
include/uapi/rdma/ib_user_mad.h
include/uapi/rdma/ib_user_sa.h
include/uapi/rdma/ib_user_verbs.h
include/uapi/rdma/mlx4-abi.h
include/uapi/rdma/mlx5-abi.h
include/uapi/rdma/mthca-abi.h
include/uapi/rdma/nes-abi.h
include/uapi/rdma/qedr-abi.h
include/uapi/rdma/rdma_user_cm.h
include/uapi/rdma/rdma_user_ioctl.h
include/uapi/rdma/rdma_user_rxe.h
init/Kconfig
init/main.c
kernel/bpf/Makefile
kernel/bpf/arraymap.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/inode.c
kernel/bpf/offload.c
kernel/bpf/sockmap.c
kernel/bpf/stackmap.c
kernel/bpf/syscall.c
kernel/bpf/tnum.c
kernel/bpf/verifier.c
kernel/bpf/xskmap.c [new file with mode: 0644]
kernel/compat.c
kernel/events/callchain.c
kernel/events/core.c
kernel/events/ring_buffer.c
kernel/events/uprobes.c
kernel/fork.c
kernel/kprobes.c
kernel/kthread.c
kernel/locking/rwsem-xadd.c
kernel/locking/rwsem.c
kernel/locking/rwsem.h
kernel/module.c
kernel/sched/autogroup.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/signal.c
kernel/stop_machine.c
kernel/sysctl_binary.c
kernel/time/clocksource.c
kernel/time/hrtimer.c
kernel/time/posix-cpu-timers.c
kernel/time/posix-stubs.c
kernel/time/posix-timers.c
kernel/time/tick-broadcast.c
kernel/time/tick-common.c
kernel/time/tick-internal.h
kernel/time/tick-oneshot.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timekeeping.h
kernel/trace/bpf_trace.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace_entries.h
kernel/trace/trace_events_filter.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_stack.c
kernel/trace/trace_uprobe.c
kernel/tracepoint.c
lib/dma-direct.c
lib/errseq.c
lib/find_bit_benchmark.c
lib/kobject.c
lib/kobject_uevent.c
lib/radix-tree.c
lib/rhashtable.c
lib/swiotlb.c
lib/test_bitmap.c
lib/test_bpf.c
lib/vsprintf.c
mm/Kconfig
mm/backing-dev.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/memcontrol.c
mm/migrate.c
mm/mmap.c
mm/oom_kill.c
mm/page-writeback.c
mm/rmap.c
mm/sparse.c
mm/vmstat.c
mm/z3fold.c
net/8021q/vlan.c
net/8021q/vlan.h
net/8021q/vlan_dev.c
net/8021q/vlan_netlink.c
net/9p/mod.c
net/9p/trans_common.c
net/9p/trans_fd.c
net/9p/trans_rdma.c
net/9p/trans_virtio.c
net/9p/trans_xen.c
net/Kconfig
net/Makefile
net/atm/lec.c
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c
net/bluetooth/hci_request.c
net/bridge/br.c
net/bridge/br_fdb.c
net/bridge/br_forward.c
net/bridge/br_if.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/bridge/br_vlan.c
net/bridge/netfilter/Kconfig
net/bridge/netfilter/Makefile
net/bridge/netfilter/ebt_stp.c
net/bridge/netfilter/ebtables.c
net/bridge/netfilter/nft_meta_bridge.c [deleted file]
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/compat.c
net/core/dev.c
net/core/devlink.c
net/core/ethtool.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/neighbour.c
net/core/skbuff.c
net/core/sock.c
net/core/xdp.c
net/dccp/ccids/ccid2.c
net/dccp/timer.c
net/dsa/Kconfig
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/master.c
net/dsa/port.c
net/dsa/slave.c
net/ethernet/eth.c
net/ieee802154/6lowpan/6lowpan_i.h
net/ieee802154/6lowpan/reassembly.c
net/ife/ife.c
net/ipv4/af_inet.c
net/ipv4/fib_frontend.c
net/ipv4/inet_connection_sock.c
net/ipv4/ip_gre.c
net/ipv4/ip_output.c
net/ipv4/ip_tunnel_core.c
net/ipv4/ipconfig.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_MASQUERADE.c
net/ipv4/netfilter/ipt_rpfilter.c
net/ipv4/netfilter/iptable_nat.c
net/ipv4/netfilter/nf_flow_table_ipv4.c
net/ipv4/netfilter/nf_nat_h323.c
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
net/ipv4/netfilter/nf_nat_pptp.c
net/ipv4/netfilter/nf_nat_proto_gre.c
net/ipv4/netfilter/nf_nat_proto_icmp.c
net/ipv4/netfilter/nft_chain_nat_ipv4.c
net/ipv4/netfilter/nft_masq_ipv4.c
net/ipv4/ping.c
net/ipv4/proc.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_recovery.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv4/udp_offload.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/addrconf_core.c
net/ipv6/af_inet6.c
net/ipv6/exthdrs_core.c
net/ipv6/fib6_rules.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_output.c
net/ipv6/ip6_vti.c
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_MASQUERADE.c
net/ipv6/netfilter/ip6t_srh.c
net/ipv6/netfilter/ip6table_nat.c
net/ipv6/netfilter/nf_flow_table_ipv6.c
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
net/ipv6/netfilter/nf_nat_proto_icmpv6.c
net/ipv6/netfilter/nft_chain_nat_ipv6.c
net/ipv6/netfilter/nft_masq_ipv6.c
net/ipv6/netfilter/nft_redir_ipv6.c
net/ipv6/route.c
net/ipv6/seg6_iptunnel.c
net/ipv6/sysctl_net_ipv6.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/udp_offload.c
net/ipv6/xfrm6_state.c
net/ipv6/xfrm6_tunnel.c
net/key/af_key.c
net/l2tp/l2tp_debugfs.c
net/l2tp/l2tp_ppp.c
net/llc/af_llc.c
net/llc/llc_c_ac.c
net/llc/llc_conn.c
net/mac80211/agg-tx.c
net/mac80211/mlme.c
net/mac80211/tx.c
net/ncsi/ncsi-rsp.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/core.c
net/netfilter/ipvs/Kconfig
net/netfilter/ipvs/Makefile
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_dh.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_mh.c [new file with mode: 0644]
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_sh.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_extend.c
net/netfilter/nf_conntrack_ftp.c
net/netfilter/nf_conntrack_irc.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_sane.c
net/netfilter/nf_conntrack_sip.c
net/netfilter/nf_conntrack_tftp.c
net/netfilter/nf_flow_table_core.c [moved from net/netfilter/nf_flow_table.c with 67% similarity]
net/netfilter/nf_flow_table_inet.c
net/netfilter/nf_flow_table_ip.c [new file with mode: 0644]
net/netfilter/nf_nat_core.c
net/netfilter/nf_nat_helper.c
net/netfilter/nf_nat_proto_common.c
net/netfilter/nf_nat_proto_dccp.c
net/netfilter/nf_nat_proto_sctp.c
net/netfilter/nf_nat_proto_tcp.c
net/netfilter/nf_nat_proto_udp.c
net/netfilter/nf_nat_proto_unknown.c
net/netfilter/nf_nat_redirect.c
net/netfilter/nf_nat_sip.c
net/netfilter/nf_osf.c [new file with mode: 0644]
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nfnetlink_log.c
net/netfilter/nft_compat.c
net/netfilter/nft_dynset.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_flow_offload.c
net/netfilter/nft_hash.c
net/netfilter/nft_immediate.c
net/netfilter/nft_meta.c
net/netfilter/nft_nat.c
net/netfilter/nft_numgen.c
net/netfilter/nft_objref.c
net/netfilter/nft_rt.c
net/netfilter/nft_set_bitmap.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_rbtree.c
net/netfilter/x_tables.c
net/netfilter/xt_NETMAP.c
net/netfilter/xt_NFLOG.c
net/netfilter/xt_REDIRECT.c
net/netfilter/xt_connmark.c
net/netfilter/xt_nat.c
net/netfilter/xt_osf.c
net/netlink/af_netlink.c
net/nsh/nsh.c
net/openvswitch/conntrack.c
net/openvswitch/flow_netlink.c
net/packet/af_packet.c
net/packet/internal.h
net/qrtr/Kconfig
net/qrtr/Makefile
net/qrtr/tun.c [new file with mode: 0644]
net/rds/ib_cm.c
net/rds/recv.c
net/rfkill/rfkill-gpio.c
net/rxrpc/af_rxrpc.c
net/rxrpc/ar-internal.h
net/rxrpc/conn_event.c
net/rxrpc/input.c
net/rxrpc/local_event.c
net/rxrpc/local_object.c
net/rxrpc/output.c
net/rxrpc/peer_event.c
net/rxrpc/rxkad.c
net/rxrpc/sendmsg.c
net/sched/act_csum.c
net/sched/act_ife.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_flower.c
net/sched/sch_fq.c
net/sched/sch_generic.c
net/sched/sch_red.c
net/sched/sch_tbf.c
net/sctp/associola.c
net/sctp/chunk.c
net/sctp/inqueue.c
net/sctp/ipv6.c
net/sctp/output.c
net/sctp/outqueue.c
net/sctp/sm_make_chunk.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/sctp/stream.c
net/sctp/transport.c
net/sctp/ulpevent.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_cdc.c
net/smc/smc_cdc.h
net/smc/smc_clc.c
net/smc/smc_clc.h
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_diag.c
net/smc/smc_ib.c
net/smc/smc_llc.c
net/smc/smc_llc.h
net/smc/smc_pnet.c
net/smc/smc_rx.c
net/smc/smc_rx.h
net/smc/smc_tx.c
net/smc/smc_tx.h
net/smc/smc_wr.c
net/strparser/strparser.c
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/tipc/name_table.c
net/tipc/node.c
net/tipc/node.h
net/tipc/socket.c
net/tls/Kconfig
net/tls/Makefile
net/tls/tls_device.c [new file with mode: 0644]
net/tls/tls_device_fallback.c [new file with mode: 0644]
net/tls/tls_main.c
net/tls/tls_sw.c
net/wireless/core.c
net/wireless/nl80211.c
net/wireless/reg.c
net/xdp/Kconfig [new file with mode: 0644]
net/xdp/Makefile [new file with mode: 0644]
net/xdp/xdp_umem.c [new file with mode: 0644]
net/xdp/xdp_umem.h [new file with mode: 0644]
net/xdp/xdp_umem_props.h [new file with mode: 0644]
net/xdp/xsk.c [new file with mode: 0644]
net/xdp/xsk_queue.c [new file with mode: 0644]
net/xdp/xsk_queue.h [new file with mode: 0644]
net/xfrm/xfrm_state.c
samples/bpf/Makefile
samples/bpf/bpf_insn.h [moved from samples/bpf/libbpf.h with 98% similarity]
samples/bpf/bpf_load.c
samples/bpf/bpf_load.h
samples/bpf/cookie_uid_helper_example.c
samples/bpf/cpustat_user.c
samples/bpf/fds_example.c
samples/bpf/lathist_user.c
samples/bpf/load_sock_ops.c
samples/bpf/lwt_len_hist_user.c
samples/bpf/map_perf_test_user.c
samples/bpf/offwaketime_user.c
samples/bpf/sampleip_user.c
samples/bpf/sock_example.c
samples/bpf/sock_example.h
samples/bpf/sockex1_user.c
samples/bpf/sockex2_user.c
samples/bpf/sockex3_user.c
samples/bpf/spintest_user.c
samples/bpf/syscall_tp_user.c
samples/bpf/tc_l2_redirect_user.c
samples/bpf/test_cgrp2_array_pin.c
samples/bpf/test_cgrp2_attach.c
samples/bpf/test_cgrp2_attach2.c
samples/bpf/test_cgrp2_sock.c
samples/bpf/test_cgrp2_sock2.c
samples/bpf/test_current_task_under_cgroup_user.c
samples/bpf/test_lru_dist.c
samples/bpf/test_map_in_map_user.c
samples/bpf/test_overhead_user.c
samples/bpf/test_probe_write_user_user.c
samples/bpf/test_tunnel_bpf.sh [deleted file]
samples/bpf/trace_event_user.c
samples/bpf/trace_output_user.c
samples/bpf/tracex1_user.c
samples/bpf/tracex2_user.c
samples/bpf/tracex3_user.c
samples/bpf/tracex4_user.c
samples/bpf/tracex5_user.c
samples/bpf/tracex6_user.c
samples/bpf/tracex7_user.c
samples/bpf/xdp1_user.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdp_fwd_kern.c [new file with mode: 0644]
samples/bpf/xdp_fwd_user.c [new file with mode: 0644]
samples/bpf/xdp_monitor_user.c
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdp_redirect_map_user.c
samples/bpf/xdp_redirect_user.c
samples/bpf/xdp_router_ipv4_user.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_tx_iptunnel_user.c
samples/bpf/xdpsock.h [new file with mode: 0644]
samples/bpf/xdpsock_kern.c [new file with mode: 0644]
samples/bpf/xdpsock_user.c [new file with mode: 0644]
samples/sockmap/Makefile [deleted file]
samples/sockmap/sockmap_test.sh [deleted file]
scripts/Makefile.gcc-plugins
scripts/Makefile.lib
scripts/bpf_helpers_doc.py [new file with mode: 0755]
scripts/dtc/checks.c
scripts/extract_xc3028.pl
scripts/faddr2line
scripts/genksyms/Makefile
scripts/mod/sumversion.c
scripts/split-man.pl
security/commoncap.c
security/selinux/hooks.c
security/selinux/include/classmap.h
sound/core/control.c
sound/core/control_compat.c
sound/core/pcm_compat.c
sound/core/pcm_native.c
sound/core/rawmidi_compat.c
sound/core/seq/oss/seq_oss_event.c
sound/core/seq/oss/seq_oss_midi.c
sound/core/seq/oss/seq_oss_synth.c
sound/core/seq/oss/seq_oss_synth.h
sound/core/seq/seq_virmidi.c
sound/drivers/aloop.c
sound/drivers/opl3/opl3_synth.c
sound/firewire/amdtp-stream.c
sound/firewire/dice/dice-stream.c
sound/firewire/dice/dice.c
sound/pci/asihpi/hpimsginit.c
sound/pci/asihpi/hpioctl.c
sound/pci/hda/hda_hwdep.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/pci/rme9652/hdspm.c
sound/pci/rme9652/rme9652.c
sound/soc/amd/acp-da7219-max98357a.c
sound/soc/codecs/adau17x1.c
sound/soc/codecs/adau17x1.h
sound/soc/codecs/msm8916-wcd-analog.c
sound/soc/codecs/rt5514.c
sound/soc/fsl/fsl_esai.c
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/Kconfig
sound/soc/omap/omap-dmic.c
sound/soc/sh/rcar/core.c
sound/soc/soc-topology.c
sound/usb/line6/midi.c
sound/usb/mixer.c
sound/usb/mixer_maps.c
sound/usb/stream.c
sound/usb/usx2y/us122l.c
sound/usb/usx2y/usX2Yhwdep.c
sound/usb/usx2y/usx2yhwdeppcm.c
tools/arch/arm/include/uapi/asm/kvm.h
tools/arch/arm64/include/uapi/asm/kvm.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/required-features.h
tools/arch/x86/include/uapi/asm/kvm.h
tools/bpf/Makefile
tools/bpf/bpf_dbg.c
tools/bpf/bpftool/.gitignore [new file with mode: 0644]
tools/bpf/bpftool/Documentation/bpftool-map.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Documentation/bpftool.rst
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/common.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/map_perf_ring.c [new file with mode: 0644]
tools/bpf/bpftool/prog.c
tools/include/linux/compiler.h
tools/include/linux/coresight-pmu.h
tools/include/linux/spinlock.h
tools/include/uapi/asm-generic/mman-common.h
tools/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
tools/include/uapi/asm/errno.h [new file with mode: 0644]
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/btf.h
tools/include/uapi/linux/erspan.h [new file with mode: 0644]
tools/include/uapi/linux/if_link.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/perf_event.h
tools/include/uapi/sound/asound.h
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/btf.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/subcmd/parse-options.c
tools/objtool/Makefile
tools/objtool/arch/x86/include/asm/insn.h
tools/objtool/check.c
tools/objtool/elf.c
tools/objtool/elf.h
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-mem.txt
tools/perf/Documentation/perf-sched.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Makefile.config
tools/perf/arch/arm/include/arch-tests.h [new file with mode: 0644]
tools/perf/arch/arm/tests/Build
tools/perf/arch/arm/tests/arch-tests.c [new file with mode: 0644]
tools/perf/arch/arm/util/auxtrace.c
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm/util/cs-etm.h
tools/perf/arch/arm/util/pmu.c
tools/perf/arch/s390/util/auxtrace.c
tools/perf/arch/s390/util/header.c
tools/perf/arch/x86/Makefile
tools/perf/arch/x86/annotate/instructions.c
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/bench/numa.c
tools/perf/builtin-help.c
tools/perf/builtin-mem.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-version.c
tools/perf/perf.c
tools/perf/pmu-events/arch/s390/mapfile.csv
tools/perf/pmu-events/arch/x86/mapfile.csv
tools/perf/tests/attr/test-record-group-sampling
tools/perf/tests/bpf-script-example.c
tools/perf/tests/bpf-script-test-kbuild.c
tools/perf/tests/builtin-test.c
tools/perf/tests/mmap-basic.c
tools/perf/tests/shell/record+probe_libc_inet_pton.sh
tools/perf/trace/beauty/mmap.c
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/hists.c
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
tools/perf/util/cs-etm.c
tools/perf/util/cs-etm.h
tools/perf/util/event.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/generate-cmdlist.sh
tools/perf/util/header.c
tools/perf/util/machine.c
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/symbol.c
tools/perf/util/syscalltbl.c
tools/perf/util/trace-event-scripting.c
tools/power/acpi/Makefile.config
tools/testing/nvdimm/test/nfit.c
tools/testing/radix-tree/Makefile
tools/testing/radix-tree/multiorder.c
tools/testing/radix-tree/test.c
tools/testing/radix-tree/test.h
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_helpers.h
tools/testing/selftests/bpf/bpf_rand.h [new file with mode: 0644]
tools/testing/selftests/bpf/test_btf.c
tools/testing/selftests/bpf/test_get_stack_rawtp.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_sock.c
tools/testing/selftests/bpf/test_sock_addr.c
tools/testing/selftests/bpf/test_sock_addr.sh
tools/testing/selftests/bpf/test_sockhash_kern.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_sockmap.c [moved from samples/sockmap/sockmap_user.c with 56% similarity]
tools/testing/selftests/bpf/test_sockmap_kern.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_sockmap_kern.h [moved from samples/sockmap/sockmap_kern.c with 88% similarity]
tools/testing/selftests/bpf/test_stacktrace_build_id.c
tools/testing/selftests/bpf/test_stacktrace_map.c
tools/testing/selftests/bpf/test_tunnel.sh [new file with mode: 0755]
tools/testing/selftests/bpf/test_tunnel_kern.c [moved from samples/bpf/tcbpf2_kern.c with 68% similarity]
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/trace_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/trace_helpers.h [new file with mode: 0644]
tools/testing/selftests/bpf/urandom_read.c
tools/testing/selftests/filesystems/Makefile
tools/testing/selftests/firmware/Makefile
tools/testing/selftests/firmware/fw_lib.sh
tools/testing/selftests/firmware/fw_run_tests.sh
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc [new file with mode: 0644]
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/sync_regs_test.c
tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
tools/testing/selftests/lib.mk
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/forwarding/mirror_gre.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_bound.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_changes.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_flower.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_nh.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/mirror_lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/router.sh
tools/testing/selftests/net/forwarding/router_multipath.sh
tools/testing/selftests/net/forwarding/tc_actions.sh
tools/testing/selftests/net/forwarding/tc_chains.sh
tools/testing/selftests/net/forwarding/tc_flower.sh
tools/testing/selftests/net/forwarding/tc_shblocks.sh
tools/testing/selftests/net/pmtu.sh
tools/testing/selftests/net/rtnetlink.sh
tools/testing/selftests/net/tcp_inq.c [new file with mode: 0644]
tools/testing/selftests/net/tcp_mmap.c
tools/testing/selftests/net/udpgso.c [new file with mode: 0644]
tools/testing/selftests/net/udpgso.sh [new file with mode: 0755]
tools/testing/selftests/net/udpgso_bench.sh [new file with mode: 0755]
tools/testing/selftests/net/udpgso_bench_rx.c [new file with mode: 0644]
tools/testing/selftests/net/udpgso_bench_tx.c [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
tools/testing/selftests/tc-testing/tc-tests/actions/police.json
tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/mov_ss_trap.c [new file with mode: 0644]
tools/testing/selftests/x86/mpx-mini-test.c
tools/testing/selftests/x86/pkey-helpers.h
tools/testing/selftests/x86/protection_keys.c
tools/testing/selftests/x86/test_syscall_vdso.c
virt/kvm/arm/arm.c
virt/kvm/arm/psci.c
virt/kvm/arm/vgic/vgic-debug.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v2.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

index 640f65e79ef1c00c94508b6b9f9fe8b63a1305a6..8e69345c37ccd9e1d74bd86f5858a45b1573f838 100644 (file)
@@ -244,3 +244,11 @@ Description:    read only
                 Returns 1 if the psl timebase register is synchronized
                 with the core timebase register, 0 otherwise.
 Users:          https://github.com/ibm-capi/libcxl
+
+What:           /sys/class/cxl/<card>/tunneled_ops_supported
+Date:           May 2018
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Returns 1 if tunneled operations are supported in capi mode,
+                0 otherwise.
+Users:          https://github.com/ibm-capi/libcxl
index d2b6fda3d67b97eeeb44c0c045eb3e5963f9233f..ab2fe0eda1d7c317faefab52363ce96755ac64d5 100644 (file)
@@ -145,7 +145,7 @@ feature enabled.]
 
 In this mode ``intel_pstate`` registers utilization update callbacks with the
 CPU scheduler in order to run a P-state selection algorithm, either
-``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy
+``powersave`` or ``performance``, depending on the ``scaling_governor`` policy
 setting in ``sysfs``.  The current CPU frequency information to be made
 available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
 periodically updated by those utilization update callbacks too.
index 1e5c0f00cb2fee319032ea0b27e93a03eef8aee6..dbf5acd49f350de349bf228e9b7c824e2cfd04b1 100644 (file)
@@ -15,7 +15,7 @@ Sleep States That Can Be Supported
 ==================================
 
 Depending on its configuration and the capabilities of the platform it runs on,
-the Linux kernel can support up to four system sleep states, includig
+the Linux kernel can support up to four system sleep states, including
 hibernation and up to three variants of system suspend.  The sleep states that
 can be supported by the kernel are listed below.
 
diff --git a/Documentation/bpf/README.rst b/Documentation/bpf/README.rst
new file mode 100644 (file)
index 0000000..b9a80c9
--- /dev/null
@@ -0,0 +1,36 @@
+=================
+BPF documentation
+=================
+
+This directory contains documentation for the BPF (Berkeley Packet
+Filter) facility, with a focus on the extended BPF version (eBPF).
+
+This kernel-side documentation is still a work in progress.  The main
+textual documentation is (for historical reasons) kept in
+`Documentation/networking/filter.txt`_, which describes both the
+classical and the extended BPF instruction sets.
+The Cilium project also maintains a `BPF and XDP Reference Guide`_
+that goes into great technical depth about the BPF architecture.
+
+The primary info for the bpf syscall is available in the `man-pages`_
+for `bpf(2)`_.
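+
+As a minimal sketch of what the raw syscall looks like from user
+space (an illustration only; there is no libc wrapper for bpf(2), and
+error handling is omitted here), creating an array map could be done
+like this::
+
+  #include <linux/bpf.h>
+  #include <string.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          union bpf_attr attr;
+          int map_fd;
+
+          memset(&attr, 0, sizeof(attr));
+          attr.map_type    = BPF_MAP_TYPE_ARRAY;
+          attr.key_size    = sizeof(__u32);  /* array maps use 4-byte keys */
+          attr.value_size  = sizeof(__u64);
+          attr.max_entries = 16;
+
+          /* no libc wrapper exists for bpf(2), hence syscall(2) */
+          map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+          return map_fd < 0 ? 1 : 0;
+  }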
+
+
+
+Frequently asked questions (FAQ)
+================================
+
+Two sets of Questions and Answers (Q&A) are maintained.
+
+* Q&A on common questions about the design of BPF: bpf_design_QA_
+
+* Q&A for developers interacting with the BPF subsystem: bpf_devel_QA_
+
+
+.. Links:
+.. _bpf_design_QA: bpf_design_QA.rst
+.. _bpf_devel_QA:  bpf_devel_QA.rst
+.. _Documentation/networking/filter.txt: ../networking/filter.txt
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html
+.. _BPF and XDP Reference Guide: http://cilium.readthedocs.io/en/latest/bpf/
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
new file mode 100644 (file)
index 0000000..6780a6d
--- /dev/null
@@ -0,0 +1,221 @@
+==============
+BPF Design Q&A
+==============
+
+The extensibility of BPF and its applicability to networking, tracing
+and security in the linux kernel, together with several user space
+implementations of the BPF virtual machine, have led to a number of
+misunderstandings about what BPF actually is.  This short Q&A is an
+attempt to address that and to outline the direction BPF is heading long term.
+
+.. contents::
+    :local:
+    :depth: 3
+
+Questions and Answers
+=====================
+
+Q: Is BPF a generic instruction set similar to x64 and arm64?
+-------------------------------------------------------------
+A: NO.
+
+Q: Is BPF a generic virtual machine?
+-------------------------------------
+A: NO.
+
+BPF is a generic instruction set *with* a C calling convention.
+----------------------------------------------------------------
+
+Q: Why was the C calling convention chosen?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A: Because BPF programs are designed to run in the linux kernel,
+which is written in C.  BPF therefore defines an instruction set that
+is compatible with the two most used architectures, x64 and arm64
+(and takes important quirks of other architectures into
+consideration), and a calling convention that is compatible with the
+C calling convention of the linux kernel on those architectures.
+
+Q: Can multiple return values be supported in the future?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A: NO. BPF allows only register R0 to be used as return value.
+
+Q: Can more than 5 function arguments be supported in the future?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A: NO. The BPF calling convention only allows registers R1-R5 to be
+used as arguments.  BPF is not a standalone instruction set
+(unlike the x64 ISA, which allows msft, cdecl and other conventions).
+
+Q: Can BPF programs access the instruction pointer or return address?
+-----------------------------------------------------------------------
+A: NO.
+
+Q: Can BPF programs access the stack pointer?
+-----------------------------------------------
+A: NO.
+
+Only the frame pointer (register R10) is accessible.
+From the compiler's point of view it is necessary to have a stack
+pointer.  For example, LLVM defines register R11 as the stack pointer
+in its BPF backend, but makes sure that the generated code never uses it.
+
+Q: Does the C calling convention diminish the possible use cases?
+-------------------------------------------------------------------
+A: YES.
+
+The BPF design forces major functionality to be added in the form
+of kernel helper functions and kernel objects like BPF maps, with
+seamless interoperability between them.  It lets the kernel call into
+BPF programs, and lets programs call kernel helpers, with zero
+overhead, as if all of them were native C code.  That is particularly
+the case for JITed BPF programs, which are indistinguishable from
+native kernel C code.
+
+Q: Does it mean that 'innovative' extensions to BPF code are disallowed?
+------------------------------------------------------------------------
+A: Soft yes.
+
+At least for now, until the BPF core has support for
+bpf-to-bpf calls, indirect calls, loops, global variables,
+jump tables, read-only sections and all the other normal constructs
+that C code can produce.
+
+Q: Can loops be supported in a safe way?
+----------------------------------------
+A: It's not clear yet.
+
+BPF developers are trying to find a way to
+support bounded loops where the verifier can guarantee that
+the program terminates in less than 4096 instructions.
+
+Instruction level questions
+---------------------------
+
+Q: LD_ABS and LD_IND instructions vs C code
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Q: How come the LD_ABS and LD_IND instructions are present in BPF whereas
+C code cannot express them and has to use builtin intrinsics?
+
+A: This is an artifact of compatibility with classic BPF. Modern
+networking code in BPF performs better without them.
+See 'direct packet access'.
+
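+As a rough sketch (the program and section names below are made up
+for this illustration, and SEC() comes from the samples' bpf_helpers.h
+header), direct packet access in an XDP program looks like this; every
+load must be preceded by an explicit bounds check against data_end::
+
+  #include <uapi/linux/bpf.h>
+  #include <linux/if_ether.h>
+  #include "bpf_helpers.h"       /* SEC() */
+
+  SEC("xdp")
+  int xdp_min_len(struct xdp_md *ctx)
+  {
+          void *data     = (void *)(long)ctx->data;
+          void *data_end = (void *)(long)ctx->data_end;
+          struct ethhdr *eth = data;
+
+          if (data + sizeof(*eth) > data_end) /* mandatory bounds check */
+                  return XDP_DROP;
+
+          /* eth->h_proto etc. may now be read directly */
+          return XDP_PASS;
+  }
+  char _license[] SEC("license") = "GPL";
+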
+Q: BPF instructions mapping not one-to-one to native CPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Q: It seems that not all BPF instructions map one-to-one to native CPU
+instructions.  For example, why are BPF_JNE and other compare-and-jump
+instructions not cpu-like?
+
+A: This was necessary to avoid introducing flags into the ISA, which
+are impossible to make generic and efficient across CPU architectures.
+
+Q: Why doesn't the BPF_DIV instruction map to x64 div?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A: Because if we had picked a one-to-one relationship to x64, it would
+have made it more complicated to support on arm64 and other archs.  It
+also needs a div-by-zero runtime check.
+
+Q: Why is there no BPF_SDIV for signed divide operations?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A: Because it would rarely be used.  llvm errors out in such cases and
+prints a suggestion to use an unsigned divide instead.
+
+Q: Why does BPF have an implicit prologue and epilogue?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A: Because architectures like sparc have register windows, and in
+general there are enough subtle differences between architectures that
+a naive 'store the return address on the stack' won't work.  Another
+reason is that BPF has to be safe against division by zero (and the
+legacy exception path of the LD_ABS insn).  Those instructions need to
+invoke the epilogue and return implicitly.
+
+Q: Why were the BPF_JLT and BPF_JLE instructions not introduced in the beginning?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A: Because classic BPF didn't have them, and the BPF authors felt that
+a compiler workaround would be acceptable.  It turned out that programs
+lost performance due to the lack of these compare instructions, so they
+were added.  These two instructions are a perfect example of the kind of
+new BPF instructions that are acceptable and can be added in the future:
+both already had equivalent instructions in native CPUs.
+New instructions that don't have a one-to-one mapping to HW instructions
+will not be accepted.
+
+Q: BPF 32-bit subregister requirements
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Q: BPF 32-bit subregisters have a requirement to zero the upper 32 bits
+of BPF registers, which makes BPF an inefficient virtual machine for
+32-bit CPU architectures and 32-bit HW accelerators.  Can true 32-bit
+registers be added to BPF in the future?
+
+A: NO. The first step to improve performance on 32-bit archs is to teach
+LLVM to generate code that uses 32-bit subregisters.  The second step
+is to teach the verifier to mark operations where zeroing the upper bits
+is unnecessary.  JITs can then take advantage of those markings and
+drastically reduce the size of the generated code and improve performance.
+
+Q: Does BPF have a stable ABI?
+------------------------------
+A: YES. BPF instructions, arguments to BPF programs, the set of helper
+functions and their arguments, and the recognized return codes are all
+part of the ABI.  However, when tracing programs use the bpf_probe_read()
+helper to walk kernel internal data structures and are compiled against
+kernel internal headers, those accesses can and will break with newer
+kernels.  The union bpf_attr -> kern_version field is checked at load time
+to prevent accidentally loading kprobe-based bpf programs written
+for a different kernel.  Networking programs don't do a kern_version check.
+
+Q: How much stack space does a BPF program use?
+-------------------------------------------------
+A: Currently all program types are limited to 512 bytes of stack
+space, but the verifier computes the actual amount of stack used,
+and both the interpreter and most JITed code consume only the
+necessary amount.
+
+Q: Can BPF be offloaded to HW?
+------------------------------
+A: YES. BPF HW offload is supported by the NFP driver.
+
+Q: Does classic BPF interpreter still exist?
+--------------------------------------------
+A: NO. Classic BPF programs are converted into extended BPF instructions.
+
+Q: Can BPF call arbitrary kernel functions?
+-------------------------------------------
+A: NO. BPF programs can only call a set of helper functions, which
+is defined for every program type.
+
+Q: Can BPF overwrite arbitrary kernel memory?
+---------------------------------------------
+A: NO.
+
+Tracing bpf programs can *read* arbitrary memory with the bpf_probe_read()
+and bpf_probe_read_str() helpers.  Networking programs cannot read
+arbitrary memory, since they don't have access to these helpers.
+Programs can never read or write arbitrary memory directly.
+
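+As a hedged sketch of such a read (the probed function and names are
+chosen only for illustration; the helpers and macros come from the
+samples' bpf_helpers.h header), a tracing program reading kernel
+memory looks like this::
+
+  #include <uapi/linux/bpf.h>
+  #include <linux/ptrace.h>
+  #include <linux/sched.h>
+  #include "bpf_helpers.h"  /* SEC(), bpf_probe_read(), PT_REGS_PARM1() */
+
+  SEC("kprobe/wake_up_new_task")
+  int trace_new_task(struct pt_regs *ctx)
+  {
+          struct task_struct *task = (void *)PT_REGS_PARM1(ctx);
+          int pid = 0;
+
+          /* copy task->pid from kernel memory onto the BPF stack */
+          bpf_probe_read(&pid, sizeof(pid), &task->pid);
+          return 0;
+  }
+  char _license[] SEC("license") = "GPL";
+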
+Q: Can BPF overwrite arbitrary user memory?
+-------------------------------------------
+A: Sort-of.
+
+Tracing BPF programs can overwrite the user memory
+of the current task with bpf_probe_write_user().  Every time such a
+program is loaded, the kernel will print a warning message, so
+this helper is only useful for experiments and prototypes.
+Tracing BPF programs are root only.
+
+Q: bpf_trace_printk() helper warning
+------------------------------------
+Q: When the bpf_trace_printk() helper is used, the kernel prints a nasty
+warning message. Why is that?
+
+A: This is done to nudge program authors into better interfaces when
+programs need to pass data to user space.  For example, bpf_perf_event_output()
+can be used to efficiently stream data via the perf ring buffer.
+BPF maps can be used for asynchronous data sharing between kernel
+and user space. bpf_trace_printk() should only be used for debugging.
+
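+For illustration, a sketch of the bpf_perf_event_output() route (the
+map, section and function names below are made up; the definitions
+come from the samples' bpf_helpers.h header)::
+
+  #include <uapi/linux/bpf.h>
+  #include <linux/ptrace.h>
+  #include "bpf_helpers.h"
+
+  struct bpf_map_def SEC("maps") events = {
+          .type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+          .key_size    = sizeof(int),
+          .value_size  = sizeof(__u32),
+          .max_entries = 64,     /* one slot per possible CPU */
+  };
+
+  SEC("kprobe/SyS_write")
+  int on_write(struct pt_regs *ctx)
+  {
+          __u64 ts = bpf_ktime_get_ns();
+
+          /* BPF_F_CURRENT_CPU selects the current CPU's ring buffer */
+          bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
+                                &ts, sizeof(ts));
+          return 0;
+  }
+  char _license[] SEC("license") = "GPL";
+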
+Q: New functionality via kernel modules?
+----------------------------------------
+Q: Can BPF functionality such as new program or map types, new
+helpers, etc. be added from kernel module code?
+
+A: NO.
diff --git a/Documentation/bpf/bpf_design_QA.txt b/Documentation/bpf/bpf_design_QA.txt
deleted file mode 100644 (file)
index f3e458a..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-BPF extensibility and applicability to networking, tracing, security
-in the linux kernel and several user space implementations of BPF
-virtual machine led to a number of misunderstanding on what BPF actually is.
-This short QA is an attempt to address that and outline a direction
-of where BPF is heading long term.
-
-Q: Is BPF a generic instruction set similar to x64 and arm64?
-A: NO.
-
-Q: Is BPF a generic virtual machine ?
-A: NO.
-
-BPF is generic instruction set _with_ C calling convention.
-
-Q: Why C calling convention was chosen?
-A: Because BPF programs are designed to run in the linux kernel
-   which is written in C, hence BPF defines instruction set compatible
-   with two most used architectures x64 and arm64 (and takes into
-   consideration important quirks of other architectures) and
-   defines calling convention that is compatible with C calling
-   convention of the linux kernel on those architectures.
-
-Q: can multiple return values be supported in the future?
-A: NO. BPF allows only register R0 to be used as return value.
-
-Q: can more than 5 function arguments be supported in the future?
-A: NO. BPF calling convention only allows registers R1-R5 to be used
-   as arguments. BPF is not a standalone instruction set.
-   (unlike x64 ISA that allows msft, cdecl and other conventions)
-
-Q: can BPF programs access instruction pointer or return address?
-A: NO.
-
-Q: can BPF programs access stack pointer ?
-A: NO. Only frame pointer (register R10) is accessible.
-   From compiler point of view it's necessary to have stack pointer.
-   For example LLVM defines register R11 as stack pointer in its
-   BPF backend, but it makes sure that generated code never uses it.
-
-Q: Does C-calling convention diminishes possible use cases?
-A: YES. BPF design forces addition of major functionality in the form
-   of kernel helper functions and kernel objects like BPF maps with
-   seamless interoperability between them. It lets kernel call into
-   BPF programs and programs call kernel helpers with zero overhead.
-   As all of them were native C code. That is particularly the case
-   for JITed BPF programs that are indistinguishable from
-   native kernel C code.
-
-Q: Does it mean that 'innovative' extensions to BPF code are disallowed?
-A: Soft yes. At least for now until BPF core has support for
-   bpf-to-bpf calls, indirect calls, loops, global variables,
-   jump tables, read only sections and all other normal constructs
-   that C code can produce.
-
-Q: Can loops be supported in a safe way?
-A: It's not clear yet. BPF developers are trying to find a way to
-   support bounded loops where the verifier can guarantee that
-   the program terminates in less than 4096 instructions.
-
-Q: How come LD_ABS and LD_IND instruction are present in BPF whereas
-   C code cannot express them and has to use builtin intrinsics?
-A: This is artifact of compatibility with classic BPF. Modern
-   networking code in BPF performs better without them.
-   See 'direct packet access'.
-
-Q: It seems not all BPF instructions are one-to-one to native CPU.
-   For example why BPF_JNE and other compare and jumps are not cpu-like?
-A: This was necessary to avoid introducing flags into ISA which are
-   impossible to make generic and efficient across CPU architectures.
-
-Q: why BPF_DIV instruction doesn't map to x64 div?
-A: Because if we picked one-to-one relationship to x64 it would have made
-   it more complicated to support on arm64 and other archs. Also it
-   needs div-by-zero runtime check.
-
-Q: why there is no BPF_SDIV for signed divide operation?
-A: Because it would be rarely used. llvm errors in such case and
-   prints a suggestion to use unsigned divide instead
-
-Q: Why BPF has implicit prologue and epilogue?
-A: Because architectures like sparc have register windows and in general
-   there are enough subtle differences between architectures, so naive
-   store return address into stack won't work. Another reason is BPF has
-   to be safe from division by zero (and legacy exception path
-   of LD_ABS insn). Those instructions need to invoke epilogue and
-   return implicitly.
-
-Q: Why BPF_JLT and BPF_JLE instructions were not introduced in the beginning?
-A: Because classic BPF didn't have them and BPF authors felt that compiler
-   workaround would be acceptable. Turned out that programs lose performance
-   due to lack of these compare instructions and they were added.
-   These two instructions is a perfect example what kind of new BPF
-   instructions are acceptable and can be added in the future.
-   These two already had equivalent instructions in native CPUs.
-   New instructions that don't have one-to-one mapping to HW instructions
-   will not be accepted.
-
-Q: BPF 32-bit subregisters have a requirement to zero upper 32-bits of BPF
-   registers which makes BPF inefficient virtual machine for 32-bit
-   CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
-   be added to BPF in the future?
-A: NO. The first thing to improve performance on 32-bit archs is to teach
-   LLVM to generate code that uses 32-bit subregisters. Then second step
-   is to teach verifier to mark operations where zero-ing upper bits
-   is unnecessary. Then JITs can take advantage of those markings and
-   drastically reduce size of generated code and improve performance.
-
-Q: Does BPF have a stable ABI?
-A: YES. BPF instructions, arguments to BPF programs, set of helper
-   functions and their arguments, recognized return codes are all part
-   of ABI. However when tracing programs are using bpf_probe_read() helper
-   to walk kernel internal datastructures and compile with kernel
-   internal headers these accesses can and will break with newer
-   kernels. The union bpf_attr -> kern_version is checked at load time
-   to prevent accidentally loading kprobe-based bpf programs written
-   for a different kernel. Networking programs don't do kern_version check.
-
-Q: How much stack space a BPF program uses?
-A: Currently all program types are limited to 512 bytes of stack
-   space, but the verifier computes the actual amount of stack used
-   and both interpreter and most JITed code consume necessary amount.
-
-Q: Can BPF be offloaded to HW?
-A: YES. BPF HW offload is supported by NFP driver.
-
-Q: Does classic BPF interpreter still exist?
-A: NO. Classic BPF programs are converted into extend BPF instructions.
-
-Q: Can BPF call arbitrary kernel functions?
-A: NO. BPF programs can only call a set of helper functions which
-   is defined for every program type.
-
-Q: Can BPF overwrite arbitrary kernel memory?
-A: NO. Tracing bpf programs can _read_ arbitrary memory with bpf_probe_read()
-   and bpf_probe_read_str() helpers. Networking programs cannot read
-   arbitrary memory, since they don't have access to these helpers.
-   Programs can never read or write arbitrary memory directly.
-
-Q: Can BPF overwrite arbitrary user memory?
-A: Sort-of. Tracing BPF programs can overwrite the user memory
-   of the current task with bpf_probe_write_user(). Every time such
-   program is loaded the kernel will print warning message, so
-   this helper is only useful for experiments and prototypes.
-   Tracing BPF programs are root only.
-
-Q: When bpf_trace_printk() helper is used the kernel prints nasty
-   warning message. Why is that?
-A: This is done to nudge program authors into better interfaces when
-   programs need to pass data to user space. Like bpf_perf_event_output()
-   can be used to efficiently stream data via perf ring buffer.
-   BPF maps can be used for asynchronous data sharing between kernel
-   and user space. bpf_trace_printk() should only be used for debugging.
-
-Q: Can BPF functionality such as new program or map types, new
-   helpers, etc be added out of kernel module code?
-A: NO.
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
new file mode 100644 (file)
index 0000000..0e7c1d9
--- /dev/null
@@ -0,0 +1,640 @@
+=================================
+HOWTO interact with BPF subsystem
+=================================
+
+This document provides information about the BPF subsystem and its
+various workflows related to reporting bugs, submitting patches, and
+queueing patches for stable kernels.
+
+For general information about submitting patches, please refer to
+`Documentation/process/`_. This document only describes additional specifics
+related to BPF.
+
+.. contents::
+    :local:
+    :depth: 2
+
+Reporting bugs
+==============
+
+Q: How do I report bugs for BPF kernel code?
+--------------------------------------------
+A: Since all BPF kernel development, as well as bpftool and iproute2 BPF
+loader development, happens through the netdev kernel mailing list,
+please report any issues you find around BPF to the following mailing
+list:
+
+ netdev@vger.kernel.org
+
+This may also include issues related to XDP, BPF tracing, etc.
+
+Given netdev has a high volume of traffic, please also add the BPF
+maintainers to Cc (from kernel MAINTAINERS_ file):
+
+* Alexei Starovoitov <ast@kernel.org>
+* Daniel Borkmann <daniel@iogearbox.net>
+
+In case a buggy commit has already been identified, make sure to keep
+the actual commit authors in Cc as well for the report. They can
+typically be identified through the kernel's git tree.
+
+**Please do NOT report BPF issues to bugzilla.kernel.org since it
+is a guarantee that the reported issue will be overlooked.**
+
+Submitting patches
+==================
+
+Q: To which mailing list do I need to submit my BPF patches?
+------------------------------------------------------------
+A: Please submit your BPF patches to the netdev kernel mailing list:
+
+ netdev@vger.kernel.org
+
+Historically, BPF came out of networking and has always been maintained
+by the kernel networking community. Although these days BPF touches
+many other subsystems as well, the patches are still routed mainly
+through the networking community.
+
+In case your patch has changes to various different subsystems (e.g.
+tracing, security, etc.), make sure to Cc the related kernel mailing
+lists and maintainers from there as well, so they are able to review
+the changes and provide their Acked-by's to the patches.
+
+Q: Where can I find patches currently under discussion for BPF subsystem?
+-------------------------------------------------------------------------
+A: All patches that are Cc'ed to netdev are queued for review under netdev
+patchwork project:
+
+  http://patchwork.ozlabs.org/project/netdev/list/
+
+Those patches which target BPF are assigned to a 'bpf' delegate for
+further processing by the BPF maintainers. The current queue with
+patches under review can be found at:
+
+  https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+
+Once the patches have been reviewed by the BPF community as a whole
+and approved by the BPF maintainers, their status in patchwork will be
+changed to 'Accepted' and the submitter will be notified by mail. This
+means that the patches look good from a BPF perspective and have been
+applied to one of the two BPF kernel trees.
+
+In case feedback from the community requires a respin of the patches,
+their status in patchwork will be set to 'Changes Requested', and they
+will be purged from the current review queue.  The same applies to
+patches that get rejected or are not applicable to the BPF trees (but
+were assigned to the 'bpf' delegate).
+
+Q: How do the changes make their way into Linux?
+------------------------------------------------
+A: There are two BPF kernel trees (git repositories). Once patches have
+been accepted by the BPF maintainers, they will be applied to one
+of the two BPF trees:
+
+ * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/
+ * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/
+
+The bpf tree itself is for fixes only, whereas bpf-next is for features,
+cleanups or other kinds of improvements ("next-like" content).  This is
+analogous to the net and net-next trees for networking.  Both bpf and
+bpf-next only have a master branch, in order to keep it simple which
+branch patches should be rebased against.
+
+Accumulated BPF patches in the bpf tree will regularly get pulled
+into the net kernel tree. Likewise, accumulated BPF patches accepted
+into the bpf-next tree will make their way into net-next tree. net and
+net-next are both run by David S. Miller. From there, they will go
+into the kernel mainline tree run by Linus Torvalds. To read up on the
+process of net and net-next being merged into the mainline tree, see
+the `netdev FAQ`_ under:
+
+ `Documentation/networking/netdev-FAQ.txt`_
+
+Occasionally, to prevent merge conflicts, we might send pull requests
+to other trees (e.g. tracing) with a small subset of the patches, but
+net and net-next are always the main trees targeted for integration.
+
+The pull requests will contain a high-level summary of the accumulated
+patches and can be searched on netdev kernel mailing list through the
+following subject lines (``yyyy-mm-dd`` is the date of the pull
+request)::
+
+  pull-request: bpf yyyy-mm-dd
+  pull-request: bpf-next yyyy-mm-dd
+
+Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
+---------------------------------------------------------------------------------
+
+A: The process is the very same as described in the `netdev FAQ`_, so
+please read up on it. The subject line must indicate whether the
+patch is a fix or rather "next-like" content in order to let the
+maintainers know whether it is targeted at bpf or bpf-next.
+
+For fixes eventually landing in bpf -> net tree, the subject must
+look like::
+
+  git format-patch --subject-prefix='PATCH bpf' start..finish
+
+For features/improvements/etc that should eventually land in
+bpf-next -> net-next, the subject must look like::
+
+  git format-patch --subject-prefix='PATCH bpf-next' start..finish
+
+If unsure whether the patch or patch series should go into bpf
+or net directly, or into bpf-next or net-next directly, it is not a
+problem if the subject line names net or net-next as the target.
+It is ultimately up to the maintainers to delegate
+the patches.
+
+If it is clear that patches should go into bpf or bpf-next tree,
+please make sure to rebase the patches against those trees in
+order to reduce potential conflicts.
+
+In case the patch or patch series has to be reworked and sent out
+again in a second or later revision, it is also required to add a
+version number (``v2``, ``v3``, ...) into the subject prefix::
+
+  git format-patch --subject-prefix='PATCH net-next v2' start..finish
+
+When changes have been requested to the patch series, always send the
+whole patch series again with the feedback incorporated (never send
+individual diffs on top of the old series).
+
+Q: What does it mean when a patch gets applied to bpf or bpf-next tree?
+-----------------------------------------------------------------------
+A: It means that the patch looks good for mainline inclusion from
+a BPF point of view.
+
+Be aware that this is not a final verdict that the patch will
+automatically get accepted into net or net-next trees eventually:
+
+On the netdev kernel mailing list, reviews can come in at any point
+in time.  If discussions around a patch conclude that it cannot
+be included as-is, we will either apply a follow-up fix or drop
+it from the trees entirely.  Therefore, we also reserve the right to
+rebase the trees when deemed necessary.  After all, the purpose of the tree
+is to:
+
+i) accumulate and stage BPF patches for integration into trees
+   like net and net-next, and
+
+ii) run an extensive BPF test suite and
+    workloads on the patches before they make their way any further.
+
+Once the BPF pull request has been accepted by David S. Miller,
+the patches end up in the net or net-next tree, respectively, and
+make their way from there further into mainline.  Again, see the
+`netdev FAQ`_ for additional information, e.g. on how often they are
+merged to mainline.
+
+Q: How long do I need to wait for feedback on my BPF patches?
+-------------------------------------------------------------
+A: We try to keep the latency low. The usual time to feedback will
+be around 2 or 3 business days. It may vary depending on the
+complexity of changes and current patch load.
+
+Q: How often do you send pull requests to major kernel trees like net or net-next?
+----------------------------------------------------------------------------------
+
+A: Pull requests will be sent out rather often in order not to
+accumulate too many patches in bpf or bpf-next.
+
+As a rule of thumb, expect pull requests for each tree regularly
+at the end of the week.  In some cases pull requests may additionally
+come in the middle of the week, depending on the current patch
+load or urgency.
+
+Q: Are patches applied to bpf-next when the merge window is open?
+-----------------------------------------------------------------
+A: While the merge window is open, bpf-next will not be
+processed.  This is roughly analogous to net-next patch processing,
+so feel free to read up on the `netdev FAQ`_ for further details.
+
+During those two weeks of merge window, we might ask you to resend
+your patch series once bpf-next is open again.  Once Linus has released
+a ``v*-rc1`` after the merge window, we continue processing bpf-next.
+
+For non-subscribers to kernel mailing lists, there is also a status
+page run by David S. Miller on net-next that provides guidance:
+
+  http://vger.kernel.org/~davem/net-next.html
+
+Q: Verifier changes and test cases
+----------------------------------
+Q: I made a BPF verifier change, do I need to add test cases to the
+BPF kernel selftests_?
+
+A: If the patch has changes to the behavior of the verifier, then yes,
+it is absolutely necessary to add test cases to the BPF kernel
+selftests_ suite. If they are not present and we think they are
+needed, then we might ask for them before accepting any changes.
+
+In particular, test_verifier.c tracks a large number of BPF test
+cases, including many corner cases that the LLVM BPF back end may
+generate out of the restricted C code.  Adding test cases is therefore
+absolutely crucial to make sure that future changes do not accidentally
+affect prior use cases.  In other words, treat those test cases as a
+contract: verifier behavior that is not tracked in test_verifier.c
+could potentially be subject to change.
+
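+For orientation, a new entry in test_verifier.c's table of
+``struct bpf_test`` records has roughly this shape (a sketch, not an
+actual test from the file)::
+
+  {
+          "sketch: return constant",
+          .insns = {
+                  BPF_MOV64_IMM(BPF_REG_0, 1),   /* r0 = 1 */
+                  BPF_EXIT_INSN(),               /* return r0 */
+          },
+          .result = ACCEPT,
+  },
+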
+Q: samples/bpf preference vs selftests?
+---------------------------------------
+Q: When should I add code to `samples/bpf/`_ and when to BPF kernel
+selftests_ ?
+
+A: In general, we prefer additions to BPF kernel selftests_ rather than
+`samples/bpf/`_. The rationale is very simple: kernel selftests are
+regularly run by various bots to test for kernel regressions.
+
+The more test cases we add to BPF selftests, the better the coverage
+and the less likely it is that those features could accidentally
+break. This is not to say that BPF kernel selftests cannot demo how
+a specific feature can be used.
+
+That said, `samples/bpf/`_ may be a good place for people to get started,
+so simple demos of features may go into `samples/bpf/`_, while advanced
+functional and corner-case testing should rather go into kernel
+selftests.
+
+If your sample looks like a test case, then go for BPF kernel selftests
+instead!
+
+Q: When should I add code to the bpftool?
+-----------------------------------------
+A: The main purpose of bpftool (under tools/bpf/bpftool/) is to provide
+a central user space tool for debugging and introspection of BPF programs
+and maps that are active in the kernel. If UAPI changes related to BPF
+enable dumping additional information about programs or maps, then
+bpftool should be extended as well to support dumping them.
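+
+As a rough sketch of such introspection usage (the map ID below is a
+placeholder)::
+
+  # bpftool prog show
+  # bpftool map dump id 42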
+
+Q: When should I add code to iproute2's BPF loader?
+---------------------------------------------------
+A: For UAPI changes related to the XDP or tc layer (e.g. ``cls_bpf``),
+the convention is that those control-path related changes are added to
+iproute2's BPF loader as well from the user space side. This not only
+helps ensure that UAPI changes are properly designed to be usable, but
+also makes those changes available to the wider user base of major
+downstream distributions.
+
+Q: Do you accept patches as well for iproute2's BPF loader?
+-----------------------------------------------------------
+A: Patches for iproute2's BPF loader have to be sent to:
+
+  netdev@vger.kernel.org
+
+While those patches are not processed by the BPF kernel maintainers,
+please keep the BPF maintainers in Cc as well, so they can review
+the changes.
+
+The official git repository for iproute2 is run by Stephen Hemminger
+and can be found at:
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git/
+
+The patches need to have a subject prefix of '``[PATCH iproute2
+master]``' or '``[PATCH iproute2 net-next]``'. '``master``' or
+'``net-next``' describes the target branch where the patch should be
+applied. That is, if kernel changes went into the net-next kernel
+tree, then the related iproute2 changes need to go into the iproute2
+net-next branch; otherwise they can target the master branch. The
+iproute2 net-next branch will get merged into the master branch after
+the current iproute2 version from master has been released.
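+
+For example, a series targeting the iproute2 net-next branch could be
+prepared like this (the commit range is a placeholder)::
+
+  $ git format-patch --subject-prefix='PATCH iproute2 net-next' start..finish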
+
+Like BPF, the patches end up in patchwork under the netdev project and
+are delegated to 'shemminger' for further processing:
+
+  http://patchwork.ozlabs.org/project/netdev/list/?delegate=389
+
+Q: What is the minimum requirement before I submit my BPF patches?
+------------------------------------------------------------------
+A: When submitting patches, always take the time and properly test your
+patches *prior* to submission. Never rush them! If maintainers find
+that your patches have not been properly tested, it is a good way to
+get them grumpy. Testing patch submissions is a hard requirement!
+
+Note, fixes that go to bpf tree *must* have a ``Fixes:`` tag included.
+The same applies to fixes that target bpf-next, where the affected
+commit is in net-next (or in some cases bpf-next). The ``Fixes:`` tag is
+crucial in order to identify follow-up commits and tremendously helps
+for people having to do backporting, so it is a must have!
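+
+The tag references the first 12 characters of the offending commit's
+SHA-1 together with its subject line; a made-up example::
+
+  Fixes: 1234567890ab ("bpf: subject of the offending commit")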
+
+We also don't accept patches with an empty commit message. Take your
+time and properly write up a high quality commit message, it is
+essential!
+
+Think about it this way: other developers looking at your code a month
+from now need to understand *why* a certain change has been done that
+way, and whether there have been flaws in the analysis or assumptions
+that the original author made. Thus, providing a proper rationale and
+describing the use-case for the changes is a must.
+
+Patch submissions with >1 patch must have a cover letter which includes
+a high level description of the series. This high level summary will
+then be placed into the merge commit by the BPF maintainers such that
+it is also accessible from the git log for future reference.
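+
+Git can generate the cover letter for you; for example, for a bpf-next
+series (the commit range is a placeholder)::
+
+  $ git format-patch --cover-letter --subject-prefix='PATCH bpf-next' start..finish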
+
+Q: Features changing BPF JIT and/or LLVM
+----------------------------------------
+Q: What do I need to consider when adding a new instruction or feature
+that would require BPF JIT and/or LLVM integration as well?
+
+A: We try hard to keep all BPF JITs up to date such that the same user
+experience can be guaranteed when running BPF programs on different
+architectures without having the program punt to the less efficient
+interpreter in case the in-kernel BPF JIT is enabled.
+
+If you are unable to implement or test the required JIT changes for
+certain architectures, please work together with the related BPF JIT
+developers in order to get the feature implemented in a timely manner.
+Please refer to the git log (``arch/*/net/``) to locate the necessary
+people for helping out.
+
+Also always make sure to add BPF test cases (e.g. test_bpf.c and
+test_verifier.c) for new instructions, so that they can receive
+broad test coverage and help with run-time testing of the various BPF JITs.
+
+In case of new BPF instructions, once the changes have been accepted
+into the Linux kernel, please implement support in LLVM's BPF back
+end. See the LLVM_ section below for further information.
+
+Stable submission
+=================
+
+Q: I need a specific BPF commit in stable kernels. What should I do?
+--------------------------------------------------------------------
+A: In case you need a specific fix in stable kernels, first check whether
+the commit has already been applied in the related ``linux-*.y`` branches:
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/
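+
+One way to check this locally, assuming a ``stable`` remote pointing at
+the repository above (branch and commit SHA are placeholders)::
+
+  $ git fetch stable
+  $ git merge-base --is-ancestor <commit-sha> stable/linux-4.16.y \
+        && echo "already applied"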
+
+If that is not the case, drop an email to the BPF maintainers with the
+netdev kernel mailing list in Cc and ask for the fix to be queued up:
+
+  netdev@vger.kernel.org
+
+The process in general is the same as on netdev itself, see also the
+`netdev FAQ`_ document.
+
+Q: Do you also backport to kernels not currently maintained as stable?
+----------------------------------------------------------------------
+A: No. If you need a specific BPF commit in kernels that are currently not
+maintained by the stable maintainers, then you are on your own.
+
+The current stable and longterm stable kernels are all listed here:
+
+  https://www.kernel.org/
+
+Q: The BPF patch I am about to submit needs to go to stable as well
+-------------------------------------------------------------------
+What should I do?
+
+A: The same rules apply as with netdev patch submissions in general, see
+`netdev FAQ`_ under:
+
+  `Documentation/networking/netdev-FAQ.txt`_
+
+Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
+ask the BPF maintainers to queue the patches instead. This can be done
+with a note, for example, under the ``---`` part of the patch which does
+not go into the git log. Alternatively, this can be done as a simple
+request by mail instead.
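+
+For instance, such a note could look like the following; text placed
+after the ``---`` separator does not end up in the git log::
+
+  ---
+  Note: also a candidate for stable, please queue it up.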
+
+Q: Queue stable patches
+-----------------------
+Q: Where do I find currently queued BPF patches that will be submitted
+to stable?
+
+A: Once patches that fix critical bugs have been applied to the bpf
+tree, they are queued up for stable submission under:
+
+  http://patchwork.ozlabs.org/bundle/bpf/stable/?state=*
+
+They will be on hold there at minimum until the related commit has made its
+way into the mainline kernel tree.
+
+After having been under broader exposure, the queued patches will be
+submitted by the BPF maintainers to the stable maintainers.
+
+Testing patches
+===============
+
+Q: How to run BPF selftests
+---------------------------
+A: After you have booted into the newly compiled kernel, navigate to
+the BPF selftests_ suite in order to test BPF functionality (current
+working directory points to the root of the cloned git tree)::
+
+  $ cd tools/testing/selftests/bpf/
+  $ make
+
+To run the verifier tests::
+
+  $ sudo ./test_verifier
+
+The verifier tests print out all the current checks being
+performed. The summary at the end of running all tests will dump
+information on test successes and failures::
+
+  Summary: 418 PASSED, 0 FAILED
+
+In order to run through all BPF selftests, the following command is
+needed::
+
+  $ sudo make run_tests
+
+See the kernel selftest `Documentation/dev-tools/kselftest.rst`_
+document for further documentation.
+
+Q: Which BPF kernel selftests version should I run my kernel against?
+---------------------------------------------------------------------
+A: If you run a kernel ``xyz``, then always run the BPF kernel selftests
+from that kernel ``xyz`` as well. Do not expect that the BPF selftests
+from the latest mainline tree will pass all the time.
+
+In particular, test_bpf.c and test_verifier.c have a large number of
+test cases and are constantly updated with new BPF test sequences, or
+existing ones are adapted to verifier changes e.g. due to verifier
+becoming smarter and being able to better track certain things.
+
+LLVM
+====
+
+Q: Where do I find LLVM with BPF support?
+-----------------------------------------
+A: The BPF back end for LLVM is upstream in LLVM since version 3.7.1.
+
+All major distributions these days ship LLVM with BPF back end enabled,
+so for the majority of use-cases it is not required to compile LLVM by
+hand anymore; just install the distribution-provided package.
+
+LLVM's static compiler lists the supported targets through
+``llc --version``; make sure the BPF targets are listed. Example::
+
+     $ llc --version
+     LLVM (http://llvm.org/):
+       LLVM version 6.0.0svn
+       Optimized build.
+       Default target: x86_64-unknown-linux-gnu
+       Host CPU: skylake
+
+       Registered Targets:
+         bpf    - BPF (host endian)
+         bpfeb  - BPF (big endian)
+         bpfel  - BPF (little endian)
+         x86    - 32-bit X86: Pentium-Pro and above
+         x86-64 - 64-bit X86: EM64T and AMD64
+
+In order to utilize the latest features added to LLVM's BPF back end,
+developers are advised to run the latest LLVM release. Support for
+new BPF kernel features such as additions to the BPF instruction set
+is often developed in lockstep on the kernel and LLVM side.
+
+All LLVM releases can be found at: http://releases.llvm.org/
+
+Q: Got it, so how do I build LLVM manually anyway?
+--------------------------------------------------
+A: You need cmake and gcc-c++ as build prerequisites for LLVM. Once you have
+that set up, proceed with building the latest LLVM and clang version
+from the git repositories::
+
+     $ git clone http://llvm.org/git/llvm.git
+     $ cd llvm/tools
+     $ git clone --depth 1 http://llvm.org/git/clang.git
+     $ cd ..; mkdir build; cd build
+     $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+                -DBUILD_SHARED_LIBS=OFF           \
+                -DCMAKE_BUILD_TYPE=Release        \
+                -DLLVM_BUILD_RUNTIME=OFF
+     $ make -j $(getconf _NPROCESSORS_ONLN)
+
+The built binaries can then be found in the build/bin/ directory, to
+which you can point the PATH variable.
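+
+For example::
+
+  $ export PATH="$(pwd)/bin:$PATH"
+  $ llc --version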
+
+Q: Reporting LLVM BPF issues
+----------------------------
+Q: Should I notify BPF kernel maintainers about issues in LLVM's BPF code
+generation back end or about LLVM generated code that the verifier
+refuses to accept?
+
+A: Yes, please do!
+
+LLVM's BPF back end is a key piece of the whole BPF
+infrastructure and it ties deeply into verification of programs from the
+kernel side. Therefore, any issues on either side need to be investigated
+and fixed whenever necessary.
+
+Please make sure to bring such issues up on the netdev kernel mailing
+list and Cc the BPF maintainers for both the LLVM and kernel bits:
+
+* Yonghong Song <yhs@fb.com>
+* Alexei Starovoitov <ast@kernel.org>
+* Daniel Borkmann <daniel@iogearbox.net>
+
+LLVM also has an issue tracker where BPF related bugs can be found:
+
+  https://bugs.llvm.org/buglist.cgi?quicksearch=bpf
+
+However, it is better to reach out through the mailing lists with the
+maintainers in Cc.
+
+Q: New BPF instruction for kernel and LLVM
+------------------------------------------
+Q: I have added a new BPF instruction to the kernel, how can I integrate
+it into LLVM?
+
+A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
+the selection of BPF instruction set extensions. By default the
+``generic`` processor target is used, which is the base instruction set
+(v1) of BPF.
+
+LLVM has an option to select ``-mcpu=probe`` where it will probe the host
+kernel for supported BPF instruction set extensions and select the
+optimal set automatically.
+
+For cross-compilation, a specific version can be selected manually as well::
+
+     $ llc -march bpf -mcpu=help
+     Available CPUs for this target:
+
+       generic - Select the generic processor.
+       probe   - Select the probe processor.
+       v1      - Select the v1 processor.
+       v2      - Select the v2 processor.
+     [...]
+
+Newly added BPF instructions to the Linux kernel need to follow the same
+scheme: bump the instruction set version and implement probing for the
+extensions, such that ``-mcpu=probe`` users can benefit from the
+optimization transparently when upgrading their kernels.
+
+If you are unable to implement support for the newly added BPF
+instruction, please reach out to BPF developers for help.
+
+By the way, the BPF kernel selftests run with ``-mcpu=probe`` for better
+test coverage.
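+
+A sketch of compiling a program with probing enabled, following the
+usual clang/llc pipeline (file names are placeholders)::
+
+  $ clang -O2 -emit-llvm -c prog.c -o - | \
+    llc -march=bpf -mcpu=probe -filetype=obj -o prog.o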
+
+Q: clang flag for target bpf?
+-----------------------------
+Q: In some cases clang flag ``-target bpf`` is used but in other cases the
+default clang target, which matches the underlying architecture, is used.
+What is the difference and when should I use which?
+
+A: Although LLVM IR generation and optimization try to stay architecture
+independent, ``-target <arch>`` still has some impact on generated code:
+
+- A BPF program may recursively include header file(s) with file scope
+  inline assembly codes. The default target can handle this well,
+  while the ``bpf`` target may fail if the BPF back-end assembler does
+  not understand these assembly codes, which is true in most cases.
+
+- When compiled without ``-g``, additional ELF sections, e.g.,
+  .eh_frame and .rela.eh_frame, may be present in the object file
+  with the default target, but not with the ``bpf`` target.
+
+- The default target may turn a C switch statement into a switch table
+  lookup and jump operation. Since the switch table is placed
+  in the global read-only section, the BPF program will fail to load.
+  The ``bpf`` target does not support switch table optimization.
+  The clang option ``-fno-jump-tables`` can be used to disable
+  switch table generation.
+
+- For clang ``-target bpf``, it is guaranteed that pointer or long /
+  unsigned long types will always have a width of 64 bits, no matter
+  whether the underlying clang binary or the default target (or kernel)
+  is 32 bit. However, when the native clang target is used, it will
+  compile these types based on the underlying architecture's conventions,
+  meaning that on a 32-bit architecture, pointer or long / unsigned
+  long types, e.g. in the BPF context structure, will have a width of
+  32 bits while the BPF LLVM back end still operates in 64 bit. The
+  native target is mostly needed in tracing for the case of walking
+  ``pt_regs`` or other kernel structures where the CPU's register width
+  matters. Otherwise, ``clang -target bpf`` is generally recommended.
+
+You should use the default target when:
+
+- Your program includes a header file, e.g., ptrace.h, which eventually
+  pulls in some header files containing file scope host assembly codes.
+
+- You can add ``-fno-jump-tables`` to work around the switch table issue.
+
+Otherwise, you can use the ``bpf`` target. Additionally, you *must* use
+the ``bpf`` target when:
+
+- Your program uses data structures with pointer or long / unsigned long
+  types that interface with BPF helpers or context data structures. Access
+  into these structures is verified by the BPF verifier and may result
+  in verification failures if the native architecture is not aligned with
+  the BPF architecture, e.g. 64-bit. An example of this is
+  BPF_PROG_TYPE_SK_MSG, which requires ``-target bpf``.
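+
+To illustrate the two invocations side by side (file names are
+placeholders)::
+
+  $ clang -O2 -target bpf -c bpf_prog.c -o bpf_prog.o
+  $ clang -O2 -emit-llvm -c trace_prog.c -o - | \
+    llc -march=bpf -filetype=obj -o trace_prog.o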
+
+
+.. Links
+.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
+.. _MAINTAINERS: ../../MAINTAINERS
+.. _Documentation/networking/netdev-FAQ.txt: ../networking/netdev-FAQ.txt
+.. _netdev FAQ: ../networking/netdev-FAQ.txt
+.. _samples/bpf/: ../../samples/bpf/
+.. _selftests: ../../tools/testing/selftests/bpf/
+.. _Documentation/dev-tools/kselftest.rst:
+   https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
+
+Happy BPF hacking!
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
deleted file mode 100644 (file)
index 1a0b704..0000000
+++ /dev/null
@@ -1,562 +0,0 @@
index 4bcd4b7f79f9437077b5c0b5dc0e9b8e119fb9ac..3d01948ea0611f56e4cf603dd2df14548b42471c 100644 (file)
@@ -264,7 +264,10 @@ i) Constructor
                           data device, but just remove the mapping.
 
       read_only: Don't allow any changes to be made to the pool
-                metadata.
+                metadata.  This mode is only available after the
+                thin-pool has been created and first used in full
+                read/write mode.  It cannot be specified on initial
+                thin-pool creation.
 
       error_if_no_space: Error IOs, instead of queueing, if no space.
 
index f4006d3c9fdf444df5a6b730a5f9542968c95c85..c760ecb8138136085eead51ab07c38074d989c4a 100644 (file)
@@ -30,7 +30,6 @@ compatible:
 Optional properties:
 - dma-coherent      : Present if dma operations are coherent
 - clocks            : a list of phandle + clock specifier pairs
-- resets            : a list of phandle + reset specifier pairs
 - target-supply     : regulator for SATA target power
 - phys              : reference to the SATA PHY node
 - phy-names         : must be "sata-phy"
index 557fa765adcb9450c4003555d1211978e73a9703..5d2519af4bb5ca5e33f59b3b5a42e8c9242345a2 100644 (file)
@@ -38,7 +38,7 @@ Display Timings
   require specific display timings. The panel-timing subnode expresses those
   timings as specified in the timing subnode section of the display timing
   bindings defined in
-  Documentation/devicetree/bindings/display/display-timing.txt.
+  Documentation/devicetree/bindings/display/panel/display-timing.txt.
 
 
 Connectivity
index aadfb236d53abdd12fc69cf36cd1940303441deb..61315eaa76606d777a05d61a3caa3dbf845bd81a 100644 (file)
@@ -26,6 +26,7 @@ Required Properties:
                - "renesas,dmac-r8a7794" (R-Car E2)
                - "renesas,dmac-r8a7795" (R-Car H3)
                - "renesas,dmac-r8a7796" (R-Car M3-W)
+               - "renesas,dmac-r8a77965" (R-Car M3-N)
                - "renesas,dmac-r8a77970" (R-Car V3M)
                - "renesas,dmac-r8a77980" (R-Car V3H)
 
index 23e3abc3fdef0c084ed61a78d15c49597f075c72..c88919480d373ea373be6cdff30e9206224603db 100644 (file)
@@ -4,6 +4,13 @@ Required properties:
 - compatible:
     atmel,maxtouch
 
+    The following compatibles have been used in various products but are
+    deprecated:
+       atmel,qt602240_ts
+       atmel,atmel_mxt_ts
+       atmel,atmel_mxt_tp
+       atmel,mXT224
+
 - reg: The I2C address of the device
 
 - interrupts: The sink for the touchpad's IRQ output
index 93c3a6ae32f995e92bbfedb4ba710004c1c0a0b0..ac71daa4619505030ac1373e678ca13241727f82 100644 (file)
@@ -5,7 +5,9 @@ Required properties:
 - compatible: Must contain one or more of the following:
   - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller.
   - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
-  - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3) compatible controller.
+  - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller.
+  - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
+  - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller.
 
   When compatible with the generic version, nodes must list the
   SoC-specific version corresponding to the platform first, followed by the
index cfe8f64eca4fbea222f53cfd78b62d7df4e10020..3ceeb8de11963572cc1bd8ce324433e0dbf6bd03 100644 (file)
@@ -82,8 +82,6 @@ linked into one DSA cluster.
 
        switch0: switch0@0 {
                compatible = "marvell,mv88e6085";
-               #address-cells = <1>;
-               #size-cells = <0>;
                reg = <0>;
 
                dsa,member = <0 0>;
@@ -135,8 +133,6 @@ linked into one DSA cluster.
 
        switch1: switch1@0 {
                compatible = "marvell,mv88e6085";
-               #address-cells = <1>;
-               #size-cells = <0>;
                reg = <0>;
 
                dsa,member = <0 1>;
@@ -204,8 +200,6 @@ linked into one DSA cluster.
 
        switch2: switch2@0 {
                compatible = "marvell,mv88e6085";
-               #address-cells = <1>;
-               #size-cells = <0>;
                reg = <0>;
 
                dsa,member = <0 2>;
index 3d6d5fa0c4d5fc670d8bec94e53a0f07a0a2e013..cfe724398a12b68204abe29e5989547a7066b01a 100644 (file)
@@ -7,6 +7,7 @@ Required properties:
 - compatible: must be one of the following string:
                "allwinner,sun8i-a83t-emac"
                "allwinner,sun8i-h3-emac"
+               "allwinner,sun8i-r40-gmac"
                "allwinner,sun8i-v3s-emac"
                "allwinner,sun50i-a64-emac"
 - reg: address and length of the register for the device.
@@ -20,18 +21,18 @@ Required properties:
 - phy-handle: See ethernet.txt
 - #address-cells: shall be 1
 - #size-cells: shall be 0
-- syscon: A phandle to the syscon of the SoC with one of the following
- compatible string:
-  - allwinner,sun8i-h3-system-controller
-  - allwinner,sun8i-v3s-system-controller
-  - allwinner,sun50i-a64-system-controller
-  - allwinner,sun8i-a83t-system-controller
+- syscon: A phandle to the device containing the EMAC or GMAC clock register
 
 Optional properties:
-- allwinner,tx-delay-ps: TX clock delay chain value in ps. Range value is 0-700. Default is 0)
-- allwinner,rx-delay-ps: RX clock delay chain value in ps. Range value is 0-3100. Default is 0)
-Both delay properties need to be a multiple of 100. They control the delay for
-external PHY.
+- allwinner,tx-delay-ps: TX clock delay chain value in ps.
+                        Range is 0-700. Default is 0.
+                        Unavailable for allwinner,sun8i-r40-gmac
+- allwinner,rx-delay-ps: RX clock delay chain value in ps.
+                        Range is 0-3100. Default is 0.
+                        Range is 0-700 for allwinner,sun8i-r40-gmac
+Both delay properties need to be a multiple of 100. They control the
+clock delay for external RGMII PHY. They do not apply to the internal
+PHY or external non-RGMII PHYs.
 
 Optional properties for the following compatibles:
   - "allwinner,sun8i-h3-emac",
index 1814fa13f6ab8078a52a32fa2a55156124d696bb..fc019df0d8638df3c08e51bc8827987623c0c244 100644 (file)
@@ -21,9 +21,10 @@ Required properties:
        - main controller clock (for both armada-375-pp2 and armada-7k-pp2)
        - GOP clock (for both armada-375-pp2 and armada-7k-pp2)
        - MG clock (only for armada-7k-pp2)
+       - MG Core clock (only for armada-7k-pp2)
        - AXI clock (only for armada-7k-pp2)
-- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk"
-  and "axi_clk" (the 2 latter only for armada-7k-pp2).
+- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk",
+  "mg_core_clk" and "axi_clk" (the 3 latter only for armada-7k-pp2).
 
 The ethernet ports are represented by subnodes. At least one port is
 required.
@@ -80,8 +81,8 @@ cpm_ethernet: ethernet@0 {
        compatible = "marvell,armada-7k-pp22";
        reg = <0x0 0x100000>, <0x129000 0xb000>;
        clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>,
-                <&cpm_syscon0 1 5>, <&cpm_syscon0 1 18>;
-       clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk";
+                <&cpm_syscon0 1 5>, <&cpm_syscon0 1 6>, <&cpm_syscon0 1 18>;
+       clock-names = "pp_clk", "gop_clk", "mg_clk", "mg_core_clk", "axi_clk";
 
        eth0: eth0 {
                interrupts = <ICU_GRP_NSR 39 IRQ_TYPE_LEVEL_HIGH>,
index 61cada22ae6c73304413ede617bee86b800e41a0..1321bb194ed99e455b9a1105fbf8dade63767b45 100644 (file)
@@ -11,6 +11,7 @@ Required properties on all platforms:
                        - "amlogic,meson8b-dwmac"
                        - "amlogic,meson8m2-dwmac"
                        - "amlogic,meson-gxbb-dwmac"
+                       - "amlogic,meson-axg-dwmac"
                Additionally "snps,dwmac" and any applicable more
                detailed version number described in net/stmmac.txt
                should be used.
index 42a248301615d9e69915a424104f1fc932605175..e22d8cfea687435550f72e56385da9b07a0b739e 100644 (file)
@@ -57,6 +57,13 @@ KSZ9031:
       - txd2-skew-ps : Skew control of TX data 2 pad
       - txd3-skew-ps : Skew control of TX data 3 pad
 
+    - micrel,force-master:
+        Boolean, force the PHY into master mode. Only set this option if
+        the PHY reference clock provided at the CLK125_NDO pin is used as
+        the MAC reference clock because the clock jitter in slave mode is
+        too high (errata #2).
+        Attention: The link partner must be configurable as slave,
+        otherwise no link will be established.
+
 Examples:
 
        mdio {
diff --git a/Documentation/devicetree/bindings/net/mscc-miim.txt b/Documentation/devicetree/bindings/net/mscc-miim.txt
new file mode 100644 (file)
index 0000000..7104679
--- /dev/null
@@ -0,0 +1,26 @@
+Microsemi MII Management Controller (MIIM) / MDIO
+=================================================
+
+Properties:
+- compatible: must be "mscc,ocelot-miim"
+- reg: The base address of the MDIO bus controller register bank. Optionally, a
+  second register bank can be defined if there is an associated reset register
+  for internal PHYs.
+- #address-cells: Must be <1>.
+- #size-cells: Must be <0>.  MDIO addresses have no size component.
+- interrupts: interrupt specifier (refer to the interrupt binding)
+
+Typically an MDIO bus might have several children.
+
+Example:
+       mdio@107009c {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               compatible = "mscc,ocelot-miim";
+               reg = <0x107009c 0x36>, <0x10700f0 0x8>;
+               interrupts = <14>;
+
+               phy0: ethernet-phy@0 {
+                       reg = <0>;
+               };
+       };
diff --git a/Documentation/devicetree/bindings/net/mscc-ocelot.txt b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
new file mode 100644 (file)
index 0000000..0a84711
--- /dev/null
@@ -0,0 +1,82 @@
+Microsemi Ocelot network Switch
+===============================
+
+The Microsemi Ocelot network switch can be found on Microsemi SoCs (VSC7513,
+VSC7514)
+
+Required properties:
+- compatible: Should be "mscc,vsc7514-switch"
+- reg: Must contain an (offset, length) pair of the register set for each
+  entry in reg-names.
+- reg-names: Must include the following entries:
+  - "sys"
+  - "rew"
+  - "qs"
+  - "hsio"
+  - "qsys"
+  - "ana"
+  - "portX" with X from 0 to the number of last port index available on that
+    switch
+- interrupts: Should contain the switch interrupts for frame extraction and
+  frame injection
+- interrupt-names: should contain the interrupt names: "xtr", "inj"
+- ethernet-ports: A container for child nodes representing switch ports.
+
+The ethernet-ports container has the following properties
+
+Required properties:
+
+- #address-cells: Must be 1
+- #size-cells: Must be 0
+
+Each port node must have the following mandatory properties:
+- reg: Describes the port address in the switch
+
+Port nodes may also contain the following optional standardised
+properties, described in binding documents:
+
+- phy-handle: Phandle to a PHY on an MDIO bus. See
+  Documentation/devicetree/bindings/net/ethernet.txt for details.
+
+Example:
+
+       switch@1010000 {
+               compatible = "mscc,vsc7514-switch";
+               reg = <0x1010000 0x10000>,
+                     <0x1030000 0x10000>,
+                     <0x1080000 0x100>,
+                     <0x10d0000 0x10000>,
+                     <0x11e0000 0x100>,
+                     <0x11f0000 0x100>,
+                     <0x1200000 0x100>,
+                     <0x1210000 0x100>,
+                     <0x1220000 0x100>,
+                     <0x1230000 0x100>,
+                     <0x1240000 0x100>,
+                     <0x1250000 0x100>,
+                     <0x1260000 0x100>,
+                     <0x1270000 0x100>,
+                     <0x1280000 0x100>,
+                     <0x1800000 0x80000>,
+                     <0x1880000 0x10000>;
+               reg-names = "sys", "rew", "qs", "hsio", "port0",
+                           "port1", "port2", "port3", "port4", "port5",
+                           "port6", "port7", "port8", "port9", "port10",
+                           "qsys", "ana";
+               interrupts = <21 22>;
+               interrupt-names = "xtr", "inj";
+
+               ethernet-ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port0: port@0 {
+                               reg = <0>;
+                               phy-handle = <&phy0>;
+                       };
+                       port1: port@1 {
+                               reg = <1>;
+                               phy-handle = <&phy1>;
+                       };
+               };
+       };
diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
new file mode 100644 (file)
index 0000000..0ea18a5
--- /dev/null
@@ -0,0 +1,30 @@
+Qualcomm Bluetooth Chips
+---------------------
+
+This documents the binding structure and common properties for serial
+attached Qualcomm devices.
+
+Serial attached Qualcomm devices shall be represented as a child node
+of the host UART device the slave device is attached to.
+
+Required properties:
+ - compatible: should contain one of the following:
+   * "qcom,qca6174-bt"
+
+Optional properties:
+ - enable-gpios: gpio specifier used to enable chip
+ - clocks: clock provided to the controller (SUSCLK_32KHZ)
+
+Example:
+
+serial@7570000 {
+       label = "BT-UART";
+       status = "okay";
+
+       bluetooth {
+               compatible = "qcom,qca6174-bt";
+
+               enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
+               clocks = <&divclk4>;
+       };
+};
index c306f55d335b15e322a05aaed4cc09b7ae522fb4..890526dbfc26478f623a8e0b7f78d966509171e7 100644 (file)
@@ -18,6 +18,7 @@ Required properties:
 
       - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
       - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+      - "renesas,etheravb-r8a77965" for the R8A77965 SoC.
       - "renesas,etheravb-r8a77970" for the R8A77970 SoC.
       - "renesas,etheravb-r8a77980" for the R8A77980 SoC.
       - "renesas,etheravb-r8a77995" for the R8A77995 SoC.
index 5172799a7f1a478d4eb0e834a8672806d87509e7..82a4cf2c145dd6552b71630ac8960f6248eb3bdb 100644 (file)
@@ -14,6 +14,7 @@ Required properties:
              "renesas,ether-r8a7791"  if the device is a part of R8A7791 SoC.
              "renesas,ether-r8a7793"  if the device is a part of R8A7793 SoC.
              "renesas,ether-r8a7794"  if the device is a part of R8A7794 SoC.
+             "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC.
              "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
              "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device.
              "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1
index ed5eb547afc81da53cb951506226341bbdfcf6ec..64bc5c2a76da613609b3c4db8f3bc00bf9a38107 100644 (file)
@@ -56,9 +56,9 @@ pins it needs, and how they should be configured, with regard to muxer
 configuration, drive strength and pullups. If one of these options is
 not set, its actual value will be unspecified.
 
-This driver supports the generic pin multiplexing and configuration
-bindings. For details on each properties, you can refer to
-./pinctrl-bindings.txt.
+Allwinner A1X Pin Controller supports the generic pin multiplexing and
+configuration bindings. For details on each property, you can refer to
+./pinctrl-bindings.txt.
 
 Required sub-node properties:
   - pins
index 8ff65fa632fdedd739b005027e198051f62c8c34..c06c045126fc9070ca8f4faefc18b17a8de85ee6 100644 (file)
@@ -21,7 +21,7 @@ Required properties:
 - interrupts : identifier to the device interrupt
 - clocks : a list of phandle + clock-specifier pairs, one for each
           entry in clock names.
-- clocks-names :
+- clock-names :
    * "xtal" for external xtal clock identifier
    * "pclk" for the bus core clock, either the clk81 clock or the gate clock
    * "baud" for the source of the baudrate generator, can be either the xtal
index 2ae2fee7e023c01375f7fa09b9b21f7f812293c1..b7e0e32b9ac62c1029ba93998b8257ce71478de4 100644 (file)
@@ -24,7 +24,7 @@ Required properties:
     - Must contain two elements for the extended variant of the IP
       (marvell,armada-3700-uart-ext): "uart-tx" and "uart-rx",
       respectively the UART TX interrupt and the UART RX interrupt. A
-      corresponding interrupts-names property must be defined.
+      corresponding interrupt-names property must be defined.
     - For backward compatibility reasons, a single element interrupts
       property is also supported for the standard variant of the IP,
       containing only the UART sum interrupt. This form is deprecated
index ad962f4ec3aaffe73991118aabd78b9da3d5cb87..106808b55b6da64d97d8d727673b14445b45bbb5 100644 (file)
@@ -17,6 +17,8 @@ Required properties:
     - "renesas,scifa-r8a7745" for R8A7745 (RZ/G1E) SCIFA compatible UART.
     - "renesas,scifb-r8a7745" for R8A7745 (RZ/G1E) SCIFB compatible UART.
     - "renesas,hscif-r8a7745" for R8A7745 (RZ/G1E) HSCIF compatible UART.
+    - "renesas,scif-r8a77470" for R8A77470 (RZ/G1C) SCIF compatible UART.
+    - "renesas,hscif-r8a77470" for R8A77470 (RZ/G1C) HSCIF compatible UART.
     - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART.
     - "renesas,scif-r8a7779" for R8A7779 (R-Car H1) SCIF compatible UART.
     - "renesas,scif-r8a7790" for R8A7790 (R-Car H2) SCIF compatible UART.
@@ -41,6 +43,8 @@ Required properties:
     - "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART.
     - "renesas,scif-r8a7796" for R8A7796 (R-Car M3-W) SCIF compatible UART.
     - "renesas,hscif-r8a7796" for R8A7796 (R-Car M3-W) HSCIF compatible UART.
+    - "renesas,scif-r8a77965" for R8A77965 (R-Car M3-N) SCIF compatible UART.
+    - "renesas,hscif-r8a77965" for R8A77965 (R-Car M3-N) HSCIF compatible UART.
     - "renesas,scif-r8a77970" for R8A77970 (R-Car V3M) SCIF compatible UART.
     - "renesas,hscif-r8a77970" for R8A77970 (R-Car V3M) HSCIF compatible UART.
     - "renesas,scif-r8a77980" for R8A77980 (R-Car V3H) SCIF compatible UART.
index c4c00dff4b569f94821a184784895a7aed1d315b..bd1dd316fb231f84d02a6125d566e881600df387 100644 (file)
@@ -28,7 +28,10 @@ Required properties:
   - interrupts: one XHCI interrupt should be described here.
 
 Optional properties:
-  - clocks: reference to a clock
+  - clocks: reference to the clocks
+  - clock-names: mandatory if there is a second clock, in this case
+    the name must be "core" for the first clock and "reg" for the
+    second one
   - usb2-lpm-disable: indicate if we don't want to enable USB2 HW LPM
   - usb3-lpm-capable: determines if platform is USB3 LPM capable
   - quirk-broken-port-ped: set if the controller has broken port disable mechanism
index b5f978a4cac67471a7398d24a388fd04b88702ff..a38d8bfae19c3a7897032d1b80c83622ba92da5d 100644 (file)
@@ -182,6 +182,7 @@ karo        Ka-Ro electronics GmbH
 keithkoep      Keith & Koep GmbH
 keymile        Keymile GmbH
 khadas Khadas
+kiebackpeter    Kieback & Peter GmbH
 kinetic Kinetic Technologies
 kingnovel      Kingnovel Technology Co., Ltd.
 kosagi Sutajio Ko-Usagi PTE Ltd.
index a4feb6dde8cd08e83b7b9672c90f863311dc9d75..725fb8d255c16a8ae722a4c4d47d1492900d724a 100644 (file)
@@ -98,6 +98,14 @@ Finally, if you need to remove all overlays in one-go, just call
 of_overlay_remove_all() which will remove every single one in the correct
 order.
 
+In addition, there is the option to register notifiers that get called on
+overlay operations. See of_overlay_notifier_register/unregister and
+enum of_overlay_notify_action for details.
+
+Note that a notifier callback is not supposed to store pointers to a
+device tree node or its content beyond the OF_OVERLAY_POST_REMOVE
+notification corresponding to the respective node it received.
+
 Overlay DTS Format
 ------------------
 
index 96a0423d5dba9d486647dcc4e453a9ac79606e64..82a3e43b68644ab863ee8f459f26a18956808b03 100644 (file)
@@ -177,14 +177,14 @@ BUGS
 ****
 
 
-Report bugs to Mauro Carvalho Chehab <mchehab@s-opensource.com>
+Report bugs to Mauro Carvalho Chehab <mchehab@kernel.org>
 
 
 COPYRIGHT
 *********
 
 
-Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab@s-opensource.com>.
+Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab+samsung@kernel.org>.
 
 License GPLv2: GNU GPL version 2 <http://gnu.org/licenses/gpl.html>.
 
index cf4516dfbf964b0ada60136fc49579df6c113823..d5ec95a7195bf001f6fa855dc6d7fa922ba40ed7 100644 (file)
@@ -17,17 +17,17 @@ an error is returned.
 
 request_firmware
 ----------------
-.. kernel-doc:: drivers/base/firmware_class.c
+.. kernel-doc:: drivers/base/firmware_loader/main.c
    :functions: request_firmware
 
 request_firmware_direct
 -----------------------
-.. kernel-doc:: drivers/base/firmware_class.c
+.. kernel-doc:: drivers/base/firmware_loader/main.c
    :functions: request_firmware_direct
 
 request_firmware_into_buf
 -------------------------
-.. kernel-doc:: drivers/base/firmware_class.c
+.. kernel-doc:: drivers/base/firmware_loader/main.c
    :functions: request_firmware_into_buf
 
 Asynchronous firmware requests
@@ -41,7 +41,7 @@ in atomic contexts.
 
 request_firmware_nowait
 -----------------------
-.. kernel-doc:: drivers/base/firmware_class.c
+.. kernel-doc:: drivers/base/firmware_loader/main.c
    :functions: request_firmware_nowait
 
 Special optimizations on reboot
@@ -50,12 +50,12 @@ Special optimizations on reboot
 Some devices have an optimization in place to enable the firmware to be
 retained during system reboot. When such optimizations are used the driver
 author must ensure the firmware is still available on resume from suspend,
-this can be done with firmware_request_cache() insted of requesting for the
-firmare to be loaded.
+this can be done with firmware_request_cache() instead of requesting for the
+firmware to be loaded.
 
 firmware_request_cache()
------------------------
-.. kernel-doc:: drivers/base/firmware_class.c
+------------------------
+.. kernel-doc:: drivers/base/firmware_loader/main.c
    :functions: firmware_request_cache
 
 request firmware API expected driver use
index 6d9ff316b608db48b46de6413d9503e23b51536a..bee1b9a1702f1cc6c89811aff6b8bdbc1eefb0b0 100644 (file)
@@ -28,7 +28,7 @@ Device Drivers Base
 .. kernel-doc:: drivers/base/node.c
    :internal:
 
-.. kernel-doc:: drivers/base/firmware_class.c
+.. kernel-doc:: drivers/base/firmware_loader/main.c
    :export:
 
 .. kernel-doc:: drivers/base/transport_class.c
index feb31946490b01c13106e2f9f43d1d28fd918ea0..48ff58095f115ae5c1d0c25e6987d187937dd365 100644 (file)
@@ -210,7 +210,7 @@ If the connector is dual-role capable, there may also be a switch for the data
 role. USB Type-C Connector Class does not supply separate API for them. The
 port drivers can use USB Role Class API with those.
 
-Illustration of the muxes behind a connector that supports an alternate mode:
+Illustration of the muxes behind a connector that supports an alternate mode::
 
                      ------------------------
                      |       Connector      |
index 5efae00f6c7fd82f863939ac2e6f2883a52fee09..d2963123eb1ccca8a299486f1eb998f05acf5041 100644 (file)
@@ -5,6 +5,7 @@ Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
 Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
 Updated 2006 by Horms <horms@verge.net.au>
+Updated 2018 by Chris Novakovic <chris@chrisn.me.uk>
 
 
 
@@ -79,7 +80,7 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
 
 
 ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
-   <dns0-ip>:<dns1-ip>
+   <dns0-ip>:<dns1-ip>:<ntp0-ip>
 
   This parameter tells the kernel how to configure IP addresses of devices
   and also how to set up the IP routing table. It was originally called
@@ -110,6 +111,9 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
                will not be triggered if it is missing and NFS root is not
                in operation.
 
+               Value is exported to /proc/net/pnp with the prefix "bootserver "
+               (see below).
+
                Default: Determined using autoconfiguration.
                         The address of the autoconfiguration server is used.
 
@@ -123,10 +127,13 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
 
                Default:  Determined using autoconfiguration.
 
-  <hostname>   Name of the client. May be supplied by autoconfiguration,
-               but its absence will not trigger autoconfiguration.
-               If specified and DHCP is used, the user provided hostname will
-               be carried in the DHCP request to hopefully update DNS record.
+  <hostname>   Name of the client. If a '.' character is present, anything
+               before the first '.' is used as the client's hostname, and anything
+               after it is used as its NIS domain name. May be supplied by
+               autoconfiguration, but its absence will not trigger autoconfiguration.
+               If specified and DHCP is used, the user-provided hostname (and NIS
+               domain name, if present) will be carried in the DHCP request; this
+               may cause a DNS record to be created or updated for the client.
 
                Default: Client IP address is used in ASCII notation.
 
@@ -162,12 +169,55 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
 
                 Default: any
 
-  <dns0-ip>    IP address of first nameserver.
-               Value gets exported by /proc/net/pnp which is often linked
-               on embedded systems by /etc/resolv.conf.
+  <dns0-ip>    IP address of primary nameserver.
+               Value is exported to /proc/net/pnp with the prefix "nameserver "
+               (see below).
+
+               Default: None if not using autoconfiguration; determined
+               automatically if using autoconfiguration.
+
+  <dns1-ip>    IP address of secondary nameserver.
+               See <dns0-ip>.
+
+  <ntp0-ip>    IP address of a Network Time Protocol (NTP) server.
+               Value is exported to /proc/net/ipconfig/ntp_servers, but is
+               otherwise unused (see below).
+
+               Default: None if not using autoconfiguration; determined
+               automatically if using autoconfiguration.
+
+  After configuration (whether manual or automatic) is complete, two files
+  are created in the following format; lines are omitted if their respective
+  value is empty following configuration:
+
+  - /proc/net/pnp:
+
+       #PROTO: <DHCP|BOOTP|RARP|MANUAL>        (depending on configuration method)
+       domain <dns-domain>                     (if autoconfigured, the DNS domain)
+       nameserver <dns0-ip>                    (primary name server IP)
+       nameserver <dns1-ip>                    (secondary name server IP)
+       nameserver <dns2-ip>                    (tertiary name server IP)
+       bootserver <server-ip>                  (NFS server IP)
+
+  - /proc/net/ipconfig/ntp_servers:
+
+       <ntp0-ip>                               (NTP server IP)
+       <ntp1-ip>                               (NTP server IP)
+       <ntp2-ip>                               (NTP server IP)
+
+  <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip>
+  (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration;
+  they cannot be specified as part of the "ip=" kernel command line parameter.
+
+  Because the "domain" and "nameserver" options are recognised by DNS
+  resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems
+  that use an NFS root filesystem.
 
-  <dns1-ip>    IP address of second nameserver.
-               Same as above.
+  Note that the kernel will not synchronise the system time with any NTP
+  servers it discovers; this is the responsibility of a user space process
+  (e.g. an initrd/initramfs script that passes the IP addresses listed in
+  /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real
+  root filesystem if it is on NFS).
 
 
 nfsrootdebug
index d04e6e4964ee7f88387f2e0e9ce3803b96d4ae18..fbed645ccd7563e90e86796d13cae7cfa7329748 100644 (file)
@@ -9,8 +9,8 @@ i2c adapters present on your system at a given time. i2cdetect is part of
 the i2c-tools package.
 
 I2C device files are character device files with major device number 89
-and a minor device number corresponding to the number assigned as 
-explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ..., 
+and a minor device number corresponding to the number assigned as
+explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ...,
 i2c-10, ...). All 256 minor device numbers are reserved for i2c.
 
 
@@ -23,11 +23,6 @@ First, you need to include these two headers:
   #include <linux/i2c-dev.h>
   #include <i2c/smbus.h>
 
-(Please note that there are two files named "i2c-dev.h" out there. One is
-distributed with the Linux kernel and the other one is included in the
-source tree of i2c-tools. They used to be different in content but since 2012
-they're identical. You should use "linux/i2c-dev.h").
-
 Now, you have to decide which adapter you want to access. You should
 inspect /sys/class/i2c-dev/ or run "i2cdetect -l" to decide this.
 Adapter numbers are assigned somewhat dynamically, so you can not
@@ -38,7 +33,7 @@ Next thing, open the device file, as follows:
   int file;
   int adapter_nr = 2; /* probably dynamically determined */
   char filename[20];
-  
+
   snprintf(filename, 19, "/dev/i2c-%d", adapter_nr);
   file = open(filename, O_RDWR);
   if (file < 0) {
@@ -72,8 +67,10 @@ the device supports them. Both are illustrated below.
     /* res contains the read word */
   }
 
-  /* Using I2C Write, equivalent of 
-     i2c_smbus_write_word_data(file, reg, 0x6543) */
+  /*
+   * Using I2C Write, equivalent of
+   * i2c_smbus_write_word_data(file, reg, 0x6543)
+   */
   buf[0] = reg;
   buf[1] = 0x43;
   buf[2] = 0x65;
@@ -140,14 +137,14 @@ ioctl(file, I2C_RDWR, struct i2c_rdwr_ioctl_data *msgset)
   set in each message, overriding the values set with the above ioctl's.
 
 ioctl(file, I2C_SMBUS, struct i2c_smbus_ioctl_data *args)
-  Not meant to be called  directly; instead, use the access functions
-  below.
+  If possible, use the provided i2c_smbus_* methods described below instead
+  of issuing direct ioctls.
 
 You can do plain i2c transactions by using read(2) and write(2) calls.
 You do not need to pass the address byte; instead, set it through
 ioctl I2C_SLAVE before you try to access the device.
 
-You can do SMBus level transactions (see documentation file smbus-protocol 
+You can do SMBus level transactions (see documentation file smbus-protocol
 for details) through the following functions:
   __s32 i2c_smbus_write_quick(int file, __u8 value);
   __s32 i2c_smbus_read_byte(int file);
@@ -158,7 +155,7 @@ for details) through the following functions:
   __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value);
   __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value);
   __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values);
-  __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length, 
+  __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length,
                                    __u8 *values);
 All these transactions return -1 on failure; you can read errno to see
 what happened. The 'write' transactions return 0 on success; the
@@ -166,10 +163,9 @@ what happened. The 'write' transactions return 0 on success; the
 returns the number of values read. The block buffers need not be longer
 than 32 bytes.
 
-The above functions are all inline functions, that resolve to calls to
-the i2c_smbus_access function, that on its turn calls a specific ioctl
-with the data in a specific format. Read the source code if you
-want to know what happens behind the screens.
+The above functions are made available by linking against the libi2c library,
+which is provided by the i2c-tools project.  See:
+https://git.kernel.org/pub/scm/utils/i2c-tools/i2c-tools.git/.
 
 
 Implementation details
index 84bb74dcae12e95acc4a2aa888462520422f693d..7f7413e597f3886ad046b9ca527b7d5fe19ecf56 100644 (file)
@@ -217,7 +217,6 @@ Code  Seq#(hex)     Include File            Comments
 'd'    02-40   pcmcia/ds.h             conflict!
 'd'    F0-FF   linux/digi1.h
 'e'    all     linux/digi1.h           conflict!
-'e'    00-1F   drivers/net/irda/irtty-sir.h    conflict!
 'f'    00-1F   linux/ext2_fs.h         conflict!
 'f'    00-1F   linux/ext3_fs.h         conflict!
 'f'    00-0F   fs/jfs/jfs_dinode.h     conflict!
@@ -247,7 +246,6 @@ Code  Seq#(hex)     Include File            Comments
 'm'    all     linux/synclink.h        conflict!
 'm'    00-19   drivers/message/fusion/mptctl.h conflict!
 'm'    00      drivers/scsi/megaraid/megaraid_ioctl.h  conflict!
-'m'    00-1F   net/irda/irmod.h        conflict!
 'n'    00-7F   linux/ncp_fs.h and fs/ncpfs/ioctl.c
 'n'    80-8F   uapi/linux/nilfs2_api.h NILFS2
 'n'    E0-FF   linux/matroxfb.h        matroxfb
index e6ce1e3f5a78a9be42f33904b00b5ad1dd1a2414..217237f93b378e7448a41a3a6b9ed89987a45fbf 100644 (file)
@@ -7,7 +7,7 @@ file: uapi/v4l/keytable.c
 
     /* keytable.c - This program allows checking/replacing keys at IR
 
-       Copyright (C) 2006-2009 Mauro Carvalho Chehab <mchehab@infradead.org>
+       Copyright (C) 2006-2009 Mauro Carvalho Chehab <mchehab@kernel.org>
 
        This program is free software; you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
index 5aabd0b7b089958a59eaccf15dce6d2df4706896..f0d0ab6abd413642cc108830d57326cfaa4f8bb4 100644 (file)
@@ -6,7 +6,7 @@ file: media/v4l/v4l2grab.c
 .. code-block:: c
 
     /* V4L2 video picture grabber
-       Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@infradead.org>
+       Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@kernel.org>
 
        This program is free software; you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
new file mode 100644 (file)
index 0000000..91928d9
--- /dev/null
@@ -0,0 +1,297 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+AF_XDP
+======
+
+Overview
+========
+
+AF_XDP is an address family that is optimized for high performance
+packet processing.
+
+This document assumes that the reader is familiar with BPF and XDP. If
+not, the Cilium project has an excellent reference guide at
+http://cilium.readthedocs.io/en/doc-1.0/bpf/.
+
+Using the XDP_REDIRECT action from an XDP program, the program can
+redirect ingress frames to other XDP-enabled netdevs, using the
+bpf_redirect_map() function. AF_XDP sockets enable XDP programs to
+redirect frames to a memory buffer in a user-space application.
+
+An AF_XDP socket (XSK) is created with the normal socket()
+syscall. Associated with each XSK are two rings: the RX ring and the
+TX ring. A socket can receive packets on the RX ring and it can send
+packets on the TX ring. These rings are registered and sized with the
+setsockopts XDP_RX_RING and XDP_TX_RING, respectively. It is mandatory
+to have at least one of these rings for each socket. An RX or TX
+descriptor ring points to a data buffer in a memory area called a
+UMEM. RX and TX can share the same UMEM so that a packet does not have
+to be copied between RX and TX. Moreover, if a packet needs to be kept
+for a while due to a possible retransmit, the descriptor that points
+to that packet can be changed to point to another and reused right
+away. This again avoids copying data.
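+
+As a minimal sketch (error handling elided; the constants come from
+linux/if_xdp.h), creating an XSK and sizing its two rings could look
+like this::
+
+    #include <sys/socket.h>
+    #include <linux/if_xdp.h>
+
+    int xsk = socket(AF_XDP, SOCK_RAW, 0);
+
+    /* Size the RX and TX descriptor rings. At least one of the two
+     * setsockopts below is mandatory. */
+    int rx_size = 1024;
+    int tx_size = 1024;
+    setsockopt(xsk, SOL_XDP, XDP_RX_RING, &rx_size, sizeof(rx_size));
+    setsockopt(xsk, SOL_XDP, XDP_TX_RING, &tx_size, sizeof(tx_size));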
+
+The UMEM consists of a number of equally sized frames and each frame
+has a unique frame id. A descriptor in one of the rings references a
+frame by referencing its frame id. The user space allocates memory for
+this UMEM using whatever means it feels is most appropriate (malloc,
+mmap, huge pages, etc). This memory area is then registered with the
+kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two
+rings: the FILL ring and the COMPLETION ring. The fill ring is used by
+the application to send down frame ids for the kernel to fill in with
+RX packet data. References to these frames will then appear in the RX
+ring once each packet has been received. The completion ring, on the
+other hand, contains frame ids that the kernel has transmitted
+completely and can now be used again by user space, for either TX or
+RX. Thus, the frame ids appearing in the completion ring are ids that
+were previously transmitted using the TX ring. In summary, the RX and
+FILL rings are used for the RX path and the TX and COMPLETION rings
+are used for the TX path.
+
+The socket is then finally bound with a bind() call to a device and a
+specific queue id on that device, and it is not until bind is
+completed that traffic starts to flow.
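+
+A bind to, say, queue 16 of eth0 might look like this (a sketch
+continuing the one above; struct sockaddr_xdp is declared in
+linux/if_xdp.h, error handling elided)::
+
+    #include <net/if.h>
+    #include <linux/if_xdp.h>
+
+    struct sockaddr_xdp sxdp = {
+        .sxdp_family = AF_XDP,
+        .sxdp_ifindex = if_nametoindex("eth0"),
+        .sxdp_queue_id = 16,
+    };
+
+    /* Traffic starts flowing only after this call succeeds. */
+    bind(xsk, (struct sockaddr *)&sxdp, sizeof(sxdp));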
+
+The UMEM can be shared between processes, if desired. If a process
+wants to do this, it simply skips the registration of the UMEM and its
+corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind
+call and submits the XSK of the process it would like to share UMEM
+with as well as its own newly created XSK socket. The new process will
+then receive frame id references in its own RX ring that point to this
+shared UMEM. Note that since the ring structures are single-consumer /
+single-producer (for performance reasons), the new process has to
+create its own socket with associated RX and TX rings, since it cannot
+share this with the other process. This is also the reason that there
+is only one set of FILL and COMPLETION rings per UMEM. It is the
+responsibility of a single process to handle the UMEM.
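+
+Expressed in code, the sharing process could bind roughly as follows
+(a sketch; xsk_a is assumed to be the file descriptor of the socket
+that registered the UMEM, xsk_b the new process's own socket)::
+
+    struct sockaddr_xdp sxdp = {
+        .sxdp_family = AF_XDP,
+        .sxdp_ifindex = if_nametoindex("eth0"),
+        .sxdp_queue_id = 16,
+        .sxdp_flags = XDP_SHARED_UMEM,
+        .sxdp_shared_umem_fd = xsk_a,  /* borrow A's UMEM and its
+                                          FILL/COMPLETION rings */
+    };
+    bind(xsk_b, (struct sockaddr *)&sxdp, sizeof(sxdp));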
+
+How are packets then distributed from an XDP program to the XSKs? There
+is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The
+user-space application can place an XSK at an arbitrary place in this
+map. The XDP program can then redirect a packet to a specific index in
+this map and at this point XDP validates that the XSK in that map was
+indeed bound to that device and ring number. If not, the packet is
+dropped. If the map is empty at that index, the packet is also
+dropped. This also means that it is currently mandatory to have an XDP
+program loaded (and one XSK in the XSKMAP) to be able to get any
+traffic to user space through the XSK.
+
+AF_XDP can operate in two different modes: XDP_SKB and XDP_DRV. If the
+driver does not have support for XDP, or XDP_SKB is explicitly chosen
+when loading the XDP program, XDP_SKB mode is employed. It uses SKBs
+together with the generic XDP support and copies out the data to user
+space; this is a fallback mode that works for any network device. On the other
+hand, if the driver has support for XDP, it will be used by the AF_XDP
+code to provide better performance, but there is still a copy of the
+data into user space.
+
+Concepts
+========
+
+In order to use an AF_XDP socket, a number of associated objects need
+to be set up.
+
+Jonathan Corbet has also written an excellent article on LWN,
+"Accelerating networking with AF_XDP". It can be found at
+https://lwn.net/Articles/750845/.
+
+UMEM
+----
+
+A UMEM is a region of virtually contiguous memory, divided into
+equal-sized frames. A UMEM is associated with a netdev and a specific
+queue id of that netdev. It is created and configured (frame size,
+frame headroom, start address and size) by using the XDP_UMEM_REG
+setsockopt system call. A UMEM is bound to a netdev and queue id, via
+the bind() system call.
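+
+For example, registering a 16-frame UMEM carved out of an anonymous
+mapping might look like this (a sketch; the field names assume the
+struct xdp_umem_reg of this patchset's linux/if_xdp.h, error handling
+elided)::
+
+    #include <sys/mman.h>
+    #include <linux/if_xdp.h>
+
+    #define FRAME_SIZE 4096
+    #define NUM_FRAMES 16
+
+    void *bufs = mmap(NULL, NUM_FRAMES * FRAME_SIZE,
+                      PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+    struct xdp_umem_reg mr = {
+        .addr = (__u64)(unsigned long)bufs,
+        .len = NUM_FRAMES * FRAME_SIZE,
+        .frame_size = FRAME_SIZE,
+        .frame_headroom = 0,
+    };
+    setsockopt(xsk, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));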
+
+An AF_XDP socket is linked to a single UMEM, but one UMEM can have
+multiple AF_XDP sockets. To share a UMEM created via one socket A,
+a second socket B can set the XDP_SHARED_UMEM flag in the
+struct sockaddr_xdp member sxdp_flags, and pass the file descriptor
+of A in the struct sockaddr_xdp member sxdp_shared_umem_fd.
+
+The UMEM has two single-producer/single-consumer rings, that are used
+to transfer ownership of UMEM frames between the kernel and the
+user-space application.
+
+Rings
+-----
+
+There are four different kinds of rings: Fill, Completion, RX and
+TX. All rings are single-producer/single-consumer, so the user-space
+application needs explicit synchronization if multiple
+processes/threads are reading/writing to them.
+
+The UMEM uses two rings: Fill and Completion. Each socket associated
+with the UMEM must have an RX queue, TX queue or both. Say that there
+is a setup with four sockets (all doing TX and RX). Then there will be
+one Fill ring, one Completion ring, four TX rings and four RX rings.
+
+The rings are head(producer)/tail(consumer) based rings. A producer
+writes the data ring at the index pointed out by the struct xdp_ring
+producer member, and increases the producer index. A consumer reads
+the data ring at the index pointed out by the struct xdp_ring consumer
+member, and increases the consumer index.
+
+The rings are configured and created via the _RING setsockopt system
+calls and mmapped to user-space using the appropriate offset to mmap()
+(XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and
+XDP_UMEM_PGOFF_COMPLETION_RING).
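+
+Mapping the RX ring, for instance, could be sketched as follows
+(reusing the rx_size from the earlier sketch; the length covers the
+struct xdp_rxtx_ring header plus the descriptor array, error handling
+elided)::
+
+    #include <sys/mman.h>
+    #include <linux/if_xdp.h>
+
+    size_t len = sizeof(struct xdp_rxtx_ring) +
+                 rx_size * sizeof(struct xdp_desc);
+    struct xdp_rxtx_ring *rx_ring =
+        mmap(NULL, len, PROT_READ | PROT_WRITE,
+             MAP_SHARED | MAP_POPULATE, xsk, XDP_PGOFF_RX_RING);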
+
+The size of the rings needs to be a power of two.
+
+UMEM Fill Ring
+~~~~~~~~~~~~~~
+
+The Fill ring is used to transfer ownership of UMEM frames from
+user-space to kernel-space. The UMEM indices are passed in the
+ring. As an example, if the UMEM is 64k and each frame is 4k, then the
+UMEM has 16 frames and can pass indices between 0 and 15.
+
+Frames passed to the kernel are used for the ingress path (RX rings).
+
+The user application produces UMEM indices to this ring.
+
+UMEM Completion Ring
+~~~~~~~~~~~~~~~~~~~~
+
+The Completion ring is used to transfer ownership of UMEM frames from
+kernel-space to user-space. Just like the Fill ring, UMEM indices are
+used.
+
+Frames passed from the kernel to user-space are frames that have been
+sent (TX ring) and can be used by user-space again.
+
+The user application consumes UMEM indices from this ring.
+
+
+RX Ring
+~~~~~~~
+
+The RX ring is the receiving side of a socket. Each entry in the ring
+is a struct xdp_desc descriptor. The descriptor contains the UMEM index
+(idx), the length of the data (len), and the offset into the frame
+(offset).
+
+If no frames have been passed to the kernel via the Fill ring, no
+descriptors will (or can) appear on the RX ring.
+
+The user application consumes struct xdp_desc descriptors from this
+ring.
+
+TX Ring
+~~~~~~~
+
+The TX ring is used to send frames. The struct xdp_desc descriptor is
+filled (index, length and offset) and passed into the ring.
+
+To start the transfer, a sendmsg() system call is required. This might
+be relaxed in the future.
+
+The user application produces struct xdp_desc descriptors to this
+ring.
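+
+The kick itself can be minimal; a sketch (the bundled sample uses an
+equivalent empty sendto() call)::
+
+    struct msghdr msg = {};
+
+    /* Ask the kernel to start draining the TX ring. */
+    sendmsg(xsk, &msg, MSG_DONTWAIT);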
+
+XSKMAP / BPF_MAP_TYPE_XSKMAP
+----------------------------
+
+On the XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that
+is used in conjunction with bpf_redirect_map() to pass the ingress
+frame to a socket.
+
+The user application inserts the socket into the map, via the bpf()
+system call.
+
+Note that if an XDP program tries to redirect to a socket that does
+not match the queue configuration and netdev, the frame will be
+dropped. E.g. if an AF_XDP socket is bound to netdev eth0 and
+queue 17, only the XDP program executing for eth0 and queue 17 will
+successfully pass data to the socket. Please refer to the sample
+application (samples/bpf/) for an example.
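+
+A minimal XDP program in the style of the bundled samples could look
+like this (a sketch; it assumes the user-space side has already placed
+its XSK file descriptors in xsks_map, and it drops frames on empty
+slots as described above)::
+
+    #include <linux/bpf.h>
+    #include "bpf_helpers.h"
+
+    struct bpf_map_def SEC("maps") xsks_map = {
+        .type = BPF_MAP_TYPE_XSKMAP,
+        .key_size = sizeof(int),
+        .value_size = sizeof(int),
+        .max_entries = 4,
+    };
+
+    SEC("xdp_sock")
+    int xdp_sock_prog(struct xdp_md *ctx)
+    {
+        /* Redirect to the XSK bound to this RX queue, if any. */
+        return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
+    }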
+
+Usage
+=====
+
+In order to use AF_XDP sockets, two parts are needed: the
+user-space application and the XDP program. For a complete setup and
+usage example, please refer to the sample application. The user-space
+side is xdpsock_user.c and the XDP side is xdpsock_kern.c.
+
+Naive ring dequeue and enqueue could look like this::
+
+    // typedef struct xdp_rxtx_ring RING;
+    // typedef struct xdp_umem_ring RING;
+
+    // typedef struct xdp_desc RING_TYPE;
+    // typedef __u32 RING_TYPE;
+
+    int dequeue_one(RING *ring, RING_TYPE *item)
+    {
+        __u32 entries = ring->ptrs.producer - ring->ptrs.consumer;
+
+        if (entries == 0)
+            return -1;
+
+        // read-barrier!
+
+        *item = ring->desc[ring->ptrs.consumer & (RING_SIZE - 1)];
+        ring->ptrs.consumer++;
+        return 0;
+    }
+
+    int enqueue_one(RING *ring, const RING_TYPE *item)
+    {
+        __u32 free_entries = RING_SIZE - (ring->ptrs.producer - ring->ptrs.consumer);
+
+        if (free_entries == 0)
+            return -1;
+
+        ring->desc[ring->ptrs.producer & (RING_SIZE - 1)] = *item;
+
+        // write-barrier!
+
+        ring->ptrs.producer++;
+        return 0;
+    }
+
+
+For a more optimized version, please refer to the sample application.
+
+Sample application
+==================
+
+There is a xdpsock benchmarking/test application included that
+demonstrates how to use AF_XDP sockets with both private and shared
+UMEMs. Say that you would like your UDP traffic from port 4242 to end
+up in queue 16, on which we will enable AF_XDP. Here, we use ethtool
+for this::
+
+      ethtool -N p3p2 rx-flow-hash udp4 fn
+      ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \
+          action 16
+
+Running the rxdrop benchmark in XDP_DRV mode can then be done
+using::
+
+      samples/bpf/xdpsock -i p3p2 -q 16 -r -N
+
+For XDP_SKB mode, use the switch "-S" instead of "-N" and all options
+can be displayed with "-h", as usual.
+
+Credits
+=======
+
+- Björn Töpel (AF_XDP core)
+- Magnus Karlsson (AF_XDP core)
+- Alexander Duyck
+- Alexei Starovoitov
+- Daniel Borkmann
+- Jesper Dangaard Brouer
+- John Fastabend
+- Jonathan Corbet (LWN coverage)
+- Michael S. Tsirkin
+- Qi Z Zhang
+- Willem de Bruijn
+
index 9ba04c0bab8db6e1a74947770a028ebca43e1651..c13214d073a4866f49025033a86fed03275bca5f 100644 (file)
@@ -140,7 +140,7 @@ bonding module at load time, or are specified via sysfs.
 
        Module options may be given as command line arguments to the
 insmod or modprobe command, but are usually specified in either the
-/etc/modrobe.d/*.conf configuration files, or in a distro-specific
+/etc/modprobe.d/*.conf configuration files, or in a distro-specific
 configuration file (some of which are detailed in the next section).
 
        Details on bonding support for sysfs is provided in the
index fd55c7de99910628a68b545d606bd38b9020a6b8..e6b4ebb2b243821e0b849a7b541fd80772d06a54 100644 (file)
@@ -483,6 +483,12 @@ Example output from dmesg:
 [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
 [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3
 
+When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and
+setting any other value than that will result in failure. This is even the case for
+setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log
+is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the
+generally recommended approach instead.
+
 In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for
 generating disassembly out of the kernel log's hexdump:
 
@@ -1136,6 +1142,7 @@ into a register from memory, the register's top 56 bits are known zero, while
 the low 8 are unknown - which is represented as the tnum (0x0; 0xff).  If we
 then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
 0x1ff), because of potential carries.
+
 Besides arithmetic, the register state can also be updated by conditional
 branches.  For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
 it will have a umin_value (unsigned minimum value) of 9, whereas in the 'false'
@@ -1144,14 +1151,16 @@ BPF_JSGE) would instead update the signed minimum/maximum values.  Information
 from the signed and unsigned bounds can be combined; for instance if a value is
 first tested < 8 and then tested s> 4, the verifier will conclude that the value
 is also > 4 and s< 8, since the bounds prevent crossing the sign boundary.
+
 PTR_TO_PACKETs with a variable offset part have an 'id', which is common to all
 pointers sharing that same variable offset.  This is important for packet range
-checks: after adding some variable to a packet pointer, if you then copy it to
-another register and (say) add a constant 4, both registers will share the same
-'id' but one will have a fixed offset of +4.  Then if it is bounds-checked and
-found to be less than a PTR_TO_PACKET_END, the other register is now known to
-have a safe range of at least 4 bytes.  See 'Direct packet access', below, for
-more on PTR_TO_PACKET ranges.
+checks: after adding a variable to a packet pointer register A, if you then copy
+it to another register B and then add a constant 4 to A, both registers will
+share the same 'id' but A will have a fixed offset of +4.  Then if A is
+bounds-checked and found to be less than a PTR_TO_PACKET_END, the register B is
+now known to have a safe range of at least 4 bytes.  See 'Direct packet access',
+below, for more on PTR_TO_PACKET ranges.
+
 The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all copies of
 the pointer returned from a map lookup.  This means that when one copy is
 checked and found to be non-NULL, all copies can become PTR_TO_MAP_VALUEs.
index f204eaff657d8728cd60a4a9d19d040dd901ae5a..cbd9bdd4a79ef84b762ee52368e639579969eef6 100644 (file)
@@ -6,6 +6,7 @@ Contents:
 .. toctree::
    :maxdepth: 2
 
+   af_xdp
    batman-adv
    can
    dpaa2/index
index b583a73cf95f3b0bacbec3ca8cf3efd8c8c4c948..924bd51327b7a8dff3503d7afccdd54e1eb5c29b 100644 (file)
@@ -449,8 +449,10 @@ tcp_recovery - INTEGER
        features.
 
        RACK: 0x1 enables the RACK loss detection for fast detection of lost
-             retransmissions and tail drops.
+             retransmissions and tail drops. It also subsumes and disables
+             RFC6675 recovery for SACK connections.
        RACK: 0x2 makes RACK's reordering window static (min_rtt/4).
+       RACK: 0x4 disables RACK's DUPACK threshold heuristic
 
        Default: 0x1
 
@@ -523,6 +525,19 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
 tcp_sack - BOOLEAN
        Enable select acknowledgments (SACKS).
 
+tcp_comp_sack_delay_ns - LONG INTEGER
+       TCP tries to reduce the number of SACKs sent, using a timer
+       based on 5% of SRTT, capped by this sysctl, in nanoseconds.
+       The default is 1 ms, based on the TSO autosizing period.
+
+       Default : 1,000,000 ns (1 ms)
+
+tcp_comp_sack_nr - INTEGER
+       Max number of SACKs that can be compressed.
+       Using 0 disables SACK compression.
+
+       Default : 44
+
 tcp_slow_start_after_idle - BOOLEAN
        If set, provide RFC2861 behavior and time out the congestion
        window after an idle period.  An idle period is defined at
@@ -1428,6 +1443,19 @@ ip6frag_low_thresh - INTEGER
 ip6frag_time - INTEGER
        Time in seconds to keep an IPv6 fragment in memory.
 
+IPv6 Segment Routing:
+
+seg6_flowlabel - INTEGER
+       Controls the behaviour of computing the flowlabel of the outer
+       IPv6 header in case of SR T.encaps
+
+       -1 Set flowlabel to zero.
+       0 Copy the flowlabel from the inner packet in case of inner IPv6
+               (set flowlabel to 0 in case of IPv4/L2)
+       1 Compute the flowlabel using seg6_make_flowlabel()
+
+       Default is 0.
+
 conf/default/*:
        Change the interface-specific default settings.
 
@@ -2126,18 +2154,3 @@ max_dgram_qlen - INTEGER
 
        Default: 10
 
-
-UNDOCUMENTED:
-
-/proc/sys/net/irda/*
-       fast_poll_increase FIXME
-       warn_noreply_time FIXME
-       discovery_slots FIXME
-       slot_timeout FIXME
-       max_baud_rate FIXME
-       discovery_timeout FIXME
-       lap_keepalive_time FIXME
-       max_noreply_time FIXME
-       max_tx_data_size FIXME
-       max_tx_window FIXME
-       min_tx_turn_time FIXME
index c77f9d57eb91584878d26061450c3e08a5b19030..c4a54c162547d3e290a69446afadef54aac7f1d5 100644 (file)
@@ -113,6 +113,13 @@ whatever headers there might be.
 NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
 set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
 
+ * Transmit UDP segmentation offload
+
+NETIF_F_GSO_UDP_GSO_L4 accepts a single UDP header with a payload that exceeds
+gso_size. On segmentation, it segments the payload on gso_size boundaries and
+replicates the network and UDP headers (fixing up the last one if less than
+gso_size).
+
  * Transmit DMA from high memory
 
 On platforms where this is relevant, NETIF_F_HIGHDMA signals that
index 31abd04b957284f55697be1aa226e906c981196c..6f55eb960a6dc642159cb8b027c000c764d554a5 100644 (file)
@@ -168,7 +168,7 @@ update on the CPUs, as discussed below:
 
 [Please bear in mind that the kernel requests the microcode images from
 userspace, using the request_firmware() function defined in
-drivers/base/firmware_class.c]
+drivers/base/firmware_loader/main.c]
 
 
 a. When all the CPUs are identical:
index 00cecf1fcba9f34808cf27053afc0543d852ef2a..633be1043690dde2bbd45a09387843cdb5e13b84 100644 (file)
@@ -157,8 +157,5 @@ memory management. See ``include/sound/sndmagic.h`` for complete list of them. M
 OSS sound drivers have their magic numbers constructed from the soundcard PCI
 ID - these are not listed here as well.
 
-IrDA subsystem also uses large number of own magic numbers, see
-``include/net/irda/irda.h`` for a complete list of them.
-
 HFS is another larger user of magic numbers - you can find them in
 ``fs/hfs/hfs.h``.
index a958d8b5e99da34b898a1a81b47d818a09c557b6..d410f47567e987d3f9870c57189195b1283a6fd0 100755 (executable)
@@ -387,11 +387,11 @@ tree for more details.
 
 =head1 BUGS
 
-Report bugs to Mauro Carvalho Chehab <mchehab@s-opensource.com>
+Report bugs to Mauro Carvalho Chehab <mchehab@kernel.org>
 
 =head1 COPYRIGHT
 
-Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab@s-opensource.com>.
+Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab+samsung@kernel.org>.
 
 License GPLv2: GNU GPL version 2 <http://gnu.org/licenses/gpl.html>.
 
index 5992602469d894d594b829690b9b8caf7fda57f4..9ecde517728c317ac7428efd616536a0a90f301e 100644 (file)
@@ -45,6 +45,7 @@ through bpf(2) and passing a verifier in the kernel, a JIT will then
 translate these BPF proglets into native CPU instructions. There are
 two flavors of JITs, the newer eBPF JIT currently supported on:
   - x86_64
+  - x86_32
   - arm64
   - arm32
   - ppc64
index e45f0786f3f9ef29bb1da2f9c2b3c8d7430adc85..67d9c38e95eb54d2855bf356b6fccc0cace8442a 100644 (file)
@@ -461,9 +461,17 @@ of ftrace. Here is a list of some of the key files:
                and ticks at the same rate as the hardware clocksource.
 
        boot:
-               Same as mono. Used to be a separate clock which accounted
-               for the time spent in suspend while CLOCK_MONOTONIC did
-               not.
+               This is the boot clock (CLOCK_BOOTTIME) and is based on the
+               fast monotonic clock, but also accounts for time spent in
+               suspend. Since the clock access is designed for use in
+               tracing in the suspend path, some side effects are possible
+               if the clock is accessed after the suspend time is accounted but
+               before the fast mono clock is updated. In this case, the clock update
+               appears to happen slightly sooner than it normally would have.
+               Also on 32-bit systems, it's possible that the 64-bit boot offset
+               sees a partial update. These effects are rare and post
+               processing should be able to handle them. See comments in the
+               ktime_get_boot_fast_ns() function for more information.
 
        To set a clock, simply echo the clock name into this file::
 
index 698660b7f21fd8d0453c12a1c29c0614557cf38c..c77c0f0608647ea9432597aa0e0a2ee70c892fa9 100644 (file)
@@ -6,7 +6,7 @@ communicating in English you can also ask the Chinese maintainer for
 help.  Contact the Chinese maintainer if this translation is outdated
 or if there is a problem with the translation.
 
-Maintainer: Mauro Carvalho Chehab <mchehab@infradead.org>
+Maintainer: Mauro Carvalho Chehab <mchehab@kernel.org>
 Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
 ---------------------------------------------------------------------
 Documentation/video4linux/v4l2-framework.txt 的中文翻译
@@ -14,7 +14,7 @@ Documentation/video4linux/v4l2-framework.txt 的中文翻译
 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
 译存在问题,请联系中文版维护者。
-英文版维护者: Mauro Carvalho Chehab <mchehab@infradead.org>
+英文版维护者: Mauro Carvalho Chehab <mchehab@kernel.org>
 中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
 中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
 中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
index 1c7958b57fe9fd52d337862fa9b6b5d46ca2e9f8..758bf403a169dac0db8ec259bc611986aea69cfd 100644 (file)
@@ -1960,6 +1960,9 @@ ARM 32-bit VFP control registers have the following id bit patterns:
 ARM 64-bit FP registers have the following id bit patterns:
   0x4030 0000 0012 0 <regno:12>
 
+ARM firmware pseudo-registers have the following bit pattern:
+  0x4030 0000 0014 <regno:16>
+
 
 arm64 registers are mapped using the lower 32 bits. The upper 16 of
 that is the register group type, or coprocessor number:
@@ -1976,6 +1979,9 @@ arm64 CCSIDR registers are demultiplexed by CSSELR value:
 arm64 system registers have the following id bit patterns:
   0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
 
+arm64 firmware pseudo-registers have the following bit pattern:
+  0x6030 0000 0014 <regno:16>
+
 
 MIPS registers are mapped using the lower 32 bits.  The upper 16 of that is
 the register group type:
@@ -2510,7 +2516,8 @@ Possible features:
          and execute guest code when KVM_RUN is called.
        - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
          Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
-       - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
+       - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 (or a future revision
+          backward compatible with v0.2) for the CPU.
          Depends on KVM_CAP_ARM_PSCI_0_2.
        - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
          Depends on KVM_CAP_ARM_PMU_V3.
diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt
new file mode 100644 (file)
index 0000000..aafdab8
--- /dev/null
@@ -0,0 +1,30 @@
+KVM implements the PSCI (Power State Coordination Interface)
+specification in order to provide services such as CPU on/off, reset
+and power-off to the guest.
+
+The PSCI specification is regularly updated to provide new features,
+and KVM implements these updates if they make sense from a virtualization
+point of view.
+
+This means that a guest booted on two different versions of KVM can
+observe two different "firmware" revisions. This could cause issues if
+a given guest is tied to a particular PSCI revision (unlikely), or if
+a migration causes a different PSCI version to be exposed out of the
+blue to an unsuspecting guest.
+
+In order to remedy this situation, KVM exposes a set of "firmware
+pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
+interface. These registers can be saved/restored by userspace, and set
+to a convenient value if required.
+
+The following register is defined:
+
+* KVM_REG_ARM_PSCI_VERSION:
+
+  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
+    (and thus has already been initialized)
+  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
+    highest PSCI version implemented by KVM and compatible with v0.2)
+  - Allows any PSCI version implemented by KVM and compatible with
+    v0.2 to be set with SET_ONE_REG
+  - Affects the whole VM (even if the register view is per-vcpu)
index d4f33eb805dd228a8d8aff796bd77327334aa24b..ab022dcd09117571d215294ed51c5675543165f1 100644 (file)
@@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
 
 flag                               || value || meaning
 ==================================================================================
-KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
-                                   ||       || determine if there is vCPU pinning
-                                   ||       || and there is no vCPU over-commitment,
+KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
+                                   ||       || determine that vCPUs are never
+                                   ||       || preempted for an unlimited time,
                                    ||       || allowing optimizations
 ----------------------------------------------------------------------------------
index fc812fb5857a80f7aa2d43696b1d377d0f2865d7..032807a955586d4a98a8b48449b88156e2ecca34 100644 (file)
@@ -137,9 +137,9 @@ Maintainers List (try to look for most precise areas first)
                -----------------------------------
 
 3C59X NETWORK DRIVER
-M:     Steffen Klassert <klassert@mathematik.tu-chemnitz.de>
+M:     Steffen Klassert <klassert@kernel.org>
 L:     netdev@vger.kernel.org
-S:     Maintained
+S:     Odd Fixes
 F:     Documentation/networking/vortex.txt
 F:     drivers/net/ethernet/3com/3c59x.c
 
@@ -564,8 +564,9 @@ S:  Maintained
 F:     drivers/media/dvb-frontends/af9033*
 
 AFFS FILE SYSTEM
+M:     David Sterba <dsterba@suse.com>
 L:     linux-fsdevel@vger.kernel.org
-S:     Orphan
+S:     Odd Fixes
 F:     Documentation/filesystems/affs.txt
 F:     fs/affs/
 
@@ -905,6 +906,8 @@ ANDROID ION DRIVER
 M:     Laura Abbott <labbott@redhat.com>
 M:     Sumit Semwal <sumit.semwal@linaro.org>
 L:     devel@driverdev.osuosl.org
+L:     dri-devel@lists.freedesktop.org
+L:     linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
 S:     Supported
 F:     drivers/staging/android/ion
 F:     drivers/staging/android/uapi/ion.h
@@ -1208,7 +1211,6 @@ F:        drivers/*/*alpine*
 ARM/ARTPEC MACHINE SUPPORT
 M:     Jesper Nilsson <jesper.nilsson@axis.com>
 M:     Lars Persson <lars.persson@axis.com>
-M:     Niklas Cassel <niklas.cassel@axis.com>
 S:     Maintained
 L:     linux-arm-kernel@axis.com
 F:     arch/arm/mach-artpec
@@ -1373,7 +1375,8 @@ F:        arch/arm/mach-ebsa110/
 F:     drivers/net/ethernet/amd/am79c961a.*
 
 ARM/ENERGY MICRO (SILICON LABS) EFM32 SUPPORT
-M:     Uwe Kleine-König <kernel@pengutronix.de>
+M:     Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 N:     efm32
@@ -1401,7 +1404,8 @@ F:        arch/arm/mach-footbridge/
 
 ARM/FREESCALE IMX / MXC ARM ARCHITECTURE
 M:     Shawn Guo <shawnguo@kernel.org>
-M:     Sascha Hauer <kernel@pengutronix.de>
+M:     Sascha Hauer <s.hauer@pengutronix.de>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
 R:     Fabio Estevam <fabio.estevam@nxp.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -1416,7 +1420,8 @@ F:        include/soc/imx/
 
 ARM/FREESCALE VYBRID ARM ARCHITECTURE
 M:     Shawn Guo <shawnguo@kernel.org>
-M:     Sascha Hauer <kernel@pengutronix.de>
+M:     Sascha Hauer <s.hauer@pengutronix.de>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
 R:     Stefan Agner <stefan@agner.ch>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -2549,7 +2554,6 @@ F:        Documentation/devicetree/bindings/sound/axentia,*
 F:     sound/soc/atmel/tse850-pcm5142.c
 
 AZ6007 DVB DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -2614,7 +2618,7 @@ S:        Maintained
 F:     drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M:     Michael Lyle <mlyle@lyle.org>
+M:     Coly Li <colyli@suse.de>
 M:     Kent Overstreet <kent.overstreet@gmail.com>
 L:     linux-bcache@vger.kernel.org
 W:     http://bcache.evilpiepirate.org
@@ -2724,7 +2728,6 @@ F:        Documentation/networking/filter.txt
 F:     Documentation/bpf/
 F:     include/linux/bpf*
 F:     include/linux/filter.h
-F:     include/trace/events/bpf.h
 F:     include/trace/events/xdp.h
 F:     include/uapi/linux/bpf*
 F:     include/uapi/linux/filter.h
@@ -3078,7 +3081,6 @@ F:        include/linux/btrfs*
 F:     include/uapi/linux/btrfs*
 
 BTTV VIDEO4LINUX DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -3688,7 +3690,6 @@ F:        drivers/cpufreq/arm_big_little_dt.c
 
 CPU POWER MONITORING SUBSYSTEM
 M:     Thomas Renninger <trenn@suse.com>
-M:     Shuah Khan <shuahkh@osg.samsung.com>
 M:     Shuah Khan <shuah@kernel.org>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
@@ -3807,7 +3808,6 @@ S:        Maintained
 F:     drivers/media/dvb-frontends/cx24120*
 
 CX88 VIDEO4LINUX DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -4245,6 +4245,9 @@ F:        include/trace/events/fs_dax.h
 
 DEVICE DIRECT ACCESS (DAX)
 M:     Dan Williams <dan.j.williams@intel.com>
+M:     Dave Jiang <dave.jiang@intel.com>
+M:     Ross Zwisler <ross.zwisler@linux.intel.com>
+M:     Vishal Verma <vishal.l.verma@intel.com>
 L:     linux-nvdimm@lists.01.org
 S:     Supported
 F:     drivers/dax/
@@ -4305,7 +4308,7 @@ F:        Documentation/driver-api/dma-buf.rst
 T:     git git://anongit.freedesktop.org/drm/drm-misc
 
 DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
-M:     Vinod Koul <vinod.koul@intel.com>
+M:     Vinod Koul <vkoul@kernel.org>
 L:     dmaengine@vger.kernel.org
 Q:     https://patchwork.kernel.org/project/linux-dmaengine/list/
 S:     Maintained
@@ -5045,7 +5048,6 @@ F:        drivers/edac/thunderx_edac*
 
 EDAC-CORE
 M:     Borislav Petkov <bp@alien8.de>
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-edac@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
@@ -5074,7 +5076,6 @@ S:        Maintained
 F:     drivers/edac/fsl_ddr_edac.*
 
 EDAC-GHES
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-edac@vger.kernel.org
 S:     Maintained
@@ -5091,21 +5092,18 @@ S:      Maintained
 F:     drivers/edac/i5000_edac.c
 
 EDAC-I5400
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-edac@vger.kernel.org
 S:     Maintained
 F:     drivers/edac/i5400_edac.c
 
 EDAC-I7300
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-edac@vger.kernel.org
 S:     Maintained
 F:     drivers/edac/i7300_edac.c
 
 EDAC-I7CORE
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-edac@vger.kernel.org
 S:     Maintained
@@ -5155,7 +5153,6 @@ S:        Maintained
 F:     drivers/edac/r82600_edac.c
 
 EDAC-SBRIDGE
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-edac@vger.kernel.org
 S:     Maintained
@@ -5214,7 +5211,6 @@ S:        Maintained
 F:     drivers/net/ethernet/ibm/ehea/
 
 EM28XX VIDEO4LINUX DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -5651,7 +5647,8 @@ F:        drivers/net/ethernet/freescale/fec.h
 F:     Documentation/devicetree/bindings/net/fsl-fec.txt
 
 FREESCALE IMX / MXC FRAMEBUFFER DRIVER
-M:     Sascha Hauer <kernel@pengutronix.de>
+M:     Sascha Hauer <s.hauer@pengutronix.de>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
 L:     linux-fbdev@vger.kernel.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -6263,7 +6260,7 @@ S:        Odd Fixes
 F:     drivers/media/usb/hdpvr/
 
 HEWLETT PACKARD ENTERPRISE ILO NMI WATCHDOG DRIVER
-M:     Jimmy Vance <jimmy.vance@hpe.com>
+M:     Jerry Hoemann <jerry.hoemann@hpe.com>
 S:     Supported
 F:     Documentation/watchdog/hpwdt.txt
 F:     drivers/watchdog/hpwdt.c
@@ -7403,16 +7400,6 @@ S:       Obsolete
 F:     include/uapi/linux/ipx.h
 F:     drivers/staging/ipx/
 
-IRDA SUBSYSTEM
-M:     Samuel Ortiz <samuel@sortiz.org>
-L:     irda-users@lists.sourceforge.net (subscribers-only)
-L:     netdev@vger.kernel.org
-W:     http://irda.sourceforge.net/
-S:     Obsolete
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sameo/irda-2.6.git
-F:     Documentation/networking/irda.txt
-F:     drivers/staging/irda/
-
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
 M:     Marc Zyngier <marc.zyngier@arm.com>
 S:     Maintained
@@ -7677,9 +7664,11 @@ L:       linux-kbuild@vger.kernel.org
 S:     Maintained
 F:     Documentation/kbuild/
 F:     Makefile
-F:     scripts/Makefile.*
+F:     scripts/Kbuild*
+F:     scripts/Makefile*
 F:     scripts/basic/
 F:     scripts/mk*
+F:     scripts/mod/
 F:     scripts/package/
 
 KERNEL JANITORS
@@ -7704,10 +7693,10 @@ F:      include/linux/sunrpc/
 F:     include/uapi/linux/sunrpc/
 
 KERNEL SELFTEST FRAMEWORK
-M:     Shuah Khan <shuahkh@osg.samsung.com>
 M:     Shuah Khan <shuah@kernel.org>
 L:     linux-kselftest@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
+Q:     https://patchwork.kernel.org/project/linux-kselftest/list/
 S:     Maintained
 F:     tools/testing/selftests/
 F:     Documentation/dev-tools/kselftest*
@@ -7745,7 +7734,7 @@ F:        arch/x86/include/asm/svm.h
 F:     arch/x86/kvm/svm.c
 
 KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
-M:     Christoffer Dall <christoffer.dall@linaro.org>
+M:     Christoffer Dall <christoffer.dall@arm.com>
 M:     Marc Zyngier <marc.zyngier@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     kvmarm@lists.cs.columbia.edu
@@ -7759,7 +7748,7 @@ F:        virt/kvm/arm/
 F:     include/kvm/arm_*
 
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
-M:     Christoffer Dall <christoffer.dall@linaro.org>
+M:     Christoffer Dall <christoffer.dall@arm.com>
 M:     Marc Zyngier <marc.zyngier@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     kvmarm@lists.cs.columbia.edu
@@ -8055,6 +8044,9 @@ F:        tools/lib/lockdep/
 
 LIBNVDIMM BLK: MMIO-APERTURE DRIVER
 M:     Ross Zwisler <ross.zwisler@linux.intel.com>
+M:     Dan Williams <dan.j.williams@intel.com>
+M:     Vishal Verma <vishal.l.verma@intel.com>
+M:     Dave Jiang <dave.jiang@intel.com>
 L:     linux-nvdimm@lists.01.org
 Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:     Supported
@@ -8063,6 +8055,9 @@ F:        drivers/nvdimm/region_devs.c
 
 LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
 M:     Vishal Verma <vishal.l.verma@intel.com>
+M:     Dan Williams <dan.j.williams@intel.com>
+M:     Ross Zwisler <ross.zwisler@linux.intel.com>
+M:     Dave Jiang <dave.jiang@intel.com>
 L:     linux-nvdimm@lists.01.org
 Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:     Supported
@@ -8070,6 +8065,9 @@ F:        drivers/nvdimm/btt*
 
 LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
 M:     Ross Zwisler <ross.zwisler@linux.intel.com>
+M:     Dan Williams <dan.j.williams@intel.com>
+M:     Vishal Verma <vishal.l.verma@intel.com>
+M:     Dave Jiang <dave.jiang@intel.com>
 L:     linux-nvdimm@lists.01.org
 Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:     Supported
@@ -8085,6 +8083,9 @@ F:        Documentation/devicetree/bindings/pmem/pmem-region.txt
 
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:     Dan Williams <dan.j.williams@intel.com>
+M:     Ross Zwisler <ross.zwisler@linux.intel.com>
+M:     Vishal Verma <vishal.l.verma@intel.com>
+M:     Dave Jiang <dave.jiang@intel.com>
 L:     linux-nvdimm@lists.01.org
 Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
@@ -8464,6 +8465,7 @@ M:        Vivien Didelot <vivien.didelot@savoirfairelinux.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/dsa/mv88e6xxx/
+F:     include/linux/platform_data/mv88e6xxx.h
 F:     Documentation/devicetree/bindings/net/dsa/marvell.txt
 
 MARVELL ARMADA DRM SUPPORT
@@ -8859,7 +8861,6 @@ F:        Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
 F:     drivers/staging/media/tegra-vde/
 
 MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 P:     LinuxTV.org Project
 L:     linux-media@vger.kernel.org
@@ -9018,26 +9019,17 @@ W:      http://www.mellanox.com
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
 F:     drivers/net/ethernet/mellanox/mlx5/core/en_*
 
-MELLANOX ETHERNET INNOVA DRIVER
-M:     Ilan Tayari <ilant@mellanox.com>
-R:     Boris Pismenny <borisp@mellanox.com>
+MELLANOX ETHERNET INNOVA DRIVERS
+M:     Boris Pismenny <borisp@mellanox.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
+F:     drivers/net/ethernet/mellanox/mlx5/core/en_accel/*
+F:     drivers/net/ethernet/mellanox/mlx5/core/accel/*
 F:     drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 F:     include/linux/mlx5/mlx5_ifc_fpga.h
 
-MELLANOX ETHERNET INNOVA IPSEC DRIVER
-M:     Ilan Tayari <ilant@mellanox.com>
-R:     Boris Pismenny <borisp@mellanox.com>
-L:     netdev@vger.kernel.org
-S:     Supported
-W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
-F:     drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
-F:     drivers/net/ethernet/mellanox/mlx5/core/ipsec*
-
 MELLANOX ETHERNET SWITCH DRIVERS
 M:     Jiri Pirko <jiri@mellanox.com>
 M:     Ido Schimmel <idosch@mellanox.com>
@@ -9287,6 +9279,12 @@ F:       include/linux/cciss*.h
 F:     include/uapi/linux/cciss*.h
 F:     Documentation/scsi/smartpqi.txt
 
+MICROSEMI ETHERNET SWITCH DRIVER
+M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     drivers/net/ethernet/mscc/
+
 MICROSOFT SURFACE PRO 3 BUTTON DRIVER
 M:     Chen Yu <yu.c.chen@intel.com>
 L:     platform-driver-x86@vger.kernel.org
@@ -9713,6 +9711,7 @@ W:        https://fedorahosted.org/dropwatch/
 F:     net/core/drop_monitor.c
 
 NETWORKING DRIVERS
+M:     "David S. Miller" <davem@davemloft.net>
 L:     netdev@vger.kernel.org
 W:     http://www.linuxfoundation.org/en/Net
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
@@ -9829,7 +9828,7 @@ F:        net/netfilter/xt_CONNSECMARK.c
 F:     net/netfilter/xt_SECMARK.c
 
 NETWORKING [TLS]
-M:     Ilya Lesokhin <ilyal@mellanox.com>
+M:     Boris Pismenny <borisp@mellanox.com>
 M:     Aviad Yehezkel <aviadye@mellanox.com>
 M:     Dave Watson <davejwatson@fb.com>
 L:     netdev@vger.kernel.org
@@ -9869,7 +9868,7 @@ F:        include/linux/platform_data/nxp-nci.h
 F:     Documentation/devicetree/bindings/net/nfc/
 
 NFS, SUNRPC, AND LOCKD CLIENTS
-M:     Trond Myklebust <trond.myklebust@primarydata.com>
+M:     Trond Myklebust <trond.myklebust@hammerspace.com>
 M:     Anna Schumaker <anna.schumaker@netapp.com>
 L:     linux-nfs@vger.kernel.org
 W:     http://client.linux-nfs.org
@@ -10889,7 +10888,6 @@ F:      drivers/pci/host/
 F:     drivers/pci/dwc/
 
 PCIE DRIVER FOR AXIS ARTPEC
-M:     Niklas Cassel <niklas.cassel@axis.com>
 M:     Jesper Nilsson <jesper.nilsson@axis.com>
 L:     linux-arm-kernel@axis.com
 L:     linux-pci@vger.kernel.org
@@ -12219,7 +12217,7 @@ F:      Documentation/s390/vfio-ccw.txt
 F:     include/uapi/linux/vfio_ccw.h
 
 S390 ZCRYPT DRIVER
-M:     Harald Freudenberger <freude@de.ibm.com>
+M:     Harald Freudenberger <freude@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 W:     http://www.ibm.com/developerworks/linux/linux390/
 S:     Supported
@@ -12248,7 +12246,6 @@ S:      Odd Fixes
 F:     drivers/media/i2c/saa6588*
 
 SAA7134 VIDEO4LINUX DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -12487,6 +12484,7 @@ F:      drivers/scsi/st_*.h
 SCTP PROTOCOL
 M:     Vlad Yasevich <vyasevich@gmail.com>
 M:     Neil Horman <nhorman@tuxdriver.com>
+M:     Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 L:     linux-sctp@vger.kernel.org
 W:     http://lksctp.sourceforge.net
 S:     Maintained
@@ -12752,7 +12750,6 @@ S:      Maintained
 F:     drivers/media/radio/si4713/radio-usb-si4713.c
 
 SIANO DVB DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -12824,7 +12821,8 @@ F:      include/linux/siphash.h
 
 SIOX
 M:     Gavin Schenk <g.schenk@eckelmann.de>
-M:     Uwe Kleine-König <kernel@pengutronix.de>
+M:     Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
 S:     Supported
 F:     drivers/siox/*
 F:     include/trace/events/siox.h
@@ -13263,6 +13261,12 @@ M:     Jan-Benedict Glaw <jbglaw@lug-owl.de>
 S:     Maintained
 F:     arch/alpha/kernel/srm_env.c
 
+ST STM32 I2C/SMBUS DRIVER
+M:     Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+L:     linux-i2c@vger.kernel.org
+S:     Maintained
+F:     drivers/i2c/busses/i2c-stm32*
+
 STABLE BRANCH
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 L:     stable@vger.kernel.org
@@ -13393,6 +13397,7 @@ F:      drivers/media/usb/stk1160/
 STMMAC ETHERNET DRIVER
 M:     Giuseppe Cavallaro <peppe.cavallaro@st.com>
 M:     Alexandre Torgue <alexandre.torgue@st.com>
+M:     Jose Abreu <joabreu@synopsys.com>
 L:     netdev@vger.kernel.org
 W:     http://www.stlinux.com
 S:     Supported
@@ -13742,7 +13747,6 @@ S:      Maintained
 F:     drivers/media/i2c/tda9840*
 
 TEA5761 TUNER DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -13751,7 +13755,6 @@ S:      Odd fixes
 F:     drivers/media/tuners/tea5761.*
 
 TEA5767 TUNER DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -13841,7 +13844,6 @@ S:      Supported
 F:     drivers/iommu/tegra*
 
 TEGRA KBC DRIVER
-M:     Rakesh Iyer <riyer@nvidia.com>
 M:     Laxman Dewangan <ldewangan@nvidia.com>
 S:     Supported
 F:     drivers/input/keyboard/tegra-kbc.c
@@ -13944,7 +13946,7 @@ THUNDERBOLT DRIVER
 M:     Andreas Noever <andreas.noever@gmail.com>
 M:     Michael Jamet <michael.jamet@intel.com>
 M:     Mika Westerberg <mika.westerberg@linux.intel.com>
-M:     Yehezkel Bernat <yehezkel.bernat@intel.com>
+M:     Yehezkel Bernat <YehezkelShB@gmail.com>
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
 S:     Maintained
 F:     Documentation/admin-guide/thunderbolt.rst
@@ -13954,7 +13956,7 @@ F:      include/linux/thunderbolt.h
 THUNDERBOLT NETWORK DRIVER
 M:     Michael Jamet <michael.jamet@intel.com>
 M:     Mika Westerberg <mika.westerberg@linux.intel.com>
-M:     Yehezkel Bernat <yehezkel.bernat@intel.com>
+M:     Yehezkel Bernat <YehezkelShB@gmail.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/thunderbolt.c
@@ -14168,7 +14170,6 @@ F:      Documentation/networking/tlan.txt
 F:     drivers/net/ethernet/ti/tlan.*
 
 TM6000 VIDEO4LINUX DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -14653,7 +14654,6 @@ F:      drivers/usb/common/usb-otg-fsm.c
 
 USB OVER IP DRIVER
 M:     Valentina Manea <valentina.manea.m@gmail.com>
-M:     Shuah Khan <shuahkh@osg.samsung.com>
 M:     Shuah Khan <shuah@kernel.org>
 L:     linux-usb@vger.kernel.org
 S:     Maintained
@@ -15397,7 +15397,6 @@ S:      Maintained
 F:     arch/x86/entry/vdso/
 
 XC2028/3028 TUNER DRIVER
-M:     Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:     Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-media@vger.kernel.org
 W:     https://linuxtv.org
@@ -15405,6 +15404,14 @@ T:     git git://linuxtv.org/media_tree.git
 S:     Maintained
 F:     drivers/media/tuners/tuner-xc2028.*
 
+XDP SOCKETS (AF_XDP)
+M:     Björn Töpel <bjorn.topel@intel.com>
+M:     Magnus Karlsson <magnus.karlsson@intel.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     kernel/bpf/xskmap.c
+F:     net/xdp/
+
 XEN BLOCK SUBSYSTEM
 M:     Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 M:     Roger Pau Monné <roger.pau@citrix.com>
index e811e0c509c5b90add1c607bc664c2bc8cb0d6dc..ec6f45928fd43225fadb39ed877a4ba20567a580 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
 VERSION = 4
 PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
-NAME = Fearless Coyote
+EXTRAVERSION = -rc6
+NAME = Merciless Moray
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
index 8e0d665c8d53d3c57c2c434939f93125472ff610..75dd23acf133b089384db7e9071a37980e98c5ea 100644 (file)
@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
 config GCC_PLUGIN_STRUCTLEAK
        bool "Force initialization of variables containing userspace addresses"
        depends on GCC_PLUGINS
+       # Currently STRUCTLEAK inserts initialization out of live scope of
+       # variables from KASAN point of view. This leads to KASAN false
+       # positive reports. Prohibit this combination for now.
+       depends on !KASAN_EXTRA
        help
          This plugin zero-initializes any structures containing a
          __user attribute. This can prevent some classes of information
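The constraint above is worth unpacking: STRUCTLEAK zero-initializes stack structures that carry a __user annotation so that uninitialized padding or fields cannot leak stale stack data, but the inserted initializations currently confuse KASAN_EXTRA's scope tracking, hence the new dependency. A minimal sketch of the leak the plugin closes (hypothetical struct and names, plain C, with __user stubbed out):

    /* Hypothetical example, not kernel source. */
    #define __user /* kernel address-space annotation; a no-op here */

    struct req {
        int cmd;
        char __user *buf;  /* a __user member makes the plugin act on this struct */
        long pad;          /* left unset by the code below */
    };

    long handle_req(void)
    {
        struct req r = {0}; /* the zero-init STRUCTLEAK effectively inserts */
        r.cmd = 1;          /* the code itself only sets some fields */
        return r.pad;       /* guaranteed 0 rather than stale stack bytes */
    }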
index 45a6b9b7af2a56f179427d353ad70c3cd920bb69..6a4e7341ecd33284f835a65fcc83a3e52ee4d524 100644 (file)
@@ -117,11 +117,9 @@ ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
-KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
-               perl -e 'while (<>) { \
-                       $$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \
-                       $$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \
-               }; printf "%d\n", $$bss_end - $$bss_start;')
+KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
+               sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \
+                      -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) )
 LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
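The new KBSS_SZ rule drops the perl dependency: nm emits each symbol as "address type name", the sed expressions rewrite __bss_start into a "-0x<addr>" term and __bss_stop into a "+0x<addr>" term (now also accepting type A, not only B), and the shell's $((...)) arithmetic sums the two. The same computation as a small C sketch, with made-up addresses:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* Stand-ins for the addresses nm would report for vmlinux. */
        const char *bss_start = "c0a1b000";
        const char *bss_stop  = "c0a5f000";

        unsigned long start = strtoul(bss_start, NULL, 16);
        unsigned long stop  = strtoul(bss_stop, NULL, 16);

        /* Equivalent of $(( -0x<start> + 0x<stop> )) in the Makefile. */
        printf("%lu\n", stop - start);
        return 0;
    }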
index 45c8823c37503d3bfdc2beee80304c556db8bf53..517e0e18f0b8307855447abfec63f0827cb72cec 100644 (file)
 #if defined(CONFIG_DEBUG_ICEDCC)
 
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                .endm
                .macro  writeb, ch, rb
                mcr     p14, 0, \ch, c0, c5, 0
                .endm
 #elif defined(CONFIG_CPU_XSCALE)
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                .endm
                .macro  writeb, ch, rb
                mcr     p14, 0, \ch, c8, c0, 0
                .endm
 #else
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                .endm
                .macro  writeb, ch, rb
                mcr     p14, 0, \ch, c1, c0, 0
@@ -57,7 +57,7 @@
                .endm
 
 #if defined(CONFIG_ARCH_SA1100)
-               .macro  loadsp, rb, tmp
+               .macro  loadsp, rb, tmp1, tmp2
                mov     \rb, #0x80000000        @ physical base address
 #ifdef CONFIG_DEBUG_LL_SER3
                add     \rb, \rb, #0x00050000   @ Ser3
@@ -66,8 +66,8 @@
 #endif
                .endm
 #else
-               .macro  loadsp, rb, tmp
-               addruart \rb, \tmp
+               .macro  loadsp, rb, tmp1, tmp2
+               addruart \rb, \tmp1, \tmp2
                .endm
 #endif
 #endif
@@ -561,8 +561,6 @@ not_relocated:      mov     r0, #0
                bl      decompress_kernel
                bl      cache_clean_flush
                bl      cache_off
-               mov     r1, r7                  @ restore architecture number
-               mov     r2, r8                  @ restore atags pointer
 
 #ifdef CONFIG_ARM_VIRT_EXT
                mrs     r0, spsr                @ Get saved CPU boot mode
@@ -1297,7 +1295,7 @@ phex:             adr     r3, phexbuf
                b       1b
 
 @ puts corrupts {r0, r1, r2, r3}
-puts:          loadsp  r3, r1
+puts:          loadsp  r3, r2, r1
 1:             ldrb    r2, [r0], #1
                teq     r2, #0
                moveq   pc, lr
@@ -1314,8 +1312,8 @@ puts:             loadsp  r3, r1
 @ putc corrupts {r0, r1, r2, r3}
 putc:
                mov     r2, r0
+               loadsp  r3, r1, r0
                mov     r0, #0
-               loadsp  r3, r1
                b       2b
 
 @ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
@@ -1365,6 +1363,8 @@ __hyp_reentry_vectors:
 
 __enter_kernel:
                mov     r0, #0                  @ must be 0
+               mov     r1, r7                  @ restore architecture number
+               mov     r2, r8                  @ restore atags pointer
  ARM(          mov     pc, r4          )       @ call kernel
  M_CLASS(      add     r4, r4, #1      )       @ enter in Thumb mode for M class
  THUMB(                bx      r4              )       @ entry point is always ARM for A/R classes
index 699fdf94d139bdffea5b28fbc4bab912895583f2..9fe4f5a6379e3b60d79a6ed8a0327f680434861e 100644 (file)
@@ -69,7 +69,7 @@ core {
                timer@20200 {
                        compatible = "arm,cortex-a9-global-timer";
                        reg = <0x20200 0x100>;
-                       interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                        clocks = <&periph_clk>;
                };
 
index a1f4d6d5a569bbb740b6a5d27f7232155853ec4a..0edf769ea95c93c1449dfef12b71213251d7c973 100644 (file)
@@ -21,8 +21,8 @@ chosen {
                stdout-path = "serial2:115200n8";
        };
 
-       memory {
-               device_type = "memory";
+       memory@c0000000 {
+               /* 128 MB DDR2 SDRAM @ 0xc0000000 */
                reg = <0xc0000000 0x08000000>;
        };
 
index c66cf78953639db3ed025e4997beb6ef77719064..12010002dbdb65cbdf9b87d2f37bf39e72903b3e 100644 (file)
@@ -7,10 +7,19 @@
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
  */
-#include "skeleton.dtsi"
 #include <dt-bindings/interrupt-controller/irq.h>
 
 / {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       chosen { };
+       aliases { };
+
+       memory@c0000000 {
+               device_type = "memory";
+               reg = <0xc0000000 0x0>;
+       };
+
        arm {
                #address-cells = <1>;
                #size-cells = <1>;
@@ -46,8 +55,6 @@ soc@1c00000 {
                pmx_core: pinmux@14120 {
                        compatible = "pinctrl-single";
                        reg = <0x14120 0x50>;
-                       #address-cells = <1>;
-                       #size-cells = <0>;
                        #pinctrl-cells = <2>;
                        pinctrl-single,bit-per-mux;
                        pinctrl-single,register-width = <32>;
index d6657b3bae84b11395b46bc4d130964179b0bde1..85d7b5148b0ac6c6fc0009b72dac20c89ee16271 100644 (file)
@@ -10,7 +10,7 @@
 
 / {
        model = "DM8148 EVM";
-       compatible = "ti,dm8148-evm", "ti,dm8148";
+       compatible = "ti,dm8148-evm", "ti,dm8148", "ti,dm814";
 
        memory@80000000 {
                device_type = "memory";
index 63883b3479f95f22cd4787f8c10730dc2c20dc9b..6418f9cdbe83faaada09aeb75371724afe5bdda1 100644 (file)
@@ -9,7 +9,7 @@
 
 / {
        model = "HP t410 Smart Zero Client";
-       compatible = "hp,t410", "ti,dm8148";
+       compatible = "hp,t410", "ti,dm8148", "ti,dm814";
 
        memory@80000000 {
                device_type = "memory";
index c72a2132aa823b053c5ab9450a92faf266c85878..1d030d567307172b17766e1a649924c5b5a48eb7 100644 (file)
@@ -10,7 +10,7 @@
 
 / {
        model = "DM8168 EVM";
-       compatible = "ti,dm8168-evm", "ti,dm8168";
+       compatible = "ti,dm8168-evm", "ti,dm8168", "ti,dm816";
 
        memory@80000000 {
                device_type = "memory";
index fee0547f7302eca5b18fab7ed2d26d7bc0857134..31b824ad5d29fa822ffba094a8151fd0789369dc 100644 (file)
@@ -10,7 +10,7 @@
 
 / {
        model = "DRA62x J5 Eco EVM";
-       compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148";
+       compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148", "ti,dm814";
 
        memory@80000000 {
                device_type = "memory";
index 8bbb6f85d1618b3743bed54612ffe769141c850b..4785fbcc41ed840f0e72507163d2374532925395 100644 (file)
@@ -134,37 +134,37 @@ mux {
                                                function = "gmii";
                                                groups = "gmii_gmac0_grp";
                                        };
-                                       /* Settings come from OpenWRT */
+                                       /* Settings come from OpenWRT, pins on SL3516 */
                                        conf0 {
-                                               pins = "R8 GMAC0 RXDV", "U11 GMAC1 RXDV";
+                                               pins = "V8 GMAC0 RXDV", "T10 GMAC1 RXDV";
                                                skew-delay = <0>;
                                        };
                                        conf1 {
-                                               pins = "T8 GMAC0 RXC", "T11 GMAC1 RXC";
+                                               pins = "Y7 GMAC0 RXC", "Y11 GMAC1 RXC";
                                                skew-delay = <15>;
                                        };
                                        conf2 {
-                                               pins = "P8 GMAC0 TXEN", "V11 GMAC1 TXEN";
+                                               pins = "T8 GMAC0 TXEN", "W11 GMAC1 TXEN";
                                                skew-delay = <7>;
                                        };
                                        conf3 {
-                                               pins = "V7 GMAC0 TXC";
+                                               pins = "U8 GMAC0 TXC";
                                                skew-delay = <11>;
                                        };
                                        conf4 {
-                                               pins = "P10 GMAC1 TXC";
+                                               pins = "V11 GMAC1 TXC";
                                                skew-delay = <10>;
                                        };
                                        conf5 {
                                                /* The data lines all have default skew */
-                                               pins = "U8 GMAC0 RXD0", "V8 GMAC0 RXD1",
-                                                      "P9 GMAC0 RXD2", "R9 GMAC0 RXD3",
-                                                      "U7 GMAC0 TXD0", "T7 GMAC0 TXD1",
-                                                      "R7 GMAC0 TXD2", "P7 GMAC0 TXD3",
-                                                      "R11 GMAC1 RXD0", "P11 GMAC1 RXD1",
-                                                      "V12 GMAC1 RXD2", "U12 GMAC1 RXD3",
-                                                      "R10 GMAC1 TXD0", "T10 GMAC1 TXD1",
-                                                      "U10 GMAC1 TXD2", "V10 GMAC1 TXD3";
+                                               pins = "W8 GMAC0 RXD0", "V9 GMAC0 RXD1",
+                                                      "Y8 GMAC0 RXD2", "U9 GMAC0 RXD3",
+                                                      "T7 GMAC0 TXD0", "U6 GMAC0 TXD1",
+                                                      "V7 GMAC0 TXD2", "U7 GMAC0 TXD3",
+                                                      "Y12 GMAC1 RXD0", "V12 GMAC1 RXD1",
+                                                      "T11 GMAC1 RXD2", "W12 GMAC1 RXD3",
+                                                      "U10 GMAC1 TXD0", "Y10 GMAC1 TXD1",
+                                                      "W10 GMAC1 TXD2", "T9 GMAC1 TXD3";
                                                skew-delay = <7>;
                                        };
                                        /* Set up drive strength on GMAC0 to 16 mA */
index bf343195697e8ccbfcbfe8ad0b2421c09d13015e..54111ed218b10ff964a721f325b7dc1cc4e98539 100644 (file)
@@ -303,7 +303,7 @@ wdog: wdog@53fdc000 {
                        };
 
                        can1: can@53fe4000 {
-                               compatible = "fsl,imx35-flexcan";
+                               compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan";
                                reg = <0x53fe4000 0x1000>;
                                clocks = <&clks 33>, <&clks 33>;
                                clock-names = "ipg", "per";
@@ -312,7 +312,7 @@ can1: can@53fe4000 {
                        };
 
                        can2: can@53fe8000 {
-                               compatible = "fsl,imx35-flexcan";
+                               compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan";
                                reg = <0x53fe8000 0x1000>;
                                clocks = <&clks 34>, <&clks 34>;
                                clock-names = "ipg", "per";
index 0c99ac04ad08b2fb9e95910ab055d8e539f181d7..6464f2560e066b559fd51aa5c806f80f57dd1493 100644 (file)
@@ -523,7 +523,7 @@ touchscreen@4c {
        };
 
        touchscreen@20 {
-               compatible = "syna,rmi4_i2c";
+               compatible = "syna,rmi4-i2c";
                reg = <0x20>;
                pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_ts>;
@@ -541,8 +541,8 @@ rmi4-f01@1 {
 
                rmi4-f11@11 {
                        reg = <0x11>;
-                       touch-inverted-y;
-                       touch-swapped-x-y;
+                       touchscreen-inverted-y;
+                       touchscreen-swapped-x-y;
                        syna,sensor-type = <1>;
                };
        };
index 7d647d043f528ab1194b0467a7552d34a3a7a9b7..3d65c0192f6931f6f5997e2de8e5cb1c2f2a8b6e 100644 (file)
@@ -551,7 +551,7 @@ uart2: serial@53fc0000 {
                        };
 
                        can1: can@53fc8000 {
-                               compatible = "fsl,imx53-flexcan";
+                               compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan";
                                reg = <0x53fc8000 0x4000>;
                                interrupts = <82>;
                                clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>,
@@ -561,7 +561,7 @@ can1: can@53fc8000 {
                        };
 
                        can2: can@53fcc000 {
-                               compatible = "fsl,imx53-flexcan";
+                               compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan";
                                reg = <0x53fcc000 0x4000>;
                                interrupts = <83>;
                                clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>,
index 4d42335c0dee991aa18ff8c40526865eb89cd97c..ce85b3ca1a55fc3eebc13607240ab499231e131d 100644 (file)
@@ -868,6 +868,7 @@ sai3: sai@308c0000 {
 
                        crypto: caam@30900000 {
                                compatible = "fsl,sec-v4.0";
+                               fsl,sec-era = <8>;
                                #address-cells = <1>;
                                #size-cells = <1>;
                                reg = <0x30900000 0x40000>;
index b47cac23a04be5d18b9324248ab091885a908077..6fa7bba3e801508fde024cac085e0d9f7b43e96c 100644 (file)
@@ -26,7 +26,7 @@ wl12xx_vmmc: wl12xx_vmmc {
                gpio = <&gpio1 3 0>;   /* gpio_3 */
                startup-delay-us = <70000>;
                enable-active-high;
-               vin-supply = <&vmmc2>;
+               vin-supply = <&vaux3>;
        };
 
        /* HS USB Host PHY on PORT 1 */
@@ -82,6 +82,7 @@ twl: twl@48 {
                twl_audio: audio {
                        compatible = "ti,twl4030-audio";
                        codec {
+                               ti,hs_extmute_gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>;
                        };
                };
        };
@@ -199,6 +200,7 @@ i2c1_pins: pinmux_i2c1_pins {
                pinctrl-single,pins = <
                        OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */
                        OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */
+                       OMAP3_CORE1_IOPAD(0x20ba, PIN_OUTPUT | MUX_MODE4)        /* gpmc_ncs6.gpio_57 */
                >;
        };
 };
@@ -213,7 +215,7 @@ OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4)    /* sys_boot2.gpio_4 */
        };
        wl127x_gpio: pinmux_wl127x_gpio_pin {
                pinctrl-single,pins = <
-                       OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4)         /* sys_boot0.gpio_2 */
+                       OMAP3_WKUP_IOPAD(0x2a0a, PIN_INPUT | MUX_MODE4)         /* sys_boot0.gpio_2 */
                        OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)        /* sys_boot1.gpio_3 */
                >;
        };
@@ -260,6 +262,11 @@ &mcspi1 {
 #include "twl4030.dtsi"
 #include "twl4030_omap3.dtsi"
 
+&vaux3 {
+       regulator-min-microvolt = <2800000>;
+       regulator-max-microvolt = <2800000>;
+};
+
 &twl {
        twl_power: power {
                compatible = "ti,twl4030-power-idle-osc-off", "ti,twl4030-power-idle";
index 475904894b8633464173b1b76f85e378c13ff4cb..e554b6e039f32fe396386bd6f7c32e9b4869ecf8 100644 (file)
@@ -163,10 +163,10 @@ cm1_clockdomains: clockdomains {
 
                        cm2: cm2@8000 {
                                compatible = "ti,omap4-cm2", "simple-bus";
-                               reg = <0x8000 0x3000>;
+                               reg = <0x8000 0x2000>;
                                #address-cells = <1>;
                                #size-cells = <1>;
-                               ranges = <0 0x8000 0x3000>;
+                               ranges = <0 0x8000 0x2000>;
 
                                cm2_clocks: clocks {
                                        #address-cells = <1>;
@@ -250,11 +250,11 @@ counter32k: counter@4000 {
 
                                prm: prm@6000 {
                                        compatible = "ti,omap4-prm";
-                                       reg = <0x6000 0x3000>;
+                                       reg = <0x6000 0x2000>;
                                        interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
                                        #address-cells = <1>;
                                        #size-cells = <1>;
-                                       ranges = <0 0x6000 0x3000>;
+                                       ranges = <0 0x6000 0x2000>;
 
                                        prm_clocks: clocks {
                                                #address-cells = <1>;
index 063fdb65dc60dfc22c915103853a2ce9a137dbf0..f07f9018c3e72e4631967dc3ca3c3da8d6772aef 100644 (file)
@@ -379,7 +379,7 @@ ports {
                                port@0 {
                                        reg = <0>;
                                        adv7511_in: endpoint {
-                                               remote-endpoint = <&du_out_lvds0>;
+                                               remote-endpoint = <&lvds0_out>;
                                        };
                                };
 
@@ -467,10 +467,8 @@ &du {
        status = "okay";
 
        clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 722>,
-                <&cpg CPG_MOD 726>, <&cpg CPG_MOD 725>,
                 <&x13_clk>, <&x2_clk>;
-       clock-names = "du.0", "du.1", "du.2", "lvds.0", "lvds.1",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "du.2", "dclkin.0", "dclkin.1";
 
        ports {
                port@0 {
@@ -478,12 +476,26 @@ endpoint {
                                remote-endpoint = <&adv7123_in>;
                        };
                };
+       };
+};
+
+&lvds0 {
+       status = "okay";
+
+       ports {
                port@1 {
                        endpoint {
                                remote-endpoint = <&adv7511_in>;
                        };
                };
-               port@2 {
+       };
+};
+
+&lvds1 {
+       status = "okay";
+
+       ports {
+               port@1 {
                        lvds_connector: endpoint {
                        };
                };
index e4367cecad18a1d0f5e3c21dde4c1985b0fb7297..05a0fc23ac88f8ded558fe31da8ae15358c1ad15 100644 (file)
@@ -1627,18 +1627,13 @@ jpu: jpeg-codec@fe980000 {
 
                du: display@feb00000 {
                        compatible = "renesas,du-r8a7790";
-                       reg = <0 0xfeb00000 0 0x70000>,
-                             <0 0xfeb90000 0 0x1c>,
-                             <0 0xfeb94000 0 0x1c>;
-                       reg-names = "du", "lvds.0", "lvds.1";
+                       reg = <0 0xfeb00000 0 0x70000>;
                        interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 722>, <&cpg CPG_MOD 726>,
-                                <&cpg CPG_MOD 725>;
-                       clock-names = "du.0", "du.1", "du.2", "lvds.0",
-                                     "lvds.1";
+                                <&cpg CPG_MOD 722>;
+                       clock-names = "du.0", "du.1", "du.2";
                        status = "disabled";
 
                        ports {
@@ -1653,11 +1648,65 @@ du_out_rgb: endpoint {
                                port@1 {
                                        reg = <1>;
                                        du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
                                        };
                                };
                                port@2 {
                                        reg = <2>;
                                        du_out_lvds1: endpoint {
+                                               remote-endpoint = <&lvds1_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7790-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
+                                       };
+                               };
+                       };
+               };
+
+               lvds1: lvds@feb94000 {
+                       compatible = "renesas,r8a7790-lvds";
+                       reg = <0 0xfeb94000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 725>;
+                       power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+                       resets = <&cpg 725>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds1_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds1>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds1_out: endpoint {
                                        };
                                };
                        };
index f40321a1c917e7d1c7546176319541a0da37ba08..9d7213a0b8b826506e14c51b95fe10724434b9b6 100644 (file)
@@ -468,10 +468,9 @@ &du {
        pinctrl-names = "default";
        status = "okay";
 
-       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
                 <&x13_clk>, <&x2_clk>;
-       clock-names = "du.0", "du.1", "lvds.0",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
 
        ports {
                port@0 {
@@ -479,6 +478,13 @@ endpoint {
                                remote-endpoint = <&adv7511_in>;
                        };
                };
+       };
+};
+
+&lvds0 {
+       status = "okay";
+
+       ports {
                port@1 {
                        lvds_connector: endpoint {
                        };
index c14e6fe9e4f69b1f070da271a03842ad699d898d..ae9ed9ff53efde994a4d4735227bb0a8199fab81 100644 (file)
@@ -441,10 +441,9 @@ &du {
        pinctrl-names = "default";
        status = "okay";
 
-       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
                 <&x3_clk>, <&x16_clk>;
-       clock-names = "du.0", "du.1", "lvds.0",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
 
        ports {
                port@0 {
@@ -455,6 +454,17 @@ endpoint {
        };
 };
 
+&lvds0 {
+       status = "okay";
+
+       ports {
+               port@1 {
+                       lvds_connector: endpoint {
+                       };
+               };
+       };
+};
+
 &rcar_sound {
        pinctrl-0 = <&ssi_pins &audio_clk_pins>;
        pinctrl-names = "default";
index f11dab71b03a9f7aaef287fe38f324b4c9b0d137..506b20885413398825a353b6b84938175cfd255f 100644 (file)
@@ -1633,15 +1633,12 @@ jpu: jpeg-codec@fe980000 {
 
                du: display@feb00000 {
                        compatible = "renesas,du-r8a7791";
-                       reg = <0 0xfeb00000 0 0x40000>,
-                             <0 0xfeb90000 0 0x1c>;
-                       reg-names = "du", "lvds.0";
+                       reg = <0 0xfeb00000 0 0x40000>;
                        interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&cpg CPG_MOD 724>,
-                                <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 726>;
-                       clock-names = "du.0", "du.1", "lvds.0";
+                                <&cpg CPG_MOD 723>;
+                       clock-names = "du.0", "du.1";
                        status = "disabled";
 
                        ports {
@@ -1656,6 +1653,33 @@ du_out_rgb: endpoint {
                                port@1 {
                                        reg = <1>;
                                        du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7791-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
                                        };
                                };
                        };
index 9ed6961f2d9a2c4a1a671980f3c336f5d137cc7d..96e117d8b2cce0f8e4070d05fdb48995775ee667 100644 (file)
@@ -447,10 +447,9 @@ &du {
        pinctrl-names = "default";
        status = "okay";
 
-       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+       clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
                 <&x13_clk>, <&x2_clk>;
-       clock-names = "du.0", "du.1", "lvds.0",
-                     "dclkin.0", "dclkin.1";
+       clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
 
        ports {
                port@0 {
@@ -458,6 +457,11 @@ endpoint {
                                remote-endpoint = <&adv7511_in>;
                        };
                };
+       };
+};
+
+&lvds0 {
+       ports {
                port@1 {
                        lvds_connector: endpoint {
                        };
index f9c5a557107d932756e02c9c10da743dfe9be673..4f526030dc7cb7011e67f8ff26fad601d6af867f 100644 (file)
@@ -1292,15 +1292,12 @@ gic: interrupt-controller@f1001000 {
 
                du: display@feb00000 {
                        compatible = "renesas,du-r8a7793";
-                       reg = <0 0xfeb00000 0 0x40000>,
-                             <0 0xfeb90000 0 0x1c>;
-                       reg-names = "du", "lvds.0";
+                       reg = <0 0xfeb00000 0 0x40000>;
                        interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&cpg CPG_MOD 724>,
-                                <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 726>;
-                       clock-names = "du.0", "du.1", "lvds.0";
+                                <&cpg CPG_MOD 723>;
+                       clock-names = "du.0", "du.1";
                        status = "disabled";
 
                        ports {
@@ -1315,6 +1312,34 @@ du_out_rgb: endpoint {
                                port@1 {
                                        reg = <1>;
                                        du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7793-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7793_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
                                        };
                                };
                        };
index 0a7136462a1a6dfa698f1f5b909918e6ec3b1cf7..983dd5c1479459f2ba27f43dbefdbaaa411cffb5 100644 (file)
@@ -741,7 +741,7 @@ phy2: usb-phy@c5004000 {
                phy_type = "ulpi";
                clocks = <&tegra_car TEGRA20_CLK_USB2>,
                         <&tegra_car TEGRA20_CLK_PLL_U>,
-                        <&tegra_car TEGRA20_CLK_PLL_P_OUT4>;
+                        <&tegra_car TEGRA20_CLK_CDEV2>;
                clock-names = "reg", "pll_u", "ulpi-link";
                resets = <&tegra_car 58>, <&tegra_car 22>;
                reset-names = "usb", "utmi-pads";
index 2a63fa10c813042bf2b98261964c7b6cd9f03790..553777ac28146f053983a1903ddf568a5bdd3c31 100644 (file)
@@ -1,6 +1,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_USER_NS=y
 CONFIG_RELAY=y
@@ -12,15 +13,21 @@ CONFIG_ARCH_GEMINI=y
 CONFIG_PCI=y
 CONFIG_PREEMPT=y
 CONFIG_AEABI=y
+CONFIG_HIGHMEM=y
+CONFIG_CMA=y
 CONFIG_CMDLINE="console=ttyS0,115200n8"
 CONFIG_KEXEC=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
+CONFIG_NET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
@@ -33,6 +40,11 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_ATA=y
 CONFIG_PATA_FTIDE010=y
+CONFIG_NETDEVICES=y
+CONFIG_GEMINI_ETHERNET=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_MDIO_GPIO=y
+CONFIG_REALTEK_PHY=y
 CONFIG_INPUT_EVDEV=y
 CONFIG_KEYBOARD_GPIO=y
 # CONFIG_INPUT_MOUSE is not set
@@ -43,9 +55,19 @@ CONFIG_SERIAL_8250_NR_UARTS=1
 CONFIG_SERIAL_8250_RUNTIME_UARTS=1
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
+CONFIG_I2C_GPIO=y
+CONFIG_SPI=y
+CONFIG_SPI_GPIO=y
+CONFIG_SENSORS_GPIO_FAN=y
+CONFIG_SENSORS_LM75=y
+CONFIG_THERMAL=y
 CONFIG_WATCHDOG=y
-CONFIG_GEMINI_WATCHDOG=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_DRM=y
+CONFIG_DRM_PANEL_ILITEK_IL9322=y
+CONFIG_DRM_TVE200=y
+CONFIG_LOGO=y
 CONFIG_USB=y
 CONFIG_USB_MON=y
 CONFIG_USB_FOTG210_HCD=y
@@ -54,6 +76,7 @@ CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_DISK=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_RTC_CLASS=y
 CONFIG_DMADEVICES=y
index 2620ce790db0afaa18ed4d51eca2d886a21b00a8..371fca4e1ab7deafe2d8aed06ac5fd4092c7a8e4 100644 (file)
@@ -57,6 +57,7 @@ CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_DENALI_DT=y
 CONFIG_MTD_SPI_NOR=y
+# CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set
 CONFIG_SPI_CADENCE_QUADSPI=y
 CONFIG_OF_OVERLAY=y
 CONFIG_OF_CONFIGFS=y
index bc8d4bbd82e27719a990c7972fd77bfca9dc7aef..9342904cccca67ac3cfba4e0b75bcbc17893ade0 100644 (file)
@@ -536,4 +536,14 @@ THUMB(     orr     \reg , \reg , #PSR_T_BIT        )
 #endif
        .endm
 
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE(entry)                           \
+       .pushsection "_kprobe_blacklist", "aw" ;        \
+       .balign 4 ;                                     \
+       .long entry;                                    \
+       .popsection
+#else
+#define _ASM_NOKPROBE(entry)
+#endif
+
 #endif /* __ASM_ASSEMBLER_H__ */
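Each _ASM_NOKPROBE(entry) emits the entry's address into the _kprobe_blacklist section, where the kprobes core later finds it and refuses to plant probes on that address. A hedged sketch of how such a linker-collected table is typically consumed (generic pattern with illustrative names, not the kernel's exact code):

    /* The linker script provides start/stop symbols around the section. */
    extern unsigned long __start_blacklist[];
    extern unsigned long __stop_blacklist[];

    static int addr_is_blacklisted(unsigned long addr)
    {
        unsigned long *entry;

        for (entry = __start_blacklist; entry < __stop_blacklist; entry++)
            if (*entry == addr)
                return 1;  /* refuse to place a probe here */
        return 0;
    }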
index c6a749568dd6c413603ef3b7554561f356b33cc9..c7c28c885a1946de9a012311cbca7861d93a67da 100644 (file)
@@ -77,6 +77,9 @@ struct kvm_arch {
        /* Interrupt controller */
        struct vgic_dist        vgic;
        int max_vcpus;
+
+       /* Mandated version of PSCI */
+       u32 psci_version;
 };
 
 #define KVM_NR_MEM_OBJS     40
index 707a1f06dc5d5e207f0d2c18e0b37844569ca199..f675162663f09a054253b8ad6d86361c74968618 100644 (file)
@@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
        return 8;
 }
 
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+                                     gpa_t gpa, void *data, unsigned long len)
+{
+       int srcu_idx = srcu_read_lock(&kvm->srcu);
+       int ret = kvm_read_guest(kvm, gpa, data, len);
+
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+       return ret;
+}
+
 static inline void *kvm_get_hyp_vector(void)
 {
        return kvm_ksym_ref(__kvm_hyp_vector);
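Since kvm_read_guest_lock() takes and drops the SRCU read lock itself, call sites that previously invoked kvm_read_guest() outside the kvm->srcu critical section convert mechanically. A sketch of a typical caller (fragment only, hypothetical names):

    /* Read a guest descriptor at gpa into a local; the wrapper
     * handles the SRCU bracketing around the memslot lookup. */
    u64 desc;
    int ret;

    ret = kvm_read_guest_lock(kvm, gpa, &desc, sizeof(desc));
    if (ret)
        return ret;  /* memslot lookup or copy from guest failed */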
index 2ba95d6fe852d7bed7460b022f1ed5b8409caacd..caae4843cb7001fbee1fa9b222850df7006850fb 100644 (file)
@@ -195,6 +195,12 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST         0x1009
 #define KVM_REG_ARM_VFP_FPINST2                0x100A
 
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW                 (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r)          (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
+                                        KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION       KVM_REG_ARM_FW_REG(0)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR      0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
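The new KVM_REG_ARM_PSCI_VERSION pseudo-register travels through the existing ONE_REG interface, so a VMM can read (or, for migration compatibility, write back) the mandated PSCI revision without any new ioctl. A sketch of the userspace side, assuming an already-created vCPU descriptor:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch: read the firmware pseudo-register defined above. */
    static int get_psci_version(int vcpu_fd, uint64_t *version)
    {
        struct kvm_one_reg reg = {
            .id   = KVM_REG_ARM_PSCI_VERSION,
            .addr = (uint64_t)(unsigned long)version,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }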
diff --git a/arch/arm/include/uapi/asm/siginfo.h b/arch/arm/include/uapi/asm/siginfo.h
deleted file mode 100644 (file)
index d051388..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_SIGINFO_H
-#define __ASM_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME      0       /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-#endif
index 6b38d7a634c19ffd279f98ca8cc3a113484d1fd3..dd2eb5f76b9f0a7d64f50169dd0d04a402b2ae67 100644 (file)
@@ -83,7 +83,7 @@ void machine_crash_nonpanic_core(void *unused)
 {
        struct pt_regs regs;
 
-       crash_setup_regs(&regs, NULL);
+       crash_setup_regs(&regs, get_irq_regs());
        printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
               smp_processor_id());
        crash_save_cpu(&regs, smp_processor_id());
@@ -95,6 +95,27 @@ void machine_crash_nonpanic_core(void *unused)
                cpu_relax();
 }
 
+void crash_smp_send_stop(void)
+{
+       static int cpus_stopped;
+       unsigned long msecs;
+
+       if (cpus_stopped)
+               return;
+
+       atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+       smp_call_function(machine_crash_nonpanic_core, NULL, false);
+       msecs = 1000; /* Wait at most a second for the other cpus to stop */
+       while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+               mdelay(1);
+               msecs--;
+       }
+       if (atomic_read(&waiting_for_crash_ipi) > 0)
+               pr_warn("Non-crashing CPUs did not react to IPI\n");
+
+       cpus_stopped = 1;
+}
+
 static void machine_kexec_mask_interrupts(void)
 {
        unsigned int i;
@@ -120,19 +141,8 @@ static void machine_kexec_mask_interrupts(void)
 
 void machine_crash_shutdown(struct pt_regs *regs)
 {
-       unsigned long msecs;
-
        local_irq_disable();
-
-       atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-       smp_call_function(machine_crash_nonpanic_core, NULL, false);
-       msecs = 1000; /* Wait at most a second for the other cpus to stop */
-       while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
-               mdelay(1);
-               msecs--;
-       }
-       if (atomic_read(&waiting_for_crash_ipi) > 0)
-               pr_warn("Non-crashing CPUs did not react to IPI\n");
+       crash_smp_send_stop();
 
        crash_save_cpu(regs, smp_processor_id());
        machine_kexec_mask_interrupts();
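Pulling the IPI logic into crash_smp_send_stop() lets other entry points (such as the panic path) stop the secondary CPUs before machine_crash_shutdown() runs, with the static flag making any later call a no-op. The guard in isolation, as a generic sketch with illustrative names:

    /* One-shot stop pattern mirroring the code above. It tolerates
     * repeated calls, though not two CPUs racing in the first time. */
    static int stop_issued;

    void stop_secondaries_once(void)
    {
        if (stop_issued)
            return;          /* later callers become no-ops */

        /* ...send stop IPIs and bounded-wait for acknowledgement... */
        stop_issued = 1;
    }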
index 5e3633c24e636575c19059cdb99bd247a666b5f5..2fe87109ae468bce6d38ff575f395fc2f41cc72e 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
 #include <linux/kdebug.h>
+#include <linux/kprobes.h>
 #include <linux/module.h>
 #include <linux/kexec.h>
 #include <linux/bug.h>
@@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_hook *hook)
        raw_spin_unlock_irqrestore(&undef_lock, flags);
 }
 
-static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+static nokprobe_inline
+int call_undef_hook(struct pt_regs *regs, unsigned int instr)
 {
        struct undef_hook *hook;
        unsigned long flags;
@@ -490,6 +492,7 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
 
        arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
 }
+NOKPROBE_SYMBOL(do_undefinstr)
 
 /*
  * Handle FIQ similarly to NMI on x86 systems.
index 1e0784ebbfd6db77595652961ad3c48c41fceabe..a18f33edc471a92fcf6194dbe6601bc4b94f1988 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+#include <kvm/arm_psci.h>
 #include <asm/cputype.h>
 #include <linux/uaccess.h>
 #include <asm/kvm.h>
@@ -176,6 +177,7 @@ static unsigned long num_core_regs(void)
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
        return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+               + kvm_arm_get_fw_num_regs(vcpu)
                + NUM_TIMER_REGS;
 }
 
@@ -196,6 +198,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
                uindices++;
        }
 
+       ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
+       if (ret)
+               return ret;
+       uindices += kvm_arm_get_fw_num_regs(vcpu);
+
        ret = copy_timer_indices(vcpu, uindices);
        if (ret)
                return ret;
@@ -214,6 +221,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
                return get_core_reg(vcpu, reg);
 
+       if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+               return kvm_arm_get_fw_reg(vcpu, reg);
+
        if (is_timer_reg(reg->id))
                return get_timer_reg(vcpu, reg);
 
@@ -230,6 +240,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
                return set_core_reg(vcpu, reg);
 
+       if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+               return kvm_arm_set_fw_reg(vcpu, reg);
+
        if (is_timer_reg(reg->id))
                return set_timer_reg(vcpu, reg);
 
index df73914e81c8344feccac5df8d5791dcbe92ed60..746e7801dcdf70fed9e339c2d6800b3f275c49b7 100644 (file)
@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_1)
+_ASM_NOKPROBE(__get_user_1)
 
 ENTRY(__get_user_2)
        check_uaccess r0, 2, r1, r2, __get_user_bad
@@ -58,6 +59,7 @@ rb    .req    r0
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_2)
+_ASM_NOKPROBE(__get_user_2)
 
 ENTRY(__get_user_4)
        check_uaccess r0, 4, r1, r2, __get_user_bad
@@ -65,6 +67,7 @@ ENTRY(__get_user_4)
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_4)
+_ASM_NOKPROBE(__get_user_4)
 
 ENTRY(__get_user_8)
        check_uaccess r0, 8, r1, r2, __get_user_bad8
@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_8)
+_ASM_NOKPROBE(__get_user_8)
 
 #ifdef __ARMEB__
 ENTRY(__get_user_32t_8)
@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8)
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_32t_8)
+_ASM_NOKPROBE(__get_user_32t_8)
 
 ENTRY(__get_user_64t_1)
        check_uaccess r0, 1, r1, r2, __get_user_bad8
@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1)
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_64t_1)
+_ASM_NOKPROBE(__get_user_64t_1)
 
 ENTRY(__get_user_64t_2)
        check_uaccess r0, 2, r1, r2, __get_user_bad8
@@ -114,6 +120,7 @@ rb  .req    r0
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_64t_2)
+_ASM_NOKPROBE(__get_user_64t_2)
 
 ENTRY(__get_user_64t_4)
        check_uaccess r0, 4, r1, r2, __get_user_bad8
@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4)
        mov     r0, #0
        ret     lr
 ENDPROC(__get_user_64t_4)
+_ASM_NOKPROBE(__get_user_64t_4)
 #endif
 
 __get_user_bad8:
@@ -131,6 +139,8 @@ __get_user_bad:
        ret     lr
 ENDPROC(__get_user_bad)
 ENDPROC(__get_user_bad8)
+_ASM_NOKPROBE(__get_user_bad)
+_ASM_NOKPROBE(__get_user_bad8)
 
 .pushsection __ex_table, "a"
        .long   1b, __get_user_bad
index 004f9c8de0329cacdb25bfc7c7819a528a78fe77..d1e8ce7b4bd21245d41901709041c113f95f344d 100644 (file)
@@ -205,12 +205,17 @@ static const short da830_evm_mmc_sd_pins[] = {
        -1
 };
 
+#define DA830_MMCSD_WP_PIN             GPIO_TO_PIN(2, 1)
+#define DA830_MMCSD_CD_PIN             GPIO_TO_PIN(2, 2)
+
 static struct gpiod_lookup_table mmc_gpios_table = {
        .dev_id = "da830-mmc.0",
        .table = {
                /* gpio chip 1 contains gpio range 32-63 */
-               GPIO_LOOKUP("davinci_gpio.1", 2, "cd", GPIO_ACTIVE_LOW),
-               GPIO_LOOKUP("davinci_gpio.1", 1, "wp", GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_CD_PIN, "cd",
+                           GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_WP_PIN, "wp",
+                           GPIO_ACTIVE_LOW),
        },
 };
 
index 3063478bcc366315d98ed861876380e06b358cf7..158ed9a1483fc87582d66de2746620590f800dd5 100644 (file)
@@ -763,12 +763,17 @@ static const short da850_evm_mcasp_pins[] __initconst = {
        -1
 };
 
+#define DA850_MMCSD_CD_PIN             GPIO_TO_PIN(4, 0)
+#define DA850_MMCSD_WP_PIN             GPIO_TO_PIN(4, 1)
+
 static struct gpiod_lookup_table mmc_gpios_table = {
        .dev_id = "da830-mmc.0",
        .table = {
                /* gpio chip 2 contains gpio range 64-95 */
-               GPIO_LOOKUP("davinci_gpio.2", 0, "cd", GPIO_ACTIVE_LOW),
-               GPIO_LOOKUP("davinci_gpio.2", 1, "wp", GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd",
+                           GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp",
+                           GPIO_ACTIVE_LOW),
        },
 };
 
index cb30637d9eaf8e379c7ed1dd3e447c40bf56d058..23ab9e8bc04c0e3b37d7a940d23755d1e48a2c80 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/gpio.h>
 #include <linux/gpio/machine.h>
 #include <linux/clk.h>
+#include <linux/dm9000.h>
 #include <linux/videodev2.h>
 #include <media/i2c/tvp514x.h>
 #include <linux/spi/spi.h>
@@ -109,12 +110,15 @@ static struct platform_device davinci_nand_device = {
        },
 };
 
+#define DM355_I2C_SDA_PIN      GPIO_TO_PIN(0, 15)
+#define DM355_I2C_SCL_PIN      GPIO_TO_PIN(0, 14)
+
 static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
-       .dev_id = "i2c_davinci",
+       .dev_id = "i2c_davinci.1",
        .table = {
-               GPIO_LOOKUP("davinci_gpio", 15, "sda",
+               GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SDA_PIN, "sda",
                            GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
-               GPIO_LOOKUP("davinci_gpio", 14, "scl",
+               GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SCL_PIN, "scl",
                            GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
        },
 };
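
Besides the chip rename, .dev_id gains the ".1" instance suffix so
gpiolib can bind the table to the right consumer: lookup tables are
matched against dev_name() of the requesting device, which for a
platform device combines the name and instance id. A sketch with a
hypothetical buffer:

        char name[32];

        /* dev_name() for a platform device with id >= 0 is "<name>.<id>",
         * here instance 1 of the DaVinci I2C controller. */
        snprintf(name, sizeof(name), "%s.%d", "i2c_davinci", 1);
        /* gpiolib compares this string with table->dev_id. */
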
@@ -179,11 +183,16 @@ static struct resource dm355evm_dm9000_rsrc[] = {
        },
 };
 
+static struct dm9000_plat_data dm335evm_dm9000_platdata;
+
 static struct platform_device dm355evm_dm9000 = {
        .name           = "dm9000",
        .id             = -1,
        .resource       = dm355evm_dm9000_rsrc,
        .num_resources  = ARRAY_SIZE(dm355evm_dm9000_rsrc),
+       .dev            = {
+               .platform_data = &dm335evm_dm9000_platdata,
+       },
 };
 
 static struct tvp514x_platform_data tvp5146_pdata = {
index 95b55aae1366f6ef9fd6f19277dce0ed30f626fc..509e64ab1994ac3b0c5d94e1144772fd93d99465 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/i2c.h>
 #include <linux/platform_data/pcf857x.h>
 #include <linux/platform_data/at24.h>
+#include <linux/platform_data/gpio-davinci.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
@@ -596,12 +597,15 @@ static struct i2c_board_info __initdata i2c_info[] =  {
        },
 };
 
+#define DM644X_I2C_SDA_PIN     GPIO_TO_PIN(2, 12)
+#define DM644X_I2C_SCL_PIN     GPIO_TO_PIN(2, 11)
+
 static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
-       .dev_id = "i2c_davinci",
+       .dev_id = "i2c_davinci.1",
        .table = {
-               GPIO_LOOKUP("davinci_gpio", 44, "sda",
+               GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SDA_PIN, "sda",
                            GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
-               GPIO_LOOKUP("davinci_gpio", 43, "scl",
+               GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SCL_PIN, "scl",
                            GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
        },
 };
index 2d37f5b0e1f5ce1ab8e85e0ad19bd25846154cab..a3c0d1e87647847605cfdaca2ac26f5c05aa8894 100644 (file)
@@ -532,11 +532,12 @@ static struct vpif_display_config dm646x_vpif_display_config = {
        .set_clock      = set_vpif_clock,
        .subdevinfo     = dm646x_vpif_subdev,
        .subdev_count   = ARRAY_SIZE(dm646x_vpif_subdev),
+       .i2c_adapter_id = 1,
        .chan_config[0] = {
                .outputs = dm6467_ch0_outputs,
                .output_count = ARRAY_SIZE(dm6467_ch0_outputs),
        },
-       .card_name      = "DM646x EVM",
+       .card_name      = "DM646x EVM Video Display",
 };
 
 /**
@@ -674,6 +675,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = {
        .setup_input_channel_mode = setup_vpif_input_channel_mode,
        .subdev_info = vpif_capture_sdev_info,
        .subdev_count = ARRAY_SIZE(vpif_capture_sdev_info),
+       .i2c_adapter_id = 1,
        .chan_config[0] = {
                .inputs = dm6467_ch0_inputs,
                .input_count = ARRAY_SIZE(dm6467_ch0_inputs),
@@ -694,6 +696,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = {
                        .fid_pol = 0,
                },
        },
+       .card_name = "DM646x EVM Video Capture",
 };
 
 static void __init evm_init_video(void)
index 0d32042b728fa447a77dc286b9310d10c6b7f96d..be8b892a6ea7061a8af9b932e007cc8652701b0f 100644 (file)
@@ -123,12 +123,16 @@ static const short hawk_mmcsd0_pins[] = {
        -1
 };
 
+#define DA850_HAWK_MMCSD_CD_PIN                GPIO_TO_PIN(3, 12)
+#define DA850_HAWK_MMCSD_WP_PIN                GPIO_TO_PIN(3, 13)
+
 static struct gpiod_lookup_table mmc_gpios_table = {
        .dev_id = "da830-mmc.0",
        .table = {
-               /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/
-               GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW),
-               GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_CD_PIN, "cd",
+                           GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_WP_PIN, "wp",
+                           GPIO_ACTIVE_LOW),
        },
 };
 
index 109ab1fa0d2c32b4f0840310281dd8ac1865ad34..c32ca27ab343d1258399648b5e9dfb9aa1e215e0 100644 (file)
@@ -488,7 +488,8 @@ static u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = {
        [IRQ_DM646X_MCASP0TXINT]        = 7,
        [IRQ_DM646X_MCASP0RXINT]        = 7,
        [IRQ_DM646X_RESERVED_3]         = 7,
-       [IRQ_DM646X_MCASP1TXINT]        = 7,    /* clockevent */
+       [IRQ_DM646X_MCASP1TXINT]        = 7,
+       [IRQ_TINT0_TINT12]              = 7,    /* clockevent */
        [IRQ_TINT0_TINT34]              = 7,    /* clocksource */
        [IRQ_TINT1_TINT12]              = 7,    /* DSP timer */
        [IRQ_TINT1_TINT34]              = 7,    /* system tick */
index fe57e26926292fe3dccc1adaf7d6c5e8552fd055..abca83d22ff3f1d217d642ed31e6719354ae4a3f 100644 (file)
@@ -29,6 +29,7 @@ static struct dev_pm_domain keystone_pm_domain = {
 
 static struct pm_clk_notifier_block platform_domain_notifier = {
        .pm_domain = &keystone_pm_domain,
+       .con_ids = { NULL },
 };
 
 static const struct of_device_id of_keystone_table[] = {
index 793a24a53c5261c20d1e9a39955615bf863fa9df..d7ca9e2b40d274c096333c7488011ac7dcc746db 100644 (file)
@@ -58,22 +58,24 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id)
                irq_num = gpio_to_irq(gpio);
                fiq_count = fiq_buffer[FIQ_CNT_INT_00 + gpio];
 
-               while (irq_counter[gpio] < fiq_count) {
-                       if (gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) {
-                               struct irq_data *d = irq_get_irq_data(irq_num);
-
-                               /*
-                                * It looks like handle_edge_irq() that
-                                * OMAP GPIO edge interrupts default to,
-                                * expects interrupt already unmasked.
-                                */
-                               if (irq_chip && irq_chip->irq_unmask)
+               if (irq_counter[gpio] < fiq_count &&
+                               gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) {
+                       struct irq_data *d = irq_get_irq_data(irq_num);
+
+                       /*
+                        * handle_simple_irq() that OMAP GPIO edge
+                        * interrupts default to since commit 80ac93c27441
+                        * requires interrupt already acked and unmasked.
+                        */
+                       if (irq_chip) {
+                               if (irq_chip->irq_ack)
+                                       irq_chip->irq_ack(d);
+                               if (irq_chip->irq_unmask)
                                        irq_chip->irq_unmask(d);
                        }
-                       generic_handle_irq(irq_num);
-
-                       irq_counter[gpio]++;
                }
+               for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++)
+                       generic_handle_irq(irq_num);
        }
        return IRQ_HANDLED;
 }
index 4603c30fef73a721b8564ed74ffe63ce21b5966c..0d9ce58bc464a87249f1551a0882c18d5009bc5d 100644 (file)
@@ -243,8 +243,4 @@ arch/arm/mach-omap2/pm-asm-offsets.s: arch/arm/mach-omap2/pm-asm-offsets.c
 include/generated/ti-pm-asm-offsets.h: arch/arm/mach-omap2/pm-asm-offsets.s FORCE
        $(call filechk,offsets,__TI_PM_ASM_OFFSETS_H__)
 
-# For rule to generate ti-emif-asm-offsets.h dependency
-include drivers/memory/Makefile.asm-offsets
-
-arch/arm/mach-omap2/sleep33xx.o: include/generated/ti-pm-asm-offsets.h include/generated/ti-emif-asm-offsets.h
-arch/arm/mach-omap2/sleep43xx.o: include/generated/ti-pm-asm-offsets.h include/generated/ti-emif-asm-offsets.h
+$(obj)/sleep33xx.o $(obj)/sleep43xx.o: include/generated/ti-pm-asm-offsets.h
index 6d4392da7c11008dcc2f2c6eb762e1c850b22190..b9846b19e5e2c7b9737085e23c56d83e0d0a6f67 100644 (file)
@@ -7,9 +7,12 @@
 
 #include <linux/kbuild.h>
 #include <linux/platform_data/pm33xx.h>
+#include <linux/ti-emif-sram.h>
 
 int main(void)
 {
+       ti_emif_asm_offsets();
+
        DEFINE(AMX3_PM_WFI_FLAGS_OFFSET,
               offsetof(struct am33xx_pm_sram_data, wfi_flags));
        DEFINE(AMX3_PM_L2_AUX_CTRL_VAL_OFFSET,
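
The EMIF offsets are folded into the same generated header via
ti_emif_asm_offsets(), which lets the sleep33xx/sleep43xx hunks below
drop the separate ti-emif-asm-offsets.h include. The DEFINE() calls work
by compiling this file to assembly and scraping marker strings from the
output, roughly the mechanism of include/linux/kbuild.h:

        /* Sketch of the kbuild offsets trick: each DEFINE() plants an
         * "->SYM value" marker in the generated .s file, which the
         * filechk recipe rewrites into a #define in the header. */
        #define DEFINE(sym, val) \
                asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
        #define OFFSET(sym, str, mem) \
                DEFINE(sym, offsetof(struct str, mem))
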
index 76eb6ec5f157e9753cf7bc9773801a35d55ffd27..1e6a967cd2d5890342fb76bbe3b0c8c42ec6491d 100644 (file)
@@ -188,7 +188,7 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag)
                                       ((prev & OMAP_POWERSTATE_MASK) << 0));
                        trace_power_domain_target_rcuidle(pwrdm->name,
                                                          trace_state,
-                                                         smp_processor_id());
+                                                         raw_smp_processor_id());
                }
                break;
        default:
@@ -518,7 +518,7 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst)
        if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
                /* Trace the pwrdm desired target state */
                trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
-                                                 smp_processor_id());
+                                                 raw_smp_processor_id());
                /* Program the pwrdm desired target state */
                ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
        }
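
raw_smp_processor_id() skips the preemption-safety check that the plain
accessor performs; the CPU number here only annotates a trace event, so
a stale value is harmless. Roughly how include/linux/smp.h relates the
two forms:

        #ifdef CONFIG_DEBUG_PREEMPT
        /* checked form: warns if the caller could migrate between CPUs */
        # define smp_processor_id() debug_smp_processor_id()
        #else
        # define smp_processor_id() raw_smp_processor_id()
        #endif
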
index 218d79930b04623c24af79aa75468186a1b39f67..322b3bb868b49766f53ac807674ced65b5b42bc9 100644 (file)
@@ -6,7 +6,6 @@
  *     Dave Gerlach, Vaibhav Bedia
  */
 
-#include <generated/ti-emif-asm-offsets.h>
 #include <generated/ti-pm-asm-offsets.h>
 #include <linux/linkage.h>
 #include <linux/ti-emif-sram.h>
index b24be624e8b993d12ad7768cb2ed1ecea4676734..8903814a6677d327579f9410f9b9b641e23ed97a 100644 (file)
@@ -6,7 +6,6 @@
  *     Dave Gerlach, Vaibhav Bedia
  */
 
-#include <generated/ti-emif-asm-offsets.h>
 #include <generated/ti-pm-asm-offsets.h>
 #include <linux/linkage.h>
 #include <linux/ti-emif-sram.h>
index 59589a4a0d4b2023b628e9485aec98849c0f4c76..885e8f12e4b911bfaaf74ce8a8f9767b95bd08e7 100644 (file)
@@ -427,9 +427,9 @@ static struct gpiod_lookup_table jive_wm8750_gpiod_table = {
        .dev_id         = "spi_gpio",
        .table          = {
                GPIO_LOOKUP("GPIOB", 4,
-                           "gpio-sck", GPIO_ACTIVE_HIGH),
+                           "sck", GPIO_ACTIVE_HIGH),
                GPIO_LOOKUP("GPIOB", 9,
-                           "gpio-mosi", GPIO_ACTIVE_HIGH),
+                           "mosi", GPIO_ACTIVE_HIGH),
                GPIO_LOOKUP("GPIOH", 10,
                            "cs", GPIO_ACTIVE_HIGH),
                { },
index b5030e1a41d829fae51d93afdadd2597f99792db..d3ea6454e775254cfc4899dd41b0a2ecd9fb8af5 100644 (file)
@@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
 #define SCRATCH_SIZE 80
 
 /* total stack size used in JITed code */
-#define _STACK_SIZE \
-       (ctx->prog->aux->stack_depth + \
-        + SCRATCH_SIZE + \
-        + 4 /* extra for skb_copy_bits buffer */)
-
-#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+#define _STACK_SIZE    (ctx->prog->aux->stack_depth + SCRATCH_SIZE)
+#define STACK_SIZE     ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
 
 /* Get the offset of eBPF REGISTERs stored on scratch space. */
-#define STACK_VAR(off) (STACK_SIZE-off-4)
-
-/* Offset of skb_copy_bits buffer */
-#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
+#define STACK_VAR(off) (STACK_SIZE - off)
 
 #if __LINUX_ARM_ARCH__ < 7
 
@@ -1452,83 +1445,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                        emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
                emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
                break;
-       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-       case BPF_LD | BPF_ABS | BPF_W:
-       case BPF_LD | BPF_ABS | BPF_H:
-       case BPF_LD | BPF_ABS | BPF_B:
-       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-       case BPF_LD | BPF_IND | BPF_W:
-       case BPF_LD | BPF_IND | BPF_H:
-       case BPF_LD | BPF_IND | BPF_B:
-       {
-               const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
-               const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
-                                                    /* rtn value */
-               const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
-               const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
-               const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
-               const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
-               int size;
-
-               /* Setting up first argument */
-               emit(ARM_MOV_R(r0, r4), ctx);
-
-               /* Setting up second argument */
-               emit_a32_mov_i(r1, imm, false, ctx);
-               if (BPF_MODE(code) == BPF_IND)
-                       emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
-                                      false, false, BPF_ADD);
-
-               /* Setting up third argument */
-               switch (BPF_SIZE(code)) {
-               case BPF_W:
-                       size = 4;
-                       break;
-               case BPF_H:
-                       size = 2;
-                       break;
-               case BPF_B:
-                       size = 1;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               emit_a32_mov_i(r2, size, false, ctx);
-
-               /* Setting up fourth argument */
-               emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
-
-               /* Setting up function pointer to call */
-               emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
-               emit_blx_r(r6, ctx);
-
-               emit(ARM_EOR_R(r1, r1, r1), ctx);
-               /* Check if return address is NULL or not.
-                * if NULL then jump to epilogue
-                * else continue to load the value from retn address
-                */
-               emit(ARM_CMP_I(r0, 0), ctx);
-               jmp_offset = epilogue_offset(ctx);
-               check_imm24(jmp_offset);
-               _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
-
-               /* Load value from the address */
-               switch (BPF_SIZE(code)) {
-               case BPF_W:
-                       emit(ARM_LDR_I(r0, r0, 0), ctx);
-                       emit_rev32(r0, r0, ctx);
-                       break;
-               case BPF_H:
-                       emit(ARM_LDRH_I(r0, r0, 0), ctx);
-                       emit_rev16(r0, r0, ctx);
-                       break;
-               case BPF_B:
-                       emit(ARM_LDRB_I(r0, r0, 0), ctx);
-                       /* No need to reverse */
-                       break;
-               }
-               break;
-       }
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
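
With the BPF_ABS/BPF_IND cases gone (classic packet loads are now
handled generically before reaching the JIT), the skb_copy_bits() buffer
disappears from the frame and STACK_VAR() loses its 4-byte bias. A
worked example of the new layout in the first hunk, assuming this file's
STACK_ALIGNMENT of 8:

        /* stack_depth = 64, SCRATCH_SIZE = 80:
         *   _STACK_SIZE    = 64 + 80       = 144
         *   STACK_SIZE     = ALIGN(144, 8) = 144
         *   STACK_VAR(off) = 144 - off
         * The old layout reserved 4 more bytes for the skb_copy_bits()
         * buffer and biased STACK_VAR() by -4 to step over it. */
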
index bcdecc25461bcaa51f6df405807935bb481de2c5..b2aa9b32bff2b5e9d2e6d102a4cd58f6cf8c5676 100644 (file)
@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 {
        unsigned long flags;
        struct kprobe *p = &op->kp;
-       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       struct kprobe_ctlblk *kcb;
 
        /* Save skipped registers */
        regs->ARM_pc = (unsigned long)op->kp.addr;
        regs->ARM_ORIG_r0 = ~0UL;
 
        local_irq_save(flags);
+       kcb = get_kprobe_ctlblk();
 
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
@@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 
        local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(optimized_callback)
 
 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
 {
index 4c375e11ae9531bec8b5a05bd14ab8424ccd6653..af4ee2cef2f9650e699de3a335eaedc58dc9d37b 100644 (file)
@@ -257,7 +257,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
 
        if (exceptions == VFP_EXCEPTION_ERROR) {
                vfp_panic("unhandled bounce", inst);
-               vfp_raise_sigfpe(FPE_FIXME, regs);
+               vfp_raise_sigfpe(FPE_FLTINV, regs);
                return;
        }
 
index 15402861bb59eb9377682e56453965e7713592ca..87f7d2f9f17c266fee66973f29b6efd6c0590a71 100644 (file)
@@ -56,7 +56,11 @@ KBUILD_AFLAGS        += $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS  += $(call cc-option,-mabi=lp64)
 KBUILD_AFLAGS  += $(call cc-option,-mabi=lp64)
 
+ifeq ($(cc-name),clang)
+KBUILD_CFLAGS  += -DCONFIG_ARCH_SUPPORTS_INT128
+else
 KBUILD_CFLAGS  += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
+endif
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS        += -mbig-endian
index 4eef36b2253865833a659be0ca5b3f2ab1b34cb7..88e712ea757a238152c06f932dfecfeb5b36b843 100644 (file)
@@ -212,3 +212,7 @@ &uart_AO {
        pinctrl-0 = <&uart_ao_a_pins>;
        pinctrl-names = "default";
 };
+
+&usb0 {
+       status = "okay";
+};
index 22bf37404ff1b41ba74b96702d523a517e978c9c..3e3eb31748a35a7790a9dc90e56971f004660298 100644 (file)
@@ -271,3 +271,15 @@ &uart_AO {
        pinctrl-0 = <&uart_ao_a_pins>;
        pinctrl-names = "default";
 };
+
+&usb0 {
+       status = "okay";
+};
+
+&usb2_phy0 {
+       /*
+        * even though the schematics don't show it:
+        * HDMI_5V is also used as supply for the USB VBUS.
+        */
+       phy-supply = <&hdmi_5v>;
+};
index 69c721a70e44b7c377040c2428833fc141f0ddf0..6739697be1defd72693e1e867c172a85cfe643ba 100644 (file)
@@ -215,3 +215,7 @@ &uart_AO {
        pinctrl-0 = <&uart_ao_a_pins>;
        pinctrl-names = "default";
 };
+
+&usb0 {
+       status = "okay";
+};
index 0a0953fbc7d42b776a6b4d64f8da6a041ae30cb3..0cfd701809dec578ac31f5f68a7fcfbc21822619 100644 (file)
@@ -185,3 +185,7 @@ &uart_AO {
        pinctrl-0 = <&uart_ao_a_pins>;
        pinctrl-names = "default";
 };
+
+&usb0 {
+       status = "okay";
+};
index e1a39cbed8c9c18abf90c521ba09dbd5356b7502..dba365ed4bd5f903e34f4c6fdf1b14d539db0d89 100644 (file)
@@ -20,6 +20,67 @@ secmon_reserved_alt: secmon@5000000 {
                        no-map;
                };
        };
+
+       soc {
+               usb0: usb@c9000000 {
+                       status = "disabled";
+                       compatible = "amlogic,meson-gxl-dwc3";
+                       #address-cells = <2>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       clocks = <&clkc CLKID_USB>;
+                       clock-names = "usb_general";
+                       resets = <&reset RESET_USB_OTG>;
+                       reset-names = "usb_otg";
+
+                       dwc3: dwc3@c9000000 {
+                               compatible = "snps,dwc3";
+                               reg = <0x0 0xc9000000 0x0 0x100000>;
+                               interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+                               dr_mode = "host";
+                               maximum-speed = "high-speed";
+                               snps,dis_u2_susphy_quirk;
+                               phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>;
+                       };
+               };
+       };
+};
+
+&apb {
+       usb2_phy0: phy@78000 {
+               compatible = "amlogic,meson-gxl-usb2-phy";
+               #phy-cells = <0>;
+               reg = <0x0 0x78000 0x0 0x20>;
+               clocks = <&clkc CLKID_USB>;
+               clock-names = "phy";
+               resets = <&reset RESET_USB_OTG>;
+               reset-names = "phy";
+               status = "okay";
+       };
+
+       usb2_phy1: phy@78020 {
+               compatible = "amlogic,meson-gxl-usb2-phy";
+               #phy-cells = <0>;
+               reg = <0x0 0x78020 0x0 0x20>;
+               clocks = <&clkc CLKID_USB>;
+               clock-names = "phy";
+               resets = <&reset RESET_USB_OTG>;
+               reset-names = "phy";
+               status = "okay";
+       };
+
+       usb3_phy: phy@78080 {
+               compatible = "amlogic,meson-gxl-usb3-phy";
+               #phy-cells = <0>;
+               reg = <0x0 0x78080 0x0 0x20>;
+               interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&clkc CLKID_USB>, <&clkc_AO CLKID_AO_CEC_32K>;
+               clock-names = "phy", "peripheral";
+               resets = <&reset RESET_USB_OTG>, <&reset RESET_USB_OTG>;
+               reset-names = "phy", "peripheral";
+               status = "okay";
+       };
 };
 
 &ethmac {
index 4fd46c1546a7e5a82ec63dc50e15a2e0ae65d3e3..0868da476e41ff1a81aa40be378a13737c2a2c4a 100644 (file)
@@ -406,3 +406,7 @@ &saradc {
        status = "okay";
        vref-supply = <&vddio_ao18>;
 };
+
+&usb0 {
+       status = "okay";
+};
index d076a7c425dddd683ed81a9cfcb45fc7f2df682a..247888d68a3aabe06e8841a8d93cbfe8afe12960 100644 (file)
@@ -80,6 +80,19 @@ cpu7: cpu@103 {
        };
 };
 
+&apb {
+       usb2_phy2: phy@78040 {
+               compatible = "amlogic,meson-gxl-usb2-phy";
+               #phy-cells = <0>;
+               reg = <0x0 0x78040 0x0 0x20>;
+               clocks = <&clkc CLKID_USB>;
+               clock-names = "phy";
+               resets = <&reset RESET_USB_OTG>;
+               reset-names = "phy";
+               status = "okay";
+       };
+};
+
 &clkc_AO {
        compatible = "amlogic,meson-gxm-aoclkc", "amlogic,meson-gx-aoclkc";
 };
@@ -100,3 +113,7 @@ &vpu {
 &hdmi_tx {
        compatible = "amlogic,meson-gxm-dw-hdmi", "amlogic,meson-gx-dw-hdmi";
 };
+
+&dwc3 {
+       phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>, <&usb2_phy2>;
+};
index 2ac43221ddb680acafd1507a866da58514a53370..69804c5f1197caac2815199edde847a547a0ae03 100644 (file)
@@ -56,8 +56,6 @@ mb_fixed_3v3: mcc-sb-3v3 {
 
                        gpio_keys {
                                compatible = "gpio-keys";
-                               #address-cells = <1>;
-                               #size-cells = <0>;
 
                                power-button {
                                        debounce_interval = <50>;
index 4b5465da81d8e29ac616fc18de3f1351e81321ed..8c68e0c26f1b281c0259fd0f5cca18d2eebcb8a4 100644 (file)
@@ -36,11 +36,11 @@ sata {
                #size-cells = <1>;
                ranges = <0x0 0x0 0x67d00000 0x00800000>;
 
-               sata0: ahci@210000 {
+               sata0: ahci@0 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00210000 0x1000>;
+                       reg = <0x00000000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -52,9 +52,9 @@ sata0_port0: sata-port@0 {
                        };
                };
 
-               sata_phy0: sata_phy@212100 {
+               sata_phy0: sata_phy@2100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00212100 0x1000>;
+                       reg = <0x00002100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -66,11 +66,11 @@ sata0_phy0: sata-phy@0 {
                        };
                };
 
-               sata1: ahci@310000 {
+               sata1: ahci@10000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00310000 0x1000>;
+                       reg = <0x00010000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -82,9 +82,9 @@ sata1_port0: sata-port@0 {
                        };
                };
 
-               sata_phy1: sata_phy@312100 {
+               sata_phy1: sata_phy@12100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00312100 0x1000>;
+                       reg = <0x00012100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -96,11 +96,11 @@ sata1_phy0: sata-phy@0 {
                        };
                };
 
-               sata2: ahci@120000 {
+               sata2: ahci@20000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00120000 0x1000>;
+                       reg = <0x00020000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -112,9 +112,9 @@ sata2_port0: sata-port@0 {
                        };
                };
 
-               sata_phy2: sata_phy@122100 {
+               sata_phy2: sata_phy@22100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00122100 0x1000>;
+                       reg = <0x00022100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -126,11 +126,11 @@ sata2_phy0: sata-phy@0 {
                        };
                };
 
-               sata3: ahci@130000 {
+               sata3: ahci@30000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00130000 0x1000>;
+                       reg = <0x00030000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -142,9 +142,9 @@ sata3_port0: sata-port@0 {
                        };
                };
 
-               sata_phy3: sata_phy@132100 {
+               sata_phy3: sata_phy@32100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00132100 0x1000>;
+                       reg = <0x00032100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -156,11 +156,11 @@ sata3_phy0: sata-phy@0 {
                        };
                };
 
-               sata4: ahci@330000 {
+               sata4: ahci@100000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00330000 0x1000>;
+                       reg = <0x00100000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -172,9 +172,9 @@ sata4_port0: sata-port@0 {
                        };
                };
 
-               sata_phy4: sata_phy@332100 {
+               sata_phy4: sata_phy@102100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00332100 0x1000>;
+                       reg = <0x00102100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -186,11 +186,11 @@ sata4_phy0: sata-phy@0 {
                        };
                };
 
-               sata5: ahci@400000 {
+               sata5: ahci@110000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00400000 0x1000>;
+                       reg = <0x00110000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -202,9 +202,9 @@ sata5_port0: sata-port@0 {
                        };
                };
 
-               sata_phy5: sata_phy@402100 {
+               sata_phy5: sata_phy@112100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00402100 0x1000>;
+                       reg = <0x00112100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -216,11 +216,11 @@ sata5_phy0: sata-phy@0 {
                        };
                };
 
-               sata6: ahci@410000 {
+               sata6: ahci@120000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00410000 0x1000>;
+                       reg = <0x00120000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -232,9 +232,9 @@ sata6_port0: sata-port@0 {
                        };
                };
 
-               sata_phy6: sata_phy@412100 {
+               sata_phy6: sata_phy@122100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00412100 0x1000>;
+                       reg = <0x00122100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -246,11 +246,11 @@ sata6_phy0: sata-phy@0 {
                        };
                };
 
-               sata7: ahci@420000 {
+               sata7: ahci@130000 {
                        compatible = "brcm,iproc-ahci", "generic-ahci";
-                       reg = <0x00420000 0x1000>;
+                       reg = <0x00130000 0x1000>;
                        reg-names = "ahci";
-                       interrupts = <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>;
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
@@ -262,9 +262,9 @@ sata7_port0: sata-port@0 {
                        };
                };
 
-               sata_phy7: sata_phy@422100 {
+               sata_phy7: sata_phy@132100 {
                        compatible = "brcm,iproc-sr-sata-phy";
-                       reg = <0x00422100 0x1000>;
+                       reg = <0x00132100 0x1000>;
                        reg-names = "phy";
                        #address-cells = <1>;
                        #size-cells = <0>;
index c0231d077fa61f83dd66a7d9cd5aeb7423c6f6ff..1ad8677f6a0a622c66f842ef16478091ee22ee73 100644 (file)
@@ -1317,7 +1317,7 @@ i2s1: i2s@14d60000 {
                        reg = <0x14d60000 0x100>;
                        dmas = <&pdma0 31 &pdma0 30>;
                        dma-names = "tx", "rx";
-                       interrupts = <GIC_SPI 435 IRQ_TYPE_NONE>;
+                       interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&cmu_peric CLK_PCLK_I2S1>,
                                 <&cmu_peric CLK_PCLK_I2S1>,
                                 <&cmu_peric CLK_SCLK_I2S1>;
index 48cad7919efa3809e5cbdc84895492304c56d84e..ed2f1237ea1e9a9c320e7b2da56145f07e147079 100644 (file)
@@ -38,9 +38,10 @@ CP110_LABEL(ethernet): ethernet@0 {
                        compatible = "marvell,armada-7k-pp22";
                        reg = <0x0 0x100000>, <0x129000 0xb000>;
                        clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>,
-                                <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>;
+                                <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>,
+                                <&CP110_LABEL(clk) 1 18>;
                        clock-names = "pp_clk", "gop_clk",
-                                     "mg_clk", "axi_clk";
+                                     "mg_clk", "mg_core_clk", "axi_clk";
                        marvell,system-controller = <&CP110_LABEL(syscon0)>;
                        status = "disabled";
                        dma-coherent;
@@ -141,6 +142,8 @@ CP110_LABEL(xmdio): mdio@12a600 {
                        #size-cells = <0>;
                        compatible = "marvell,xmdio";
                        reg = <0x12a600 0x10>;
+                       clocks = <&CP110_LABEL(clk) 1 5>,
+                                <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>;
                        status = "disabled";
                };
 
index a8baad7b80df2d4c9bd39c38e39d3435ded8d0dc..13f57fff147742c22f889412e866dfc64a40ecd5 100644 (file)
@@ -46,7 +46,7 @@ phy: phy@0 {
                                compatible = "ethernet-phy-ieee802.3-c22";
                                reg = <0x0>;
                                interrupt-parent = <&gpio>;
-                               interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_LOW>;
                        };
                };
        };
index 24552f19b3fad5d3a66b9171fcdd2fe5c4ec9d97..6a573875d45ace01ddf3f916234cd5487870230b 100644 (file)
@@ -36,4 +36,30 @@ config {
                        drive-strength = <2>;   /* 2 MA */
                };
        };
+
+       blsp1_uart1_default: blsp1_uart1_default {
+               mux {
+                       pins = "gpio41", "gpio42", "gpio43", "gpio44";
+                       function = "blsp_uart2";
+               };
+
+               config {
+                       pins = "gpio41", "gpio42", "gpio43", "gpio44";
+                       drive-strength = <16>;
+                       bias-disable;
+               };
+       };
+
+       blsp1_uart1_sleep: blsp1_uart1_sleep {
+               mux {
+                       pins = "gpio41", "gpio42", "gpio43", "gpio44";
+                       function = "gpio";
+               };
+
+               config {
+                       pins = "gpio41", "gpio42", "gpio43", "gpio44";
+                       drive-strength = <2>;
+                       bias-disable;
+               };
+       };
 };
index 59b29ddfb6e91e8377f50ff98623de073d19b3a4..6167af9556599ad3f019bd2696905bc9e64f3b76 100644 (file)
@@ -14,6 +14,28 @@ pinconf {
                };
        };
 
+       bt_en_gpios: bt_en_gpios {
+               pinconf {
+                       pins = "gpio19";
+                       function = PMIC_GPIO_FUNC_NORMAL;
+                       output-low;
+                       power-source = <PM8994_GPIO_S4>; // 1.8V
+                       qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+                       bias-pull-down;
+               };
+       };
+
+       wlan_en_gpios: wlan_en_gpios {
+               pinconf {
+                       pins = "gpio8";
+                       function = PMIC_GPIO_FUNC_NORMAL;
+                       output-low;
+                       power-source = <PM8994_GPIO_S4>; // 1.8V
+                       qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+                       bias-pull-down;
+               };
+       };
+
        volume_up_gpio: pm8996_gpio2 {
                pinconf {
                        pins = "gpio2";
@@ -26,6 +48,16 @@ pinconf {
                };
        };
 
+       divclk4_pin_a: divclk4 {
+               pinconf {
+                       pins = "gpio18";
+                       function = PMIC_GPIO_FUNC_FUNC2;
+
+                       bias-disable;
+                       power-source = <PM8994_GPIO_S4>;
+               };
+       };
+
        usb3_vbus_det_gpio: pm8996_gpio22 {
                pinconf {
                        pins = "gpio22";
index 1c8f1b86472de9c149b706502dcc552f19376ae5..4b8bb026346edb17ed2bc3c9b35790f5f99ec059 100644 (file)
@@ -23,6 +23,7 @@ / {
        aliases {
                serial0 = &blsp2_uart1;
                serial1 = &blsp2_uart2;
+               serial2 = &blsp1_uart1;
                i2c0    = &blsp1_i2c2;
                i2c1    = &blsp2_i2c1;
                i2c2    = &blsp2_i2c0;
@@ -34,7 +35,36 @@ chosen {
                stdout-path = "serial0:115200n8";
        };
 
+       clocks {
+               divclk4: divclk4 {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+                       clock-output-names = "divclk4";
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&divclk4_pin_a>;
+               };
+       };
+
        soc {
+               serial@7570000 {
+                       label = "BT-UART";
+                       status = "okay";
+                       pinctrl-names = "default", "sleep";
+                       pinctrl-0 = <&blsp1_uart1_default>;
+                       pinctrl-1 = <&blsp1_uart1_sleep>;
+
+                       bluetooth {
+                               compatible = "qcom,qca6174-bt";
+
+                               /* bt_disable_n gpio */
+                               enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
+
+                               clocks = <&divclk4>;
+                       };
+               };
+
                serial@75b0000 {
                        label = "LS-UART1";
                        status = "okay";
@@ -139,9 +169,40 @@ usb2_id: usb2-id {
                        pinctrl-0 = <&usb2_vbus_det_gpio>;
                };
 
+               bt_en: bt-en-1-8v {
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&bt_en_gpios>;
+                       compatible = "regulator-fixed";
+                       regulator-name = "bt-en-regulator";
+                       regulator-min-microvolt = <1800000>;
+                       regulator-max-microvolt = <1800000>;
+
+                       /* WLAN card specific delay */
+                       startup-delay-us = <70000>;
+                       enable-active-high;
+               };
+
+               wlan_en: wlan-en-1-8v {
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&wlan_en_gpios>;
+                       compatible = "regulator-fixed";
+                       regulator-name = "wlan-en-regulator";
+                       regulator-min-microvolt = <1800000>;
+                       regulator-max-microvolt = <1800000>;
+
+                       gpio = <&pm8994_gpios 8 0>;
+
+                       /* WLAN card specific delay */
+                       startup-delay-us = <70000>;
+                       enable-active-high;
+               };
+
                agnoc@0 {
                        qcom,pcie@600000 {
+                               status = "okay";
                                perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;
+                               vddpe-supply = <&wlan_en>;
+                               vddpe1-supply = <&bt_en>;
                        };
 
                        qcom,pcie@608000 {
index 410ae787ebb4de7c6db1ca8329240e718438d483..f8e49d0b468123f75251d7f975a42d65547bd373 100644 (file)
@@ -419,6 +419,16 @@ kryocc: clock-controller@6400000 {
                        #clock-cells = <1>;
                };
 
+               blsp1_uart1: serial@7570000 {
+                       compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+                       reg = <0x07570000 0x1000>;
+                       interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>,
+                                <&gcc GCC_BLSP1_AHB_CLK>;
+                       clock-names = "core", "iface";
+                       status = "disabled";
+               };
+
                blsp1_spi0: spi@7575000 {
                        compatible = "qcom,spi-qup-v2.2.1";
                        reg = <0x07575000 0x600>;
index e62bda1cf2d9db9b7058a2ab48529508417b92e4..c32dd3419c870ef080e58780f7ac46e93c136455 100644 (file)
@@ -414,7 +414,7 @@ emmc: sdhc@5a000000 {
                        mmc-ddr-1_8v;
                        mmc-hs200-1_8v;
                        mmc-pwrseq = <&emmc_pwrseq>;
-                       cdns,phy-input-delay-legacy = <4>;
+                       cdns,phy-input-delay-legacy = <9>;
                        cdns,phy-input-delay-mmc-highspeed = <2>;
                        cdns,phy-input-delay-mmc-ddr = <3>;
                        cdns,phy-dll-delay-sdclk = <21>;
index 2c1a92fafbfbe053808b00e4b8b66804f6744e4d..440c2e6a638b998c163b3f8aea91f94e115f516b 100644 (file)
@@ -67,3 +67,11 @@ ethphy: ethphy@0 {
                reg = <0>;
        };
 };
+
+&pinctrl_ether_rgmii {
+       tx {
+               pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1",
+                      "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL";
+               drive-strength = <9>;
+       };
+};
index 9efe20d075890ee24638bfa125040e17ac8034cd..3a5ed789c056e37bd8dc07e9aa21f8d7e44ea4b8 100644 (file)
@@ -519,7 +519,7 @@ emmc: sdhc@5a000000 {
                        mmc-ddr-1_8v;
                        mmc-hs200-1_8v;
                        mmc-pwrseq = <&emmc_pwrseq>;
-                       cdns,phy-input-delay-legacy = <4>;
+                       cdns,phy-input-delay-legacy = <9>;
                        cdns,phy-input-delay-mmc-highspeed = <2>;
                        cdns,phy-input-delay-mmc-ddr = <3>;
                        cdns,phy-dll-delay-sdclk = <21>;
index 7c8f710d9bfa11fb3f9a3432c6e16cb4e18ef06c..e85d6ddea3c2171bec09a6c2a9bf3bfe817c2478 100644 (file)
@@ -334,7 +334,7 @@ emmc: sdhc@5a000000 {
                        mmc-ddr-1_8v;
                        mmc-hs200-1_8v;
                        mmc-pwrseq = <&emmc_pwrseq>;
-                       cdns,phy-input-delay-legacy = <4>;
+                       cdns,phy-input-delay-legacy = <9>;
                        cdns,phy-input-delay-mmc-highspeed = <2>;
                        cdns,phy-input-delay-mmc-ddr = <3>;
                        cdns,phy-dll-delay-sdclk = <21>;
index 30014a9f8f2b335f95193af0f6e45ed9cd59a48a..ea690b3562afb20773ce81cf3ea48f897f5998b1 100644 (file)
@@ -75,6 +75,7 @@
 #define ARM_CPU_IMP_CAVIUM             0x43
 #define ARM_CPU_IMP_BRCM               0x42
 #define ARM_CPU_IMP_QCOM               0x51
+#define ARM_CPU_IMP_NVIDIA             0x4E
 
 #define ARM_CPU_PART_AEM_V8            0xD0F
 #define ARM_CPU_PART_FOUNDATION                0xD00
 #define QCOM_CPU_PART_FALKOR           0xC00
 #define QCOM_CPU_PART_KRYO             0x200
 
+#define NVIDIA_CPU_PART_DENVER         0x003
+#define NVIDIA_CPU_PART_CARMEL         0x004
+
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
 #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
 #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
+#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 
 #ifndef __ASSEMBLY__
 
index 23b33e8ea03a6a56ab3066704c1090da9921e073..1dab3a9846082e5bfbf640dc04c3e8aa70ba24cc 100644 (file)
@@ -333,7 +333,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
        } else {
                u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
                sctlr |= (1 << 25);
-               vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
+               vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
        }
 }
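
The fix transposes the last two arguments: the accessor takes the value
before the register index, so the old call stored the SCTLR_EL1 encoding
as data. The signature being matched, paraphrased from the arm64 KVM
headers:

        void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
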
 
index ab46bc70add636e0793f32083f800ff06bf80353..469de8acd06f5fa4103d9f55d0fb4c4c97d853a9 100644 (file)
@@ -75,6 +75,9 @@ struct kvm_arch {
 
        /* Interrupt controller */
        struct vgic_dist        vgic;
+
+       /* Mandated version of PSCI */
+       u32 psci_version;
 };
 
 #define KVM_NR_MEM_OBJS     40
index 082110993647b9b7e30cf18da5d9d447c58d362f..6128992c2ded6b7e1aa4ba835474f6f45f6fdc5e 100644 (file)
@@ -360,6 +360,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
        return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+                                     gpa_t gpa, void *data, unsigned long len)
+{
+       int srcu_idx = srcu_read_lock(&kvm->srcu);
+       int ret = kvm_read_guest(kvm, gpa, data, len);
+
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+       return ret;
+}
+
 #ifdef CONFIG_KVM_INDIRECT_VECTORS
 /*
  * EL2 vectors can be mapped and rerouted in a number of ways,
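
A hypothetical call site for the helper added above: copying one guest
table entry from a path that does not already hold the SRCU read lock
(names illustrative):

        u64 entry;
        int ret;

        /* safe outside kvm->srcu: the helper brackets the copy itself */
        ret = kvm_read_guest_lock(kvm, gpa, &entry, sizeof(entry));
        if (ret)
                return ret;
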
index b6dbbe3123a9a9f8308a07b9257bfe968c184804..97d0ef12e2ff561938acdf96196a1a213664c522 100644 (file)
@@ -39,7 +39,7 @@ struct mod_arch_specific {
 u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
                          Elf64_Sym *sym);
 
-u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val);
+u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val);
 
 #ifdef CONFIG_RANDOMIZE_BASE
 extern u64 module_alloc_base;
index 7e2c27e63cd894371655a569046faaa67cfc1837..7c4c8f318ba999afdac0b41300c9613fe161e91d 100644 (file)
@@ -230,7 +230,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
        }
 }
 
-extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
+extern void __sync_icache_dcache(pte_t pteval);
 
 /*
  * PTE bits configuration in the presence of hardware Dirty Bit Management
@@ -253,7 +253,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
        pte_t old_pte;
 
        if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
-               __sync_icache_dcache(pte, addr);
+               __sync_icache_dcache(pte);
 
        /*
         * If the existing pte is valid, check for potential race with
index 9abbf30446545a0668083b0891461f015563bcb1..04b3256f8e6d5f8e3e368b043f0fdcfeb7c23164 100644 (file)
@@ -206,6 +206,12 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_TIMER_CNT          ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL         ARM64_SYS_REG(3, 3, 14, 0, 2)
 
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW                 (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r)          (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+                                        KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION       KVM_REG_ARM_FW_REG(0)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR      0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
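
The new pseudo-register lets userspace read, and write back on restore,
the PSCI version exposed to the guest, through the standard one-reg
interface. A hedged userspace sketch, assuming a vCPU fd is already
open:

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        uint64_t psci_ver = 0;
        struct kvm_one_reg reg = {
                .id   = KVM_REG_ARM_PSCI_VERSION,
                .addr = (uint64_t)(unsigned long)&psci_ver,
        };

        /* error handling elided for brevity */
        ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
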
index a900befadfe84a0018112ad0e11d367fe896371f..e4a1182deff7f2ef24bce12082a0868f36715a1c 100644 (file)
@@ -316,6 +316,7 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
        MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
        MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
        MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+       MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
        {},
 };
 
index 536d572e55967286b65457631251c842c1951353..9d1b06d67c53d4addacf97dd96207aa91d2ba04b 100644 (file)
@@ -868,6 +868,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
        static const struct midr_range kpti_safe_list[] = {
                MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
                MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+               { /* sentinel */ }
        };
        char const *str = "command line option";
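
The added empty element terminates kpti_safe_list: the MIDR range walker
stops at a zeroed entry, so without a sentinel it would read past the
end of the array. A sketch of that walk, with a hypothetical match
helper:

        static bool midr_in_list(u32 midr, const struct midr_range *list)
        {
                for (; list->model; list++)  /* zeroed entry ends the list */
                        if (midr_matches(midr, list))  /* hypothetical */
                                return true;
                return false;
        }
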
 
index fa3637284a3d8f4c7dbfe4b7e107af9af94f2685..f0690c2ca3e0f2f27f761a1568e6b3cc637779a4 100644 (file)
@@ -43,7 +43,7 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 }
 
 #ifdef CONFIG_ARM64_ERRATUM_843419
-u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
+u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val)
 {
        struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
                                                          &mod->arch.init;
index 719fde8dcc19042b38164ddcaa7da429cd2180b2..155fd91e78f4a62180e7577355ca4a6b0eb283f4 100644 (file)
@@ -215,7 +215,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
                insn &= ~BIT(31);
        } else {
                /* out of range for ADR -> emit a veneer */
-               val = module_emit_adrp_veneer(mod, place, val & ~0xfff);
+               val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff);
                if (!val)
                        return -ENOEXEC;
                insn = aarch64_insn_gen_branch_imm((u64)place, val,
index 71d99af24ef20852a18deea4d29a25c0197cf651..7ff81fed46e1ebeebfdd58eddc5c718af5f3d565 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
 #include <linux/mm.h>
+#include <linux/nospec.h>
 #include <linux/smp.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
@@ -249,15 +250,20 @@ static struct perf_event *ptrace_hbp_get_event(unsigned int note_type,
 
        switch (note_type) {
        case NT_ARM_HW_BREAK:
-               if (idx < ARM_MAX_BRP)
-                       bp = tsk->thread.debug.hbp_break[idx];
+               if (idx >= ARM_MAX_BRP)
+                       goto out;
+               idx = array_index_nospec(idx, ARM_MAX_BRP);
+               bp = tsk->thread.debug.hbp_break[idx];
                break;
        case NT_ARM_HW_WATCH:
-               if (idx < ARM_MAX_WRP)
-                       bp = tsk->thread.debug.hbp_watch[idx];
+               if (idx >= ARM_MAX_WRP)
+                       goto out;
+               idx = array_index_nospec(idx, ARM_MAX_WRP);
+               bp = tsk->thread.debug.hbp_watch[idx];
                break;
        }
 
+out:
        return bp;
 }
 
@@ -1458,9 +1464,7 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num,
 {
        int ret;
        u32 kdata;
-       mm_segment_t old_fs = get_fs();
 
-       set_fs(KERNEL_DS);
        /* Watchpoint */
        if (num < 0) {
                ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata);
@@ -1471,7 +1475,6 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num,
        } else {
                ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata);
        }
-       set_fs(old_fs);
 
        if (!ret)
                ret = put_user(kdata, data);
@@ -1484,7 +1487,6 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num,
 {
        int ret;
        u32 kdata = 0;
-       mm_segment_t old_fs = get_fs();
 
        if (num == 0)
                return 0;
@@ -1493,12 +1495,10 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num,
        if (ret)
                return ret;
 
-       set_fs(KERNEL_DS);
        if (num < 0)
                ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata);
        else
                ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata);
-       set_fs(old_fs);
 
        return ret;
 }
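
The first hunk applies the usual Spectre-v1 pattern: reject out-of-range
indices first, then clamp with array_index_nospec() so a mispredicted
bounds check cannot steer a speculative load. The generic fallback
computes an all-ones or all-zero mask without branching, roughly:

        /* include/linux/nospec.h generic fallback, paraphrased:
         * yields ~0UL when 0 <= index < size, else 0. */
        static inline unsigned long
        array_index_mask_nospec(unsigned long index, unsigned long size)
        {
                return ~(long)(index | (size - 1UL - index)) >>
                        (BITS_PER_LONG - 1);
        }
        /* array_index_nospec(index, size) is then index & mask */
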
index ba964da31a252dd2ea517b2b7a4dcef06daa0f7b..8bbdc17e49df79cab3e8576783216e91aef6c746 100644 (file)
@@ -277,7 +277,8 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
         * If we were single stepping, we want to get the step exception after
         * we return from the trap.
         */
-       user_fastforward_single_step(current);
+       if (user_mode(regs))
+               user_fastforward_single_step(current);
 }
 
 static LIST_HEAD(undef_hook);
@@ -366,7 +367,7 @@ void force_signal_inject(int signal, int code, unsigned long address)
        }
 
        /* Force signals we don't understand to SIGKILL */
-       if (WARN_ON(signal != SIGKILL ||
+       if (WARN_ON(signal != SIGKILL &&
                    siginfo_layout(signal, code) != SIL_FAULT)) {
                signal = SIGKILL;
        }
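
The operator flip corrects the predicate: warn (and escalate to SIGKILL)
only for a signal that is neither SIGKILL itself nor one with a plain
fault layout. A worked truth table for the fixed condition:

        /* WARN_ON(signal != SIGKILL && siginfo_layout(...) != SIL_FAULT)
         *
         *   SIGKILL, any layout     -> pass through, no warning
         *   other,   SIL_FAULT      -> pass through, no warning
         *   other,   other layout   -> warn, force SIGKILL
         *
         * With '||' the first row warned too, since
         * siginfo_layout(SIGKILL, ...) != SIL_FAULT. */
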
index 959e50d2588c0f14b9eb9230522c3f12c3f7daf9..56a0260ceb11a95258099f0f19356b8989ff6afa 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+#include <kvm/arm_psci.h>
 #include <asm/cputype.h>
 #include <linux/uaccess.h>
 #include <asm/kvm.h>
@@ -205,7 +206,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
        return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
-                + NUM_TIMER_REGS;
+               + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS;
 }
 
 /**
@@ -225,6 +226,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
                uindices++;
        }
 
+       ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
+       if (ret)
+               return ret;
+       uindices += kvm_arm_get_fw_num_regs(vcpu);
+
        ret = copy_timer_indices(vcpu, uindices);
        if (ret)
                return ret;
@@ -243,6 +249,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
                return get_core_reg(vcpu, reg);
 
+       if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+               return kvm_arm_get_fw_reg(vcpu, reg);
+
        if (is_timer_reg(reg->id))
                return get_timer_reg(vcpu, reg);
 
@@ -259,6 +268,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
                return set_core_reg(vcpu, reg);
 
+       if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+               return kvm_arm_set_fw_reg(vcpu, reg);
+
        if (is_timer_reg(reg->id))
                return set_timer_reg(vcpu, reg);
 
index 86801b6055d6dc714a1bb98640caee9985965092..39be799d04175fcb32428370e1f7a169751764a6 100644 (file)
 #include <linux/compiler.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/kvm_host.h>
+#include <linux/swab.h>
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 
+static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+{
+       if (vcpu_mode_is_32bit(vcpu))
+               return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT);
+
+       return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
+}
+
 /*
  * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
  *                                  guest.
@@ -64,14 +73,19 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
        addr += fault_ipa - vgic->vgic_cpu_base;
 
        if (kvm_vcpu_dabt_iswrite(vcpu)) {
-               u32 data = vcpu_data_guest_to_host(vcpu,
-                                                  vcpu_get_reg(vcpu, rd),
-                                                  sizeof(u32));
+               u32 data = vcpu_get_reg(vcpu, rd);
+               if (__is_be(vcpu)) {
+                       /* guest pre-swabbed data, undo this for writel() */
+                       data = swab32(data);
+               }
                writel_relaxed(data, addr);
        } else {
                u32 data = readl_relaxed(addr);
-               vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
-                                                              sizeof(u32)));
+               if (__is_be(vcpu)) {
+                       /* guest expects swabbed data */
+                       data = swab32(data);
+               }
+               vcpu_set_reg(vcpu, rd, data);
        }
 
        return 1;
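
writel_relaxed()/readl_relaxed() always operate little-endian, so the hunk replaces the vcpu_data_guest_to_host()/host_to_guest() round trip with an explicit endianness check (__is_be) and a swab32(): a big-endian guest's register contents must be byte-swapped on the way to and from the proxied GICV window. A tiny user-space sketch of that round trip (the value is illustrative):

#include <stdio.h>
#include <stdint.h>

static uint32_t swab32(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
	       ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
}

int main(void)
{
	uint32_t guest_val = 0x11223344;	/* as seen in the BE guest's register */

	/* what a little-endian writel() must be handed so the bytes land
	 * in device order as the BE guest intended */
	printf("0x%08x\n", swab32(guest_val));	/* 0x44332211 */
	return 0;
}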
index 806b0b126a644dbc4e2f80db451370b63c63c6a5..6e3b969391fdbb853f621fa60f7c8760d79caa78 100644 (file)
@@ -996,14 +996,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
 
        if (id == SYS_ID_AA64PFR0_EL1) {
                if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
-                       pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n",
-                                   task_pid_nr(current));
+                       kvm_debug("SVE unsupported for guests, suppressing\n");
 
                val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
        } else if (id == SYS_ID_AA64MMFR1_EL1) {
                if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
-                       pr_err_once("kvm [%i]: LORegions unsupported for guests, suppressing\n",
-                                   task_pid_nr(current));
+                       kvm_debug("LORegions unsupported for guests, suppressing\n");
 
                val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
        }
index 0ead8a1d167956193f5666dfa9a5365e0e94ebc8..137710f4dac30ac01c5c17856b730a996628a2e3 100644 (file)
@@ -19,5 +19,9 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2         \
                   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15   \
                   -fcall-saved-x18 -fomit-frame-pointer
 CFLAGS_REMOVE_atomic_ll_sc.o := -pg
+GCOV_PROFILE_atomic_ll_sc.o    := n
+KASAN_SANITIZE_atomic_ll_sc.o  := n
+KCOV_INSTRUMENT_atomic_ll_sc.o := n
+UBSAN_SANITIZE_atomic_ll_sc.o  := n
 
 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
index e36ed5087b5cbba3f71c57937af82b1b00cb88e4..1059884f9a6f35231be04b2930c7ecf5c10b4105 100644 (file)
@@ -58,7 +58,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
        flush_ptrace_access(vma, page, uaddr, dst, len);
 }
 
-void __sync_icache_dcache(pte_t pte, unsigned long addr)
+void __sync_icache_dcache(pte_t pte)
 {
        struct page *page = pte_page(pte);
 
index 9f3c47acf8ffb1170ad411985dcb1a288dfea2df..1b18b472242034b2cfe90ff91a27da8b850bcdaa 100644 (file)
@@ -646,8 +646,10 @@ static int keep_initrd __initdata;
 
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-       if (!keep_initrd)
+       if (!keep_initrd) {
                free_reserved_area((void *)start, (void *)end, 0, "initrd");
+               memblock_free(__virt_to_phys(start), end - start);
+       }
 }
 
 static int __init keepinitrd_setup(char *__unused)
index dabfc1ecda3d3a9d57a430f1641eca05c1114703..12145874c02b8c8a3f82b4dac954e73521085dd7 100644 (file)
@@ -204,7 +204,7 @@ void __init kasan_init(void)
        clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
        kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
-                          pfn_to_nid(virt_to_pfn(lm_alias(_text))));
+                          early_pfn_to_nid(virt_to_pfn(lm_alias(_text))));
 
        kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
                                   (void *)mod_shadow_start);
@@ -224,7 +224,7 @@ void __init kasan_init(void)
 
                kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
                                   (unsigned long)kasan_mem_to_shadow(end),
-                                  pfn_to_nid(virt_to_pfn(start)));
+                                  early_pfn_to_nid(virt_to_pfn(start)));
        }
 
        /*
index a93350451e8ece754a2f565764d47505e48b85ef..a6fdaea07c6339cf2754d582765747ee5d8b2ff5 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/printk.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>
 
 #include <asm/byteorder.h>
@@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
        ctx->idx++;
 }
 
+static inline void emit_a64_mov_i(const int is64, const int reg,
+                                 const s32 val, struct jit_ctx *ctx)
+{
+       u16 hi = val >> 16;
+       u16 lo = val & 0xffff;
+
+       if (hi & 0x8000) {
+               if (hi == 0xffff) {
+                       emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+               } else {
+                       emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+                       if (lo != 0xffff)
+                               emit(A64_MOVK(is64, reg, lo, 0), ctx);
+               }
+       } else {
+               emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+               if (hi)
+                       emit(A64_MOVK(is64, reg, hi, 16), ctx);
+       }
+}
+
+static int i64_i16_blocks(const u64 val, bool inverse)
+{
+       return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+              (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+              (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+              (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
+}
+
 static inline void emit_a64_mov_i64(const int reg, const u64 val,
                                    struct jit_ctx *ctx)
 {
-       u64 tmp = val;
-       int shift = 0;
-
-       emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
-       tmp >>= 16;
-       shift += 16;
-       while (tmp) {
-               if (tmp & 0xffff)
-                       emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
-               tmp >>= 16;
-               shift += 16;
+       u64 nrm_tmp = val, rev_tmp = ~val;
+       bool inverse;
+       int shift;
+
+       if (!(nrm_tmp >> 32))
+               return emit_a64_mov_i(0, reg, (u32)val, ctx);
+
+       inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
+       shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
+                                         (fls64(nrm_tmp) - 1)), 16), 0);
+       if (inverse)
+               emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
+       else
+               emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+       shift -= 16;
+       while (shift >= 0) {
+               if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
+                       emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+               shift -= 16;
        }
 }
 
+/*
+ * This is an unoptimized 64 immediate emission used for BPF to BPF call
+ * addresses. It will always do a full 64 bit decomposition as otherwise
+ * more complexity in the last extra pass is required since we previously
+ * reserved 4 instructions for the address.
+ */
 static inline void emit_addr_mov_i64(const int reg, const u64 val,
                                     struct jit_ctx *ctx)
 {
@@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
        }
 }
 
-static inline void emit_a64_mov_i(const int is64, const int reg,
-                                 const s32 val, struct jit_ctx *ctx)
-{
-       u16 hi = val >> 16;
-       u16 lo = val & 0xffff;
-
-       if (hi & 0x8000) {
-               if (hi == 0xffff) {
-                       emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
-               } else {
-                       emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
-                       emit(A64_MOVK(is64, reg, lo, 0), ctx);
-               }
-       } else {
-               emit(A64_MOVZ(is64, reg, lo, 0), ctx);
-               if (hi)
-                       emit(A64_MOVK(is64, reg, hi, 16), ctx);
-       }
-}
-
 static inline int bpf2a64_offset(int bpf_to, int bpf_from,
                                 const struct jit_ctx *ctx)
 {
@@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 /* Tail call offset to jump into */
 #define PROLOGUE_OFFSET 7
 
-static int build_prologue(struct jit_ctx *ctx)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
        const struct bpf_prog *prog = ctx->prog;
        const u8 r6 = bpf2a64[BPF_REG_6];
@@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
         *                        | ... | BPF prog stack
         *                        |     |
         *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
-        *                        |RSVD | JIT scratchpad
+        *                        |RSVD | padding
         * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
         *                        |     |
         *                        | ... | Function call stack
@@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
        /* Set up BPF prog stack base register */
        emit(A64_MOV(1, fp, A64_SP), ctx);
 
-       /* Initialize tail_call_cnt */
-       emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+       if (!ebpf_from_cbpf) {
+               /* Initialize tail_call_cnt */
+               emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
-       cur_offset = ctx->idx - idx0;
-       if (cur_offset != PROLOGUE_OFFSET) {
-               pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
-                           cur_offset, PROLOGUE_OFFSET);
-               return -1;
+               cur_offset = ctx->idx - idx0;
+               if (cur_offset != PROLOGUE_OFFSET) {
+                       pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
+                                   cur_offset, PROLOGUE_OFFSET);
+                       return -1;
+               }
        }
 
-       /* 4 byte extra for skb_copy_bits buffer */
-       ctx->stack_size = prog->aux->stack_depth + 4;
-       ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+       ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
 
        /* Set up function call stack */
        emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -723,71 +745,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
                break;
 
-       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-       case BPF_LD | BPF_ABS | BPF_W:
-       case BPF_LD | BPF_ABS | BPF_H:
-       case BPF_LD | BPF_ABS | BPF_B:
-       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-       case BPF_LD | BPF_IND | BPF_W:
-       case BPF_LD | BPF_IND | BPF_H:
-       case BPF_LD | BPF_IND | BPF_B:
-       {
-               const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
-               const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
-               const u8 fp = bpf2a64[BPF_REG_FP];
-               const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
-               const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
-               const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
-               const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
-               const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
-               int size;
-
-               emit(A64_MOV(1, r1, r6), ctx);
-               emit_a64_mov_i(0, r2, imm, ctx);
-               if (BPF_MODE(code) == BPF_IND)
-                       emit(A64_ADD(0, r2, r2, src), ctx);
-               switch (BPF_SIZE(code)) {
-               case BPF_W:
-                       size = 4;
-                       break;
-               case BPF_H:
-                       size = 2;
-                       break;
-               case BPF_B:
-                       size = 1;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               emit_a64_mov_i64(r3, size, ctx);
-               emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
-               emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
-               emit(A64_BLR(r5), ctx);
-               emit(A64_MOV(1, r0, A64_R(0)), ctx);
-
-               jmp_offset = epilogue_offset(ctx);
-               check_imm19(jmp_offset);
-               emit(A64_CBZ(1, r0, jmp_offset), ctx);
-               emit(A64_MOV(1, r5, r0), ctx);
-               switch (BPF_SIZE(code)) {
-               case BPF_W:
-                       emit(A64_LDR32(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
-                       emit(A64_REV32(0, r0, r0), ctx);
-#endif
-                       break;
-               case BPF_H:
-                       emit(A64_LDRH(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
-                       emit(A64_REV16(0, r0, r0), ctx);
-#endif
-                       break;
-               case BPF_B:
-                       emit(A64_LDRB(r0, r5, A64_ZR), ctx);
-                       break;
-               }
-               break;
-       }
        default:
                pr_err_once("unknown opcode %02x\n", code);
                return -EINVAL;
@@ -851,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
        struct bpf_prog *tmp, *orig_prog = prog;
        struct bpf_binary_header *header;
        struct arm64_jit_data *jit_data;
+       bool was_classic = bpf_prog_was_classic(prog);
        bool tmp_blinded = false;
        bool extra_pass = false;
        struct jit_ctx ctx;
@@ -905,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                goto out_off;
        }
 
-       if (build_prologue(&ctx)) {
+       if (build_prologue(&ctx, was_classic)) {
                prog = orig_prog;
                goto out_off;
        }
@@ -928,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 skip_init_ctx:
        ctx.idx = 0;
 
-       build_prologue(&ctx);
+       build_prologue(&ctx, was_classic);
 
        if (build_body(&ctx)) {
                bpf_jit_binary_free(header);
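
The rewritten emit_a64_mov_i64() picks between a MOVZ skeleton (all zeroes) and a MOVN skeleton (all ones) by counting which leaves fewer 16-bit chunks to patch with MOVK, then seeds the register at the highest chunk that differs from the skeleton. A standalone sketch of the same heuristic, printing the instruction sequence it would choose (the register name and sample immediate are illustrative; the kernel version additionally short-circuits 32-bit values through emit_a64_mov_i(), which is omitted here):

#include <stdio.h>
#include <stdint.h>

/* Count 16-bit chunks differing from the skeleton value
 * (0x0000 for a MOVZ seed, 0xffff for a MOVN seed). */
static int nonskel_blocks(uint64_t val, int inverse)
{
	int i, n = 0;

	for (i = 0; i < 64; i += 16)
		n += (((val >> i) & 0xffff) != (inverse ? 0xffffu : 0x0000u));
	return n;
}

int main(void)
{
	uint64_t val = 0xffffffff00001234ull;	/* sample immediate */
	int inverse = nonskel_blocks(val, 1) < nonskel_blocks(val, 0);
	uint16_t skel = inverse ? 0xffff : 0x0000;
	int shift, first = 1;

	for (shift = 48; shift >= 0; shift -= 16) {
		uint16_t chunk = (val >> shift) & 0xffff;

		if (first) {
			if (chunk == skel && shift > 0)
				continue;	/* skip leading skeleton chunks */
			/* seed the register; MOVN takes the inverted chunk */
			printf("%s x0, #0x%04x, lsl #%d\n",
			       inverse ? "movn" : "movz",
			       inverse ? (uint16_t)~chunk : chunk, shift);
			first = 0;
		} else if (chunk != skel) {
			printf("movk x0, #0x%04x, lsl #%d\n", chunk, shift);
		}
	}
	/* prints: movn x0, #0xffff, lsl #16
	 *         movk x0, #0x1234, lsl #0
	 * two instructions instead of three with the old MOVZ/MOVK-only loop */
	return 0;
}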
index 9e8621d94ee9c0ecece44aa76f93810fea1e70fb..e17262ad125e7ee0c48be297dfff08dc50f2d5e6 100644 (file)
@@ -216,6 +216,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
        memcpy((void *) dst, src, count);
 }
 
+static inline void memset_io(volatile void __iomem *addr, int value,
+                            size_t size)
+{
+       memset((void __force *)addr, value, size);
+}
+
 #define PCI_IO_ADDR    (volatile void __iomem *)
 
 /*
index 617506d1a5596679b19b27af252c3199e61ab1f1..7cd0a2259269659b2b90222c074d06d18bc5b907 100644 (file)
@@ -199,3 +199,4 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
        memcpy(dst, src, len);
        return csum_partial(dst, len, sum);
 }
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
index b3043c08f7694244604af5a09065800e7a97f587..aee8d7b8f09143fd8e4ce30a9552bf827ec357e4 100644 (file)
@@ -18,9 +18,9 @@
 #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
 #endif
 
-#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
-#include <asm/mach-jz4740/base.h>
-#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
+#ifdef CONFIG_MACH_INGENIC
+#define INGENIC_UART0_BASE_ADDR        0x10030000
+#define PORT(offset) (CKSEG1ADDR(INGENIC_UART0_BASE_ADDR) + (4 * offset))
 #endif
 
 #ifdef CONFIG_CPU_XLR
index 9987e0e378c50c6f19eb0457eae914f827f9688b..69ca00590b8de6cbf1b3fb3397e8e4116c690ebf 100644 (file)
@@ -1,4 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)    += nexys4ddr.dtb
-
-obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
index b51432dd10b6fa3789ff01b1d0b902ac200fe89d..0dd0d5d460a5fc7988b03f856f47563589d7e995 100644 (file)
@@ -16,3 +16,4 @@ all-$(CONFIG_MIPS_GENERIC)    := vmlinux.gz.itb
 its-y                                  := vmlinux.its.S
 its-$(CONFIG_FIT_IMAGE_FDT_BOSTON)     += board-boston.its.S
 its-$(CONFIG_FIT_IMAGE_FDT_NI169445)   += board-ni169445.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)    += board-xilfpga.its.S
index 0b23b1ad99e65f1e21d1810340f9dd306483b8d3..8d098b9f395c13746a4f0855f54a6303bfc21098 100644 (file)
@@ -463,7 +463,7 @@ static int fpr_get_msa(struct task_struct *target,
 /*
  * Copy the floating-point context to the supplied NT_PRFPREG buffer.
  * Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR and FIR registers separately.
  */
 static int fpr_get(struct task_struct *target,
                   const struct user_regset *regset,
@@ -471,6 +471,7 @@ static int fpr_get(struct task_struct *target,
                   void *kbuf, void __user *ubuf)
 {
        const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+       const int fir_pos = fcr31_pos + sizeof(u32);
        int err;
 
        if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
@@ -483,6 +484,12 @@ static int fpr_get(struct task_struct *target,
        err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                  &target->thread.fpu.fcr31,
                                  fcr31_pos, fcr31_pos + sizeof(u32));
+       if (err)
+               return err;
+
+       err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &boot_cpu_data.fpu_id,
+                                 fir_pos, fir_pos + sizeof(u32));
 
        return err;
 }
@@ -531,7 +538,8 @@ static int fpr_set_msa(struct task_struct *target,
 /*
  * Copy the supplied NT_PRFPREG buffer to the floating-point context.
  * Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR register separately.  Ignore the incoming FIR register
+ * contents though, as the register is read-only.
  *
  * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
  * which is supposed to have been guaranteed by the kernel before
@@ -545,6 +553,7 @@ static int fpr_set(struct task_struct *target,
                   const void *kbuf, const void __user *ubuf)
 {
        const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+       const int fir_pos = fcr31_pos + sizeof(u32);
        u32 fcr31;
        int err;
 
@@ -572,6 +581,11 @@ static int fpr_set(struct task_struct *target,
                ptrace_setfcr31(target, fcr31);
        }
 
+       if (count > 0)
+               err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                               fir_pos,
+                                               fir_pos + sizeof(u32));
+
        return err;
 }
 
@@ -793,7 +807,7 @@ long arch_ptrace(struct task_struct *child, long request,
                        fregs = get_fpu_regs(child);
 
 #ifdef CONFIG_32BIT
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                /*
                                 * The odd registers are actually the high
                                 * order bits of the values stored in the even
@@ -888,7 +902,7 @@ long arch_ptrace(struct task_struct *child, long request,
 
                        init_fp_ctx(child);
 #ifdef CONFIG_32BIT
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                /*
                                 * The odd registers are actually the high
                                 * order bits of the values stored in the even
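
The fpr_get()/fpr_set() hunks above extend the NT_PRFPREG regset so the 32-bit FIR value (taken from boot_cpu_data.fpu_id) sits immediately after FCSR, while fpr_set() deliberately discards any incoming FIR bytes via user_regset_copyin_ignore() because that register is read-only. A small sketch of the buffer offsets implied by the code, with elf_fpreg_t taken as 8 bytes (an assumption for this sketch):

#include <stdio.h>

#define NUM_FPU_REGS 32
typedef unsigned long long elf_fpreg_t;	/* 8 bytes, assumed for illustration */

int main(void)
{
	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
	const int fir_pos = fcr31_pos + sizeof(unsigned int);

	/* layout: 32 FP registers, then FCSR, then FIR */
	printf("fcr31 at byte %d, fir at byte %d\n", fcr31_pos, fir_pos);
	/* prints: fcr31 at byte 256, fir at byte 260 */
	return 0;
}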
index 2b9260f92ccd3019fe3d733c96a631faa7f59e2b..656a137c1fe2c4dcaa9fb410d5787c9e9b39ce1f 100644 (file)
@@ -99,7 +99,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                                break;
                        }
                        fregs = get_fpu_regs(child);
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                /*
                                 * The odd registers are actually the high
                                 * order bits of the values stored in the even
@@ -212,7 +212,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                                       sizeof(child->thread.fpu));
                                child->thread.fpu.fcr31 = 0;
                        }
-                       if (test_thread_flag(TIF_32BIT_FPREGS)) {
+                       if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
                                /*
                                 * The odd registers are actually the high
                                 * order bits of the values stored in the even
index 2549fdd27ee16842c1ce7dd2bd422f27a2d3a769..0f725e9cee8f69230ca7ddff5f6023c30294395c 100644 (file)
@@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "cache",        VCPU_STAT(cache_exits),        KVM_STAT_VCPU },
        { "signal",       VCPU_STAT(signal_exits),       KVM_STAT_VCPU },
        { "interrupt",    VCPU_STAT(int_exits),          KVM_STAT_VCPU },
-       { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+       { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
        { "tlbmod",       VCPU_STAT(tlbmod_exits),       KVM_STAT_VCPU },
        { "tlbmiss_ld",   VCPU_STAT(tlbmiss_ld_exits),   KVM_STAT_VCPU },
        { "tlbmiss_st",   VCPU_STAT(tlbmiss_st_exits),   KVM_STAT_VCPU },
index 6f534b2099717da8c2d7be70bfa035a05ed5aede..e12dfa48b478dd3ec51369236bb84040c044bd82 100644 (file)
@@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
        /*
         * Either no secondary cache or the available caches don't have the
         * subset property so we have to flush the primary caches
-        * explicitly
+        * explicitly.
+        * If we would need IPI to perform an INDEX-type operation, then
+        * we have to use the HIT-type alternative as IPI cannot be used
+        * here due to interrupts possibly being disabled.
         */
-       if (size >= dcache_size) {
+       if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
                r4k_blast_dcache();
        } else {
                R4600_HIT_CACHEOP_WAR_IMPL;
@@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
                return;
        }
 
-       if (size >= dcache_size) {
+       if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
                r4k_blast_dcache();
        } else {
                R4600_HIT_CACHEOP_WAR_IMPL;
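
The r4k_op_needs_ipi(R4K_INDEX) guard, added to both DMA cache routines, encodes the rule from the new comment: an INDEX-type blast only walks the local CPU's cache, so it is only a valid shortcut when no cross-CPU IPI would be required, and IPIs cannot be sent here because interrupts may be disabled. A standalone sketch of that decision (the helper result and cache size are stand-ins):

#include <stdbool.h>
#include <stdio.h>

static bool index_op_needs_ipi = true;		/* stand-in: SMP without a coherence manager */
static unsigned long dcache_size = 32768;	/* stand-in L1 dcache size */

static const char *pick_cache_op(unsigned long size)
{
	if (!index_op_needs_ipi && size >= dcache_size)
		return "blast whole dcache by index";	/* cheap, but local-CPU only */
	return "hit-invalidate the address range";	/* safe without IPIs */
}

int main(void)
{
	printf("%s\n", pick_cache_op(65536));	/* hit-invalidate the address range */
	return 0;
}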
index 3e2798bfea4f5849fe1cac82352f8e47569b8039..aeb7b1b0f2024e1de01c25c14566d7f3e71c03e4 100644 (file)
@@ -95,7 +95,6 @@ enum reg_val_type {
  * struct jit_ctx - JIT context
  * @skf:               The sk_filter
  * @stack_size:                eBPF stack size
- * @tmp_offset:                eBPF $sp offset to 8-byte temporary memory
  * @idx:               Instruction index
  * @flags:             JIT flags
  * @offsets:           Instruction offsets
@@ -105,7 +104,6 @@ enum reg_val_type {
 struct jit_ctx {
        const struct bpf_prog *skf;
        int stack_size;
-       int tmp_offset;
        u32 idx;
        u32 flags;
        u32 *offsets;
@@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
        locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
 
        stack_adjust += locals_size;
-       ctx->tmp_offset = locals_size;
 
        ctx->stack_size = stack_adjust;
 
@@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
                emit_instr(ctx, lui, reg, upper >> 16);
                emit_instr(ctx, addiu, reg, reg, lower);
        }
-
 }
 
 static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
@@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
        return 0;
 }
 
-static void * __must_check
-ool_skb_header_pointer(const struct sk_buff *skb, int offset,
-                      int len, void *buffer)
-{
-       return skb_header_pointer(skb, offset, len, buffer);
-}
-
-static int size_to_len(const struct bpf_insn *insn)
-{
-       switch (BPF_SIZE(insn->code)) {
-       case BPF_B:
-               return 1;
-       case BPF_H:
-               return 2;
-       case BPF_W:
-               return 4;
-       case BPF_DW:
-               return 8;
-       }
-       return 0;
-}
-
 static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
 {
        if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
@@ -1267,110 +1241,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                        return -EINVAL;
                break;
 
-       case BPF_LD | BPF_B | BPF_ABS:
-       case BPF_LD | BPF_H | BPF_ABS:
-       case BPF_LD | BPF_W | BPF_ABS:
-       case BPF_LD | BPF_DW | BPF_ABS:
-               ctx->flags |= EBPF_SAVE_RA;
-
-               gen_imm_to_reg(insn, MIPS_R_A1, ctx);
-               emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-
-               if (insn->imm < 0) {
-                       emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
-               } else {
-                       emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
-                       emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
-               }
-               goto ld_skb_common;
-
-       case BPF_LD | BPF_B | BPF_IND:
-       case BPF_LD | BPF_H | BPF_IND:
-       case BPF_LD | BPF_W | BPF_IND:
-       case BPF_LD | BPF_DW | BPF_IND:
-               ctx->flags |= EBPF_SAVE_RA;
-               src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
-               if (src < 0)
-                       return src;
-               ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
-               if (ts == REG_32BIT_ZERO_EX) {
-                       /* sign extend */
-                       emit_instr(ctx, sll, MIPS_R_A1, src, 0);
-                       src = MIPS_R_A1;
-               }
-               if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
-                       emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
-               } else {
-                       gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-                       emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
-               }
-               /* truncate to 32-bit int */
-               emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
-               emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
-               emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
-
-               emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
-               emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
-               emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-               emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
-
-ld_skb_common:
-               emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
-               /* delay slot move */
-               emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
-
-               /* Check the error value */
-               b_off = b_imm(exit_idx, ctx);
-               if (is_bad_offset(b_off)) {
-                       target = j_target(ctx, exit_idx);
-                       if (target == (unsigned int)-1)
-                               return -E2BIG;
-
-                       if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
-                               ctx->offsets[this_idx] |= OFFSETS_B_CONV;
-                               ctx->long_b_conversion = 1;
-                       }
-                       emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
-                       emit_instr(ctx, nop);
-                       emit_instr(ctx, j, target);
-                       emit_instr(ctx, nop);
-               } else {
-                       emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
-                       emit_instr(ctx, nop);
-               }
-
-#ifdef __BIG_ENDIAN
-               need_swap = false;
-#else
-               need_swap = true;
-#endif
-               dst = MIPS_R_V0;
-               switch (BPF_SIZE(insn->code)) {
-               case BPF_B:
-                       emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
-                       break;
-               case BPF_H:
-                       emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
-                       if (need_swap)
-                               emit_instr(ctx, wsbh, dst, dst);
-                       break;
-               case BPF_W:
-                       emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
-                       if (need_swap) {
-                               emit_instr(ctx, wsbh, dst, dst);
-                               emit_instr(ctx, rotr, dst, dst, 16);
-                       }
-                       break;
-               case BPF_DW:
-                       emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
-                       if (need_swap) {
-                               emit_instr(ctx, dsbh, dst, dst);
-                               emit_instr(ctx, dshd, dst, dst);
-                       }
-                       break;
-               }
-
-               break;
        case BPF_ALU | BPF_END | BPF_FROM_BE:
        case BPF_ALU | BPF_END | BPF_FROM_LE:
                dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
index e2364ff591809287a227ba32e6533a911b48032a..34ac503e28add6c2e47d0e90bf3230f06eab64ed 100644 (file)
@@ -123,6 +123,9 @@ INSTALL_TARGETS = zinstall install
 
 PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
 
+# Default kernel to build
+all: bzImage
+
 zImage: vmlinuz
 Image: vmlinux
 
index 3b8507f710507dcd141d0d11f550d53722a0bd7c..e0e1c9775c320b46d85da0f2e6ce22bc2275b9fb 100644 (file)
@@ -268,7 +268,7 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa)
  * Walks up the device tree looking for a device of the specified type.
  * If it finds it, it returns it.  If not, it returns NULL.
  */
-const struct parisc_device * __init
+const struct parisc_device *
 find_pa_parent_type(const struct parisc_device *padev, int type)
 {
        const struct device *dev = &padev->dev;
@@ -448,7 +448,8 @@ static int match_by_id(struct device * dev, void * data)
  * Checks all the children of @parent for a matching @id.  If none
  * found, it allocates a new device and returns it.
  */
-static struct parisc_device * alloc_tree_node(struct device *parent, char id)
+static struct parisc_device * __init alloc_tree_node(
+                       struct device *parent, char id)
 {
        struct match_id_data d = {
                .id = id,
@@ -825,8 +826,8 @@ static void walk_lower_bus(struct parisc_device *dev)
  * devices which are not physically connected (such as extra serial &
  * keyboard ports).  This problem is not yet solved.
  */
-static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high,
-                            struct device *parent)
+static void __init walk_native_bus(unsigned long io_io_low,
+       unsigned long io_io_high, struct device *parent)
 {
        int i, devices_found = 0;
        unsigned long hpa = io_io_low;
index 13ee3569959acf7bf0a161fe2ae8beca32533740..ae684ac6efb6e6d3f5b5284dda279eda54dea311 100644 (file)
@@ -174,7 +174,7 @@ void pcibios_set_master(struct pci_dev *dev)
  * pcibios_init_bridge() initializes cache line and default latency
  * for pci controllers and pci-pci bridges
  */
-void __init pcibios_init_bridge(struct pci_dev *dev)
+void __ref pcibios_init_bridge(struct pci_dev *dev)
 {
        unsigned short bridge_ctl, bridge_ctl_new;
 
index 4065b5e48c9d68e70b38da3743e219e02934fe5b..5e26dbede5fc23d37f734e1511bc405207d37266 100644 (file)
@@ -423,8 +423,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 }
 
 #ifdef CONFIG_PROC_FS
-int __init
-setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
 {
        return -EINVAL;
 }
index c3830400ca28ef1c37f7e9909b1bd7cc011c27af..a1e772f909cbf4c2f0c1bd361f968221de3cb0a0 100644 (file)
@@ -205,7 +205,7 @@ static int __init rtc_init(void)
 device_initcall(rtc_init);
 #endif
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
        static struct pdc_tod tod_data;
        if (pdc_tod_read(&tod_data) == 0) {
index 68e671a11987a6f2fe3de6823e8e3e1ed497334d..71d31274d782eecb7708b915d1ac15410e933c62 100644 (file)
@@ -837,6 +837,17 @@ void __init initialize_ivt(const void *iva)
        if (pdc_instr(&instr) == PDC_OK)
                ivap[0] = instr;
 
+       /*
+        * Rules for the checksum of the HPMC handler:
+        * 1. The IVA does not point to PDC/PDH space (ie: the OS has installed
+        *    its own IVA).
+        * 2. The word at IVA + 32 is nonzero.
+        * 3. If Length (IVA + 60) is not zero, then Length (IVA + 60) and
+        *    Address (IVA + 56) are word-aligned.
+        * 4. The checksum of the 8 words starting at IVA + 32 plus the sum of
+        *    the Length/4 words starting at Address is zero.
+        */
+
        /* Compute Checksum for HPMC handler */
        length = os_hpmc_size;
        ivap[7] = length;
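
The new comment spells out the firmware's validity rules for an OS-installed HPMC handler. Rule 4 is a plain additive checksum over 32-bit words; a minimal user-space sketch of choosing a compensating word so the total wraps to zero (which slot the kernel reserves for the checksum is an assumption here):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* stand-ins for the 8 words at IVA + 32; slot 5 is left free
	 * for the compensating checksum word (slot choice illustrative) */
	uint32_t words[8] = { 0x12345678, 0x9abcdef0, 0, 0, 0, 0, 0, 0x100 };
	uint32_t sum = 0;
	int i;

	for (i = 0; i < 8; i++)
		if (i != 5)
			sum += words[i];
	words[5] = (uint32_t)-sum;	/* make the 32-bit total wrap to zero */

	for (sum = 0, i = 0; i < 8; i++)
		sum += words[i];
	printf("total = 0x%08x\n", sum);	/* total = 0x00000000 */
	return 0;
}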
index cab32ee824d2ac4b7fe9adf4f3bb25533cc043c6..2607d2d33405fb422ca7ef1e9bf9b0a0df6f3aa9 100644 (file)
@@ -516,7 +516,7 @@ static void __init map_pages(unsigned long start_vaddr,
        }
 }
 
-void free_initmem(void)
+void __ref free_initmem(void)
 {
        unsigned long init_begin = (unsigned long)__init_begin;
        unsigned long init_end = (unsigned long)__init_end;
index 9abddde372abfbb11934f93da2e39924fe4116d3..b2dabd06659dd0b4cce575e0e00336f9d5f36cb6 100644 (file)
@@ -69,17 +69,30 @@ struct dyn_arch_ftrace {
 #endif
 
 #if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__)
-#ifdef PPC64_ELF_ABI_v1
+/*
+ * Some syscall entry functions on powerpc start with "ppc_" (fork and clone,
+ * for instance) or ppc32_/ppc64_. We should also match the sys_ variant with
+ * those.
+ */
 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+#ifdef PPC64_ELF_ABI_v1
+static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
+{
+       /* We need to skip past the initial dot, and the __se_sys alias */
+       return !strcmp(sym + 1, name) ||
+               (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) ||
+               (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) ||
+               (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) ||
+               (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4));
+}
+#else
 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
 {
-       /*
-        * Compare the symbol name with the system call name. Skip the .sys or .SyS
-        * prefix from the symbol name and the sys prefix from the system call name and
-        * just match the rest. This is only needed on ppc64 since symbol names on
-        * 32bit do not start with a period so the generic function will work.
-        */
-       return !strcmp(sym + 4, name + 3);
+       return !strcmp(sym, name) ||
+               (!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) ||
+               (!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4)) ||
+               (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
+               (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
 }
 #endif
 #endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */
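
The rewritten matchers let ftrace's syscall tracer pair a "sys_foo" event name with powerpc's ppc_/ppc32_/ppc64_ entry points and the __se_sys wrappers (with a leading dot under ELF ABIv1). A user-space harness for the ABIv2 variant above, exercising a few symbol names:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static bool match(const char *sym, const char *name)
{
	return !strcmp(sym, name) ||
		(!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) ||
		(!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4)) ||
		(!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
		(!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
}

int main(void)
{
	printf("%d\n", match("ppc_fork", "sys_fork"));			/* 1 */
	printf("%d\n", match("__se_sys_read", "sys_read"));		/* 1 */
	printf("%d\n", match("ppc64_personality", "sys_personality"));	/* 1 */
	printf("%d\n", match("ppc_fork", "sys_clone"));			/* 0 */
	return 0;
}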
index 4185f1c9612501b51ffdafc2c8f125d1fa6ee1f3..3f109a3e3edb226578b37de27c8ffde14e9748e2 100644 (file)
@@ -165,7 +165,6 @@ struct paca_struct {
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
        u16 trap_save;                  /* Used when bad stack is encountered */
        u8 irq_soft_mask;               /* mask for irq soft masking */
-       u8 soft_enabled;                /* irq soft-enable flag */
        u8 irq_happened;                /* irq happened while soft-disabled */
        u8 io_sync;                     /* writel() needs spin_unlock sync */
        u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disable */
index d1c2d2e658cf4d5b1d3c3718a377efa345067128..2f3ff7a278815a131f9ebec73bffcdaedf0abab3 100644 (file)
@@ -15,7 +15,7 @@
 extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
 extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                        unsigned long flags,
-                       struct npu_context *(*cb)(struct npu_context *, void *),
+                       void (*cb)(struct npu_context *, void *),
                        void *priv);
 extern void pnv_npu2_destroy_context(struct npu_context *context,
                                struct pci_dev *gpdev);
index 9f421641a35c8240cbacf192f6a1b22b4f33c63c..16b077801a5f97125f48cc9d597580cef7f8f65d 100644 (file)
@@ -91,6 +91,7 @@ extern int start_topology_update(void);
 extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
 extern int find_and_online_cpu_nid(int cpu);
+extern int timed_topology_update(int nsecs);
 #else
 static inline int start_topology_update(void)
 {
@@ -108,16 +109,12 @@ static inline int find_and_online_cpu_nid(int cpu)
 {
        return 0;
 }
+static inline int timed_topology_update(int nsecs)
+{
+       return 0;
+}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
-#if defined(CONFIG_PPC_SPLPAR)
-extern int timed_topology_update(int nsecs);
-#else
-#define        timed_topology_update(nsecs)
-#endif /* CONFIG_PPC_SPLPAR */
-#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
-
 #include <asm-generic/topology.h>
 
 #ifdef CONFIG_SMP
index fe6fc63251fec70e7cf2dee7f6deab47fe8ab256..38c5b4764bfed0e0aeb647418f53875bb0f7b2a0 100644 (file)
@@ -441,7 +441,6 @@ static int mce_handle_ierror(struct pt_regs *regs,
                                        if (pfn != ULONG_MAX) {
                                                *phys_addr =
                                                        (pfn << PAGE_SHIFT);
-                                               handled = 1;
                                        }
                                }
                        }
@@ -532,9 +531,7 @@ static int mce_handle_derror(struct pt_regs *regs,
                         * kernel/exception-64s.h
                         */
                        if (get_paca()->in_mce < MAX_MCE_DEPTH)
-                               if (!mce_find_instr_ea_and_pfn(regs, addr,
-                                                               phys_addr))
-                                       handled = 1;
+                               mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
                }
                found = 1;
        }
@@ -572,7 +569,7 @@ static long mce_handle_error(struct pt_regs *regs,
                const struct mce_ierror_table itable[])
 {
        struct mce_error_info mce_err = { 0 };
-       uint64_t addr, phys_addr;
+       uint64_t addr, phys_addr = ULONG_MAX;
        uint64_t srr1 = regs->msr;
        long handled;
 
index e16ec7b3b427ea2d2ce2ca0a1229b16df51dc0e1..9ca7148b5881a098abf7bf0a3eff99536f99a7ec 100644 (file)
@@ -566,10 +566,35 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 #endif
 
 #ifdef CONFIG_NMI_IPI
-static void stop_this_cpu(struct pt_regs *regs)
-#else
+static void nmi_stop_this_cpu(struct pt_regs *regs)
+{
+       /*
+        * This is a special case because it never returns, so the NMI IPI
+        * handling would never mark it as done, which makes any later
+        * smp_send_nmi_ipi() call spin forever. Mark it done now.
+        *
+        * IRQs are already hard disabled by the smp_handle_nmi_ipi.
+        */
+       nmi_ipi_lock();
+       nmi_ipi_busy_count--;
+       nmi_ipi_unlock();
+
+       /* Remove this CPU */
+       set_cpu_online(smp_processor_id(), false);
+
+       spin_begin();
+       while (1)
+               spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+       smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
+}
+
+#else /* CONFIG_NMI_IPI */
+
 static void stop_this_cpu(void *dummy)
-#endif
 {
        /* Remove this CPU */
        set_cpu_online(smp_processor_id(), false);
@@ -582,12 +607,22 @@ static void stop_this_cpu(void *dummy)
 
 void smp_send_stop(void)
 {
-#ifdef CONFIG_NMI_IPI
-       smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
-#else
+       static bool stopped = false;
+
+       /*
+        * Prevent waiting on csd lock from a previous smp_send_stop.
+        * This is racy, but in general callers try to do the right
+        * thing and only fire off one smp_send_stop (e.g., see
+        * kernel/panic.c)
+        */
+       if (stopped)
+               return;
+
+       stopped = true;
+
        smp_call_function(stop_this_cpu, NULL, 0);
-#endif
 }
+#endif /* CONFIG_NMI_IPI */
 
 struct thread_info *current_set[NR_CPUS];
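
The stopped flag added to the non-NMI smp_send_stop() is a deliberately racy fire-once guard: as the comment notes, a second panic path calling it again must not spin on the csd lock still held by the first call. A minimal sketch of the pattern, assuming the single-caller-in-practice semantics the comment describes:

#include <stdbool.h>
#include <stdio.h>

static void send_stop_ipis(void)
{
	printf("IPIs sent\n");	/* stand-in for smp_call_function(stop_this_cpu, ...) */
}

static void smp_send_stop_once(void)
{
	static bool stopped;

	if (stopped)		/* later callers just return */
		return;
	stopped = true;
	send_stop_ipis();
}

int main(void)
{
	smp_send_stop_once();	/* prints "IPIs sent" */
	smp_send_stop_once();	/* no-op */
	return 0;
}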
 
index 6038e2e7aee03c2b29edb65624be370f01a6c928..876d4f294fdd89fc160439073a3c04c9c8e81174 100644 (file)
@@ -305,6 +305,13 @@ void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
 }
 
+#ifdef CONFIG_ALTIVEC
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
+{
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+}
+#endif
+
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
index 737f8a4632ccc68abfdd61a1900380834b125340..c3c39b02b2ba7b0ae58df5bf293103b382b2b708 100644 (file)
@@ -133,6 +133,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
                        start, start + size, rc);
                return -EFAULT;
        }
+       flush_inval_dcache_range(start, start + size);
 
        return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
@@ -159,6 +160,7 @@ int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap
 
        /* Remove htab bolted mappings for this section of memory */
        start = (unsigned long)__va(start);
+       flush_inval_dcache_range(start, start + size);
        ret = remove_section_mapping(start, start + size);
 
        /* Ensure all vmalloc mappings are flushed in case they also
index 02d369ca6a53b2ffa1dae4de42c8e9cb902efa39..809f019d3cba5511eee5fc8af2a8cbbc274ac741 100644 (file)
@@ -3,7 +3,7 @@
 # Arch-specific network modules
 #
 ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
 else
 obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
 endif
index 8bdef7ed28a831353d1d6b17499f54f439aff191..3609be4692b35e948f3ceff98c39c1b3bdaea744 100644 (file)
@@ -20,7 +20,7 @@
  * with our redzone usage.
  *
  *             [       prev sp         ] <-------------
- *             [   nv gpr save area    ] 8*8           |
+ *             [   nv gpr save area    ] 6*8           |
  *             [    tail_call_cnt      ] 8             |
  *             [    local_tmp_var      ] 8             |
  * fp (r31) -->        [   ebpf stack space    ] upto 512      |
@@ -28,8 +28,8 @@
  * sp (r1) --->        [    stack pointer      ] --------------
  */
 
-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE     (8*8)
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE     (6*8)
 /* for bpf JIT code internal usage */
 #define BPF_PPC_STACK_LOCALS   16
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
 #ifndef __ASSEMBLY__
 
 /* BPF register usage */
-#define SKB_HLEN_REG   (MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG   (MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1      (MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2      (MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1      (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2      (MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
 static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
        [BPF_REG_FP] = 31,
        /* eBPF jit internal registers */
        [BPF_REG_AX] = 2,
-       [SKB_HLEN_REG] = 25,
-       [SKB_DATA_REG] = 26,
        [TMP_REG_1] = 9,
        [TMP_REG_2] = 10
 };
 
-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN                24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func)        u64 func(u64 r3, u64 r4);                       \
-                               u64 func##_negative_offset(u64 r3, u64 r4);     \
-                               u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func)                                            \
-                       (imm < 0 ?                                              \
-                       (imm >= SKF_LL_OFF ? func##_negative_offset : func) :   \
-                       func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN                27
 
 #define SEEN_FUNC      0x1000 /* might call external helpers */
 #define SEEN_STACK     0x2000 /* uses BPF stack */
-#define SEEN_SKB       0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL  0x8000 /* uses tail calls */
+#define SEEN_TAILCALL  0x4000 /* uses tail calls */
 
 struct codegen_context {
        /*
         * This is used to track register usage as well
         * as calls to external helpers.
         * - register usage is tracked with corresponding
-        *   bits (r3-r10 and r25-r31)
+        *   bits (r3-r10 and r27-r31)
         * - rest of the bits can be used to track other
         *   things -- for now, we use bits 16 to 23
         *   encoded in SEEN_* macros above
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
deleted file mode 100644 (file)
index 7e4c514..0000000
--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * bpf_jit_asm64.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
- *                IBM Corporation
- *
- * Based on bpf_jit_asm.S by Matt Evans
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ptrace.h>
-#include "bpf_jit64.h"
-
-/*
- * All of these routines are called directly from generated code,
- * with the below register usage:
- * r27         skb pointer (ctx)
- * r25         skb header length
- * r26         skb->data pointer
- * r4          offset
- *
- * Result is passed back in:
- * r8          data read in host endian format (accumulator)
- *
- * r9 is used as a temporary register
- */
-
-#define r_skb  r27
-#define r_hlen r25
-#define r_data r26
-#define r_off  r4
-#define r_val  r8
-#define r_tmp  r9
-
-_GLOBAL_TOC(sk_load_word)
-       cmpdi   r_off, 0
-       blt     bpf_slow_path_word_neg
-       b       sk_load_word_positive_offset
-
-_GLOBAL_TOC(sk_load_word_positive_offset)
-       /* Are we accessing past headlen? */
-       subi    r_tmp, r_hlen, 4
-       cmpd    r_tmp, r_off
-       blt     bpf_slow_path_word
-       /* Nope, just hitting the header.  cr0 here is eq or gt! */
-       LWZX_BE r_val, r_data, r_off
-       blr     /* Return success, cr0 != LT */
-
-_GLOBAL_TOC(sk_load_half)
-       cmpdi   r_off, 0
-       blt     bpf_slow_path_half_neg
-       b       sk_load_half_positive_offset
-
-_GLOBAL_TOC(sk_load_half_positive_offset)
-       subi    r_tmp, r_hlen, 2
-       cmpd    r_tmp, r_off
-       blt     bpf_slow_path_half
-       LHZX_BE r_val, r_data, r_off
-       blr
-
-_GLOBAL_TOC(sk_load_byte)
-       cmpdi   r_off, 0
-       blt     bpf_slow_path_byte_neg
-       b       sk_load_byte_positive_offset
-
-_GLOBAL_TOC(sk_load_byte_positive_offset)
-       cmpd    r_hlen, r_off
-       ble     bpf_slow_path_byte
-       lbzx    r_val, r_data, r_off
-       blr
-
-/*
- * Call out to skb_copy_bits:
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)                                     \
-       mflr    r0;                                                     \
-       std     r0, PPC_LR_STKOFF(r1);                                  \
-       stdu    r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \
-       mr      r3, r_skb;                                              \
-       /* r4 = r_off as passed */                                      \
-       addi    r5, r1, STACK_FRAME_MIN_SIZE;                           \
-       li      r6, SIZE;                                               \
-       bl      skb_copy_bits;                                          \
-       nop;                                                            \
-       /* save r5 */                                                   \
-       addi    r5, r1, STACK_FRAME_MIN_SIZE;                           \
-       /* r3 = 0 on success */                                         \
-       addi    r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS;    \
-       ld      r0, PPC_LR_STKOFF(r1);                                  \
-       mtlr    r0;                                                     \
-       cmpdi   r3, 0;                                                  \
-       blt     bpf_error;      /* cr0 = LT */
-
-bpf_slow_path_word:
-       bpf_slow_path_common(4)
-       /* Data value is on stack, and cr0 != LT */
-       LWZX_BE r_val, 0, r5
-       blr
-
-bpf_slow_path_half:
-       bpf_slow_path_common(2)
-       LHZX_BE r_val, 0, r5
-       blr
-
-bpf_slow_path_byte:
-       bpf_slow_path_common(1)
-       lbzx    r_val, 0, r5
-       blr
-
-/*
- * Call out to bpf_internal_load_pointer_neg_helper
- */
-#define sk_negative_common(SIZE)                               \
-       mflr    r0;                                             \
-       std     r0, PPC_LR_STKOFF(r1);                          \
-       stdu    r1, -STACK_FRAME_MIN_SIZE(r1);                  \
-       mr      r3, r_skb;                                      \
-       /* r4 = r_off, as passed */                             \
-       li      r5, SIZE;                                       \
-       bl      bpf_internal_load_pointer_neg_helper;           \
-       nop;                                                    \
-       addi    r1, r1, STACK_FRAME_MIN_SIZE;                   \
-       ld      r0, PPC_LR_STKOFF(r1);                          \
-       mtlr    r0;                                             \
-       /* R3 != 0 on success */                                \
-       cmpldi  r3, 0;                                          \
-       beq     bpf_error_slow; /* cr0 = EQ */
-
-bpf_slow_path_word_neg:
-       lis     r_tmp, -32      /* SKF_LL_OFF */
-       cmpd    r_off, r_tmp    /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       b       sk_load_word_negative_offset
-
-_GLOBAL_TOC(sk_load_word_negative_offset)
-       sk_negative_common(4)
-       LWZX_BE r_val, 0, r3
-       blr
-
-bpf_slow_path_half_neg:
-       lis     r_tmp, -32      /* SKF_LL_OFF */
-       cmpd    r_off, r_tmp    /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       b       sk_load_half_negative_offset
-
-_GLOBAL_TOC(sk_load_half_negative_offset)
-       sk_negative_common(2)
-       LHZX_BE r_val, 0, r3
-       blr
-
-bpf_slow_path_byte_neg:
-       lis     r_tmp, -32      /* SKF_LL_OFF */
-       cmpd    r_off, r_tmp    /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       b       sk_load_byte_negative_offset
-
-_GLOBAL_TOC(sk_load_byte_negative_offset)
-       sk_negative_common(1)
-       lbzx    r_val, 0, r3
-       blr
-
-bpf_error_slow:
-       /* fabricate a cr0 = lt */
-       li      r_tmp, -1
-       cmpdi   r_tmp, 0
-bpf_error:
-       /*
-        * Entered with cr0 = lt
-        * Generated code will 'blt epilogue', returning 0.
-        */
-       li      r_val, 0
-       blr
index 0ef3d9580e98ca7c4f747edb115b16c56ccf6d43..1bdb1aff061907ca4523f556e0724248782ecbfc 100644 (file)
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
  *             [       prev sp         ] <-------------
  *             [         ...           ]               |
  * sp (r1) --->        [    stack pointer      ] --------------
- *             [   nv gpr save area    ] 8*8
+ *             [   nv gpr save area    ] 6*8
  *             [    tail_call_cnt      ] 8
  *             [    local_tmp_var      ] 8
  *             [   unused red zone     ] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
        BUG();
 }
 
-static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
-{
-       /*
-        * Load skb->len and skb->data_len
-        * r3 points to skb
-        */
-       PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
-       PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
-       /* header_len = len - data_len */
-       PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
-
-       /* skb->data pointer */
-       PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
-}
-
 static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
        int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
                if (bpf_is_seen_register(ctx, i))
                        PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
-       /*
-        * Save additional non-volatile regs if we cache skb
-        * Also, setup skb data
-        */
-       if (ctx->seen & SEEN_SKB) {
-               PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-               PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-               bpf_jit_emit_skb_loads(image, ctx);
-       }
-
        /* Setup frame pointer to point to the bpf stack area */
        if (bpf_is_seen_register(ctx, BPF_REG_FP))
                PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
                if (bpf_is_seen_register(ctx, i))
                        PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
-       /* Restore non-volatile registers used for skb cache */
-       if (ctx->seen & SEEN_SKB) {
-               PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-               PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-       }
-
        /* Tear down our stack frame */
        if (bpf_has_stack_frame(ctx)) {
                PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -753,23 +718,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                        ctx->seen |= SEEN_FUNC;
                        func = (u8 *) __bpf_call_base + imm;
 
-                       /* Save skb pointer if we need to re-cache skb data */
-                       if ((ctx->seen & SEEN_SKB) &&
-                           bpf_helper_changes_pkt_data(func))
-                               PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
-
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
 
                        /* move return value from r3 to BPF_REG_0 */
                        PPC_MR(b2p[BPF_REG_0], 3);
-
-                       /* refresh skb cache */
-                       if ((ctx->seen & SEEN_SKB) &&
-                           bpf_helper_changes_pkt_data(func)) {
-                               /* reload skb pointer to r3 */
-                               PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
-                               bpf_jit_emit_skb_loads(image, ctx);
-                       }
                        break;
 
                /*
@@ -886,65 +838,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                        PPC_BCC(true_cond, addrs[i + 1 + off]);
                        break;
 
-               /*
-                * Loads from packet header/data
-                * Assume 32-bit input value in imm and X (src_reg)
-                */
-
-               /* Absolute loads */
-               case BPF_LD | BPF_W | BPF_ABS:
-                       func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
-                       goto common_load_abs;
-               case BPF_LD | BPF_H | BPF_ABS:
-                       func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
-                       goto common_load_abs;
-               case BPF_LD | BPF_B | BPF_ABS:
-                       func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-common_load_abs:
-                       /*
-                        * Load from [imm]
-                        * Load into r4, which can just be passed onto
-                        *  skb load helpers as the second parameter
-                        */
-                       PPC_LI32(4, imm);
-                       goto common_load;
-
-               /* Indirect loads */
-               case BPF_LD | BPF_W | BPF_IND:
-                       func = (u8 *)sk_load_word;
-                       goto common_load_ind;
-               case BPF_LD | BPF_H | BPF_IND:
-                       func = (u8 *)sk_load_half;
-                       goto common_load_ind;
-               case BPF_LD | BPF_B | BPF_IND:
-                       func = (u8 *)sk_load_byte;
-common_load_ind:
-                       /*
-                        * Load from [src_reg + imm]
-                        * Treat src_reg as a 32-bit value
-                        */
-                       PPC_EXTSW(4, src_reg);
-                       if (imm) {
-                               if (imm >= -32768 && imm < 32768)
-                                       PPC_ADDI(4, 4, IMM_L(imm));
-                               else {
-                                       PPC_LI32(b2p[TMP_REG_1], imm);
-                                       PPC_ADD(4, 4, b2p[TMP_REG_1]);
-                               }
-                       }
-
-common_load:
-                       ctx->seen |= SEEN_SKB;
-                       ctx->seen |= SEEN_FUNC;
-                       bpf_jit_emit_func_call(image, ctx, (u64)func);
-
-                       /*
-                        * Helper returns 'lt' condition on error, and an
-                        * appropriate return value in BPF_REG_0
-                        */
-                       PPC_BCC(COND_LT, exit_addr);
-                       break;
-
                /*
                 * Tail call
                 */
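(The block removed above is the powerpc JIT's special-case handling of the
classic BPF packet-load opcodes, BPF_LD | BPF_ABS and BPF_LD | BPF_IND; the
s390 JIT sheds its matching helpers further down, consistent with these
opcodes now being lowered by the BPF core rather than per-arch code. A rough
C rendering of what the removed fast path provided; the function name here is
illustrative, not from the patch:)

	/* illustrative only: word-sized absolute load, returning the
	 * big-endian word at skb offset 'off'; skb_copy_bits() handles
	 * data that is not in the linear area */
	static u32 load_word_abs(const struct sk_buff *skb, int off, int *err)
	{
		__be32 v;

		*err = skb_copy_bits(skb, off, &v, sizeof(v));
		return ntohl(v);
	}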
index 9033c8194eda5d7d39db99af0af9e6f33811a272..ccc42150336394ca0fa8a7263b5d408e59c30552 100644 (file)
@@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
                LOAD_INT(c), LOAD_FRAC(c),
                count_active_contexts(),
                atomic_read(&nr_spu_contexts),
-               idr_get_cursor(&task_active_pid_ns(current)->idr));
+               idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
        return 0;
 }
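The one-line change above corrects an off-by-one: idr_get_cursor() returns the
id the pid allocator will hand out next, not the one it handed out last. A
minimal sketch of the corrected read (names from the hunk; the enclosing
seq_printf is elided):

	/* the IDR cursor points at the next pid to be allocated, so the
	 * most recently allocated pid in this namespace is cursor - 1 */
	unsigned int last_pid =
		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1;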
 
index de470caf07848e28864cbb8ff0129e4ed7aaf021..fc222a0c2ac46b06095ed831827d40e84994e3ca 100644 (file)
@@ -82,19 +82,6 @@ static const struct file_operations memtrace_fops = {
        .open   = simple_open,
 };
 
-static void flush_memory_region(u64 base, u64 size)
-{
-       unsigned long line_size = ppc64_caches.l1d.size;
-       u64 end = base + size;
-       u64 addr;
-
-       base = round_down(base, line_size);
-       end = round_up(end, line_size);
-
-       for (addr = base; addr < end; addr += line_size)
-               asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
-}
-
 static int check_memblock_online(struct memory_block *mem, void *arg)
 {
        if (mem->state != MEM_ONLINE)
@@ -132,10 +119,6 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
        walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
                          change_memblock_state);
 
-       /* RCU grace period? */
-       flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
-                           nr_pages << PAGE_SHIFT);
-
        lock_device_hotplug();
        remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
        unlock_device_hotplug();
index 69a4f9e8bd554f137dd01b930b1b3d87e204fd47..525e966dce3418cef4a82f494da331793b628668 100644 (file)
 
 #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
 
+/*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+static DEFINE_SPINLOCK(npu_context_lock);
+
+/*
+ * When an address shootdown range exceeds this threshold we invalidate the
+ * entire TLB on the GPU for the given PID rather than each specific address in
+ * the range.
+ */
+#define ATSD_THRESHOLD (2*1024*1024)
+
 /*
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
@@ -401,7 +414,7 @@ struct npu_context {
        bool nmmu_flush;
 
        /* Callback to stop translation requests on a given GPU */
-       struct npu_context *(*release_cb)(struct npu_context *, void *);
+       void (*release_cb)(struct npu_context *context, void *priv);
 
        /*
         * Private pointer passed to the above callback for usage by
@@ -671,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
        struct npu_context *npu_context = mn_to_npu_context(mn);
        unsigned long address;
 
-       for (address = start; address < end; address += PAGE_SIZE)
-               mmio_invalidate(npu_context, 1, address, false);
+       if (end - start > ATSD_THRESHOLD) {
+               /*
+                * Just invalidate the entire PID if the address range is too
+                * large.
+                */
+               mmio_invalidate(npu_context, 0, 0, true);
+       } else {
+               for (address = start; address < end; address += PAGE_SIZE)
+                       mmio_invalidate(npu_context, 1, address, false);
 
-       /* Do the flush only on the final addess == end */
-       mmio_invalidate(npu_context, 1, address, true);
+               /* Do the flush only on the final address == end */
+               mmio_invalidate(npu_context, 1, address, true);
+       }
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -696,11 +717,12 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
  * Returns an error if no contexts are currently available, or an
  * npu_context which should be passed to pnv_npu2_handle_fault().
  *
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode, and this function must not be called
+ * from interrupt context.
  */
 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                        unsigned long flags,
-                       struct npu_context *(*cb)(struct npu_context *, void *),
+                       void (*cb)(struct npu_context *, void *),
                        void *priv)
 {
        int rc;
@@ -743,7 +765,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
        /*
         * Setup the NPU context table for a particular GPU. These need to be
         * per-GPU as we need the tables to filter ATSDs when there are no
-        * active contexts on a particular GPU.
+        * active contexts on a particular GPU. It is safe for these to be
+        * called concurrently with destroy as the OPAL call takes appropriate
+        * locks and refcounts on init/destroy.
         */
        rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
                                PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -754,8 +778,29 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
         * We store the npu pci device so we can more easily get at the
         * associated npus.
         */
+       spin_lock(&npu_context_lock);
        npu_context = mm->context.npu_context;
+       if (npu_context) {
+               if (npu_context->release_cb != cb ||
+                       npu_context->priv != priv) {
+                       spin_unlock(&npu_context_lock);
+                       opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+                                               PCI_DEVID(gpdev->bus->number,
+                                                       gpdev->devfn));
+                       return ERR_PTR(-EINVAL);
+               }
+
+               WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+       }
+       spin_unlock(&npu_context_lock);
+
        if (!npu_context) {
+               /*
+                * We can set up these fields without holding the
+                * npu_context_lock as the npu_context hasn't been returned to
+                * the caller, meaning it can't be destroyed. Parallel allocation
+                * is protected against by mmap_sem.
+                */
                rc = -ENOMEM;
                npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
                if (npu_context) {
@@ -774,8 +819,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                }
 
                mm->context.npu_context = npu_context;
-       } else {
-               WARN_ON(!kref_get_unless_zero(&npu_context->kref));
        }
 
        npu_context->release_cb = cb;
@@ -814,15 +857,16 @@ static void pnv_npu2_release_context(struct kref *kref)
                mm_context_remove_copro(npu_context->mm);
 
        npu_context->mm->context.npu_context = NULL;
-       mmu_notifier_unregister(&npu_context->mn,
-                               npu_context->mm);
-
-       kfree(npu_context);
 }
 
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
 void pnv_npu2_destroy_context(struct npu_context *npu_context,
                        struct pci_dev *gpdev)
 {
+       int removed;
        struct pnv_phb *nphb;
        struct npu *npu;
        struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -844,7 +888,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
        WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
        opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
                                PCI_DEVID(gpdev->bus->number, gpdev->devfn));
-       kref_put(&npu_context->kref, pnv_npu2_release_context);
+       spin_lock(&npu_context_lock);
+       removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+       spin_unlock(&npu_context_lock);
+
+       /*
+        * We need to do this outside of pnv_npu2_release_context so that it is
+        * outside the spinlock as mmu_notifier_unregister() uses SRCU.
+        */
+       if (removed) {
+               mmu_notifier_unregister(&npu_context->mn,
+                                       npu_context->mm);
+
+               kfree(npu_context);
+       }
+
 }
 EXPORT_SYMBOL(pnv_npu2_destroy_context);
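The destroy path above drops its reference under npu_context_lock, so that
pnv_npu2_init_context()'s kref_get_unless_zero() can never race with the final
put, while the sleeping SRCU-based teardown is deliberately kept outside the
lock. A condensed sketch of the pattern, using the names from the hunks:

	spin_lock(&npu_context_lock);
	/* the release callback only severs the mm linkage; no sleeping work */
	removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
	spin_unlock(&npu_context_lock);

	if (removed) {
		/* mmu_notifier_unregister() uses SRCU and may sleep, so it
		 * runs only after the spinlock has been dropped */
		mmu_notifier_unregister(&npu_context->mn, npu_context->mm);
		kfree(npu_context);
	}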
 
index 1bceb95f422d0f828017128580695c0d4c87ba47..5584247f502929de6e13df0ca5127a1ab92cdbe8 100644 (file)
@@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
        return count;
 }
 
+/*
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ */
 static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
 {
        s64 rc = OPAL_BUSY;
@@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_write_nvram(__pa(buf), count, off);
                if (rc == OPAL_BUSY_EVENT) {
-                       msleep(OPAL_BUSY_DELAY_MS);
+                       if (in_interrupt() || irqs_disabled())
+                               mdelay(OPAL_BUSY_DELAY_MS);
+                       else
+                               msleep(OPAL_BUSY_DELAY_MS);
                        opal_poll_events(NULL);
                } else if (rc == OPAL_BUSY) {
-                       msleep(OPAL_BUSY_DELAY_MS);
+                       if (in_interrupt() || irqs_disabled())
+                               mdelay(OPAL_BUSY_DELAY_MS);
+                       else
+                               msleep(OPAL_BUSY_DELAY_MS);
                }
        }
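Both OPAL_BUSY branches above now pick the delay primitive by context, since
the nvram write path can be reached from panic with interrupts off. A
hypothetical helper (not in the patch) makes the rule explicit:

	/* hypothetical factoring of the repeated pattern above */
	static void opal_nvram_delay(void)
	{
		if (in_interrupt() || irqs_disabled())
			mdelay(OPAL_BUSY_DELAY_MS);	/* cannot sleep: spin */
		else
			msleep(OPAL_BUSY_DELAY_MS);	/* may sleep: yield */
	}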
 
index f8868864f373ed5eecba333a5b1a2b83e38e0ec6..aa2a5139462ea5f6c81f51c8f025c9af84ce0be2 100644 (file)
@@ -48,10 +48,12 @@ unsigned long __init opal_get_boot_time(void)
 
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-               if (rc == OPAL_BUSY_EVENT)
+               if (rc == OPAL_BUSY_EVENT) {
+                       mdelay(OPAL_BUSY_DELAY_MS);
                        opal_poll_events(NULL);
-               else if (rc == OPAL_BUSY)
-                       mdelay(10);
+               } else if (rc == OPAL_BUSY) {
+                       mdelay(OPAL_BUSY_DELAY_MS);
+               }
        }
        if (rc != OPAL_SUCCESS)
                return 0;
index 23d8acca5c9038a184df62d24ccb2112cb92a7c0..cd4fd85fde84e76fa205737076c5a721b9f4c6f4 100644 (file)
@@ -11,6 +11,7 @@ config RISCV
        select ARCH_WANT_FRAME_POINTERS
        select CLONE_BACKWARDS
        select COMMON_CLK
+       select DMA_DIRECT_OPS
        select GENERIC_CLOCKEVENTS
        select GENERIC_CPU_DEVICES
        select GENERIC_IRQ_SHOW
@@ -89,9 +90,6 @@ config PGTABLE_LEVELS
 config HAVE_KPROBES
        def_bool n
 
-config DMA_DIRECT_OPS
-       def_bool y
-
 menu "Platform type"
 
 choice
index 1e5fd280fb4d150ebc5a593b7f249af0330a7228..4286a5f838760c7ad4d922ddd2b49286c374df56 100644 (file)
@@ -15,7 +15,6 @@ generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hash.h
-generic-y += handle_irq.h
 generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ioctls.h
index 324568d3392130fe5beba7bd03928a762848d377..f6561b783b619282c014ac62230b2a029c4ef226 100644 (file)
@@ -52,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 # Add -lgcc so rv32 gets static muldi3 and lshrdi3 definitions.
 # Make sure only to export the intended __vdso_xxx symbol offsets.
 quiet_cmd_vdsold = VDSOLD  $@
-      cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \
+      cmd_vdsold = $(CC) $(KCFLAGS) $(call cc-option, -no-pie) -nostdlib $(SYSCFLAGS_$(@F)) \
                            -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \
                    $(CROSS_COMPILE)objcopy \
                            $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@
index 6176fe9795caf995daeb986948ee2c1375d8ee10..941d8cc6c9f5990ffe5aa39aa16bd79ff70ed9fc 100644 (file)
@@ -261,9 +261,9 @@ CONFIG_IP_VS_NQ=m
 CONFIG_IP_VS_FTP=m
 CONFIG_IP_VS_PE_SIP=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -284,7 +284,7 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
@@ -305,7 +305,7 @@ CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_RDS=m
 CONFIG_RDS_RDMA=m
 CONFIG_RDS_TCP=m
@@ -604,7 +604,6 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_WQ_WATCHDOG=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
 CONFIG_DEBUG_LOCKDEP=y
index c105bcc6d7a6fc0f932ba7537f85fe6e1a171b1a..eb6f75f242089b6f67115bbbd74c7e8a4f2fbeac 100644 (file)
@@ -259,9 +259,9 @@ CONFIG_IP_VS_NQ=m
 CONFIG_IP_VS_FTP=m
 CONFIG_IP_VS_PE_SIP=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -282,7 +282,7 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
@@ -303,7 +303,7 @@ CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_RDS=m
 CONFIG_RDS_RDMA=m
 CONFIG_RDS_TCP=m
index e8077f0971f89f1695d5b5637d9c89126cf1e9cc..2bf01ba44107cd678e1fd38b31e5be18080f80b0 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
 #include <asm/vx-insn.h>
 
 /* Vector register range containing CRC-32 constants */
@@ -67,6 +68,8 @@
 
 .previous
 
+       GEN_BR_THUNK %r14
+
 .text
 /*
  * The CRC-32 function(s) use these calling conventions:
@@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16)
 
 .Ldone:
        VLGVF   %r2,%v2,3
-       br      %r14
+       BR_EX   %r14
 
 .previous
index d8c67a58c0c53b620c4a8f1837ce9bf3c0db5207..7d6f568bd3ad1fe19586e7597ae127b519c7709f 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/nospec-insn.h>
 #include <asm/vx-insn.h>
 
 /* Vector register range containing CRC-32 constants */
@@ -76,6 +77,7 @@
 
 .previous
 
+       GEN_BR_THUNK %r14
 
 .text
 
@@ -264,6 +266,6 @@ crc32_le_vgfm_generic:
 
 .Ldone:
        VLGVF   %r2,%v2,2
-       br      %r14
+       BR_EX   %r14
 
 .previous
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
new file mode 100644 (file)
index 0000000..a01f811
--- /dev/null
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_EXPOLINE
+
+_LC_BR_R1 = __LC_BR_R1
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+       .macro __THUNK_PROLOG_NAME name
+       .pushsection .text.\name,"axG",@progbits,\name,comdat
+       .globl \name
+       .hidden \name
+       .type \name,@function
+\name:
+       CFI_STARTPROC
+       .endm
+
+       .macro __THUNK_EPILOG
+       CFI_ENDPROC
+       .popsection
+       .endm
+
+       .macro __THUNK_PROLOG_BR r1,r2
+       __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+       .endm
+
+       .macro __THUNK_PROLOG_BC d0,r1,r2
+       __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+       .endm
+
+       .macro __THUNK_BR r1,r2
+       jg      __s390x_indirect_jump_r\r2\()use_r\r1
+       .endm
+
+       .macro __THUNK_BC d0,r1,r2
+       jg      __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+       .endm
+
+       .macro __THUNK_BRASL r1,r2,r3
+       brasl   \r1,__s390x_indirect_jump_r\r3\()use_r\r2
+       .endm
+
+       .macro  __DECODE_RR expand,reg,ruse
+       .set __decode_fail,1
+       .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \reg,%r\r1
+       .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \ruse,%r\r2
+       \expand \r1,\r2
+       .set __decode_fail,0
+       .endif
+       .endr
+       .endif
+       .endr
+       .if __decode_fail == 1
+       .error "__DECODE_RR failed"
+       .endif
+       .endm
+
+       .macro  __DECODE_RRR expand,rsave,rtarget,ruse
+       .set __decode_fail,1
+       .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \rsave,%r\r1
+       .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \rtarget,%r\r2
+       .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \ruse,%r\r3
+       \expand \r1,\r2,\r3
+       .set __decode_fail,0
+       .endif
+       .endr
+       .endif
+       .endr
+       .endif
+       .endr
+       .if __decode_fail == 1
+       .error "__DECODE_RRR failed"
+       .endif
+       .endm
+
+       .macro  __DECODE_DRR expand,disp,reg,ruse
+       .set __decode_fail,1
+       .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \reg,%r\r1
+       .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+       .ifc \ruse,%r\r2
+       \expand \disp,\r1,\r2
+       .set __decode_fail,0
+       .endif
+       .endr
+       .endif
+       .endr
+       .if __decode_fail == 1
+       .error "__DECODE_DRR failed"
+       .endif
+       .endm
+
+       .macro __THUNK_EX_BR reg,ruse
+       # Be very careful when adding instructions to this macro!
+       # The ALTERNATIVE replacement code has a .+10 which targets
+       # the "br \reg" after the code has been patched.
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+       exrl    0,555f
+       j       .
+#else
+       .ifc \reg,%r1
+       ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35
+       j       .
+       .else
+       larl    \ruse,555f
+       ex      0,0(\ruse)
+       j       .
+       .endif
+#endif
+555:   br      \reg
+       .endm
+
+       .macro __THUNK_EX_BC disp,reg,ruse
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+       exrl    0,556f
+       j       .
+#else
+       larl    \ruse,556f
+       ex      0,0(\ruse)
+       j       .
+#endif
+556:   b       \disp(\reg)
+       .endm
+
+       .macro GEN_BR_THUNK reg,ruse=%r1
+       __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
+       __THUNK_EX_BR \reg,\ruse
+       __THUNK_EPILOG
+       .endm
+
+       .macro GEN_B_THUNK disp,reg,ruse=%r1
+       __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse
+       __THUNK_EX_BC \disp,\reg,\ruse
+       __THUNK_EPILOG
+       .endm
+
+       .macro BR_EX reg,ruse=%r1
+557:   __DECODE_RR __THUNK_BR,\reg,\ruse
+       .pushsection .s390_indirect_branches,"a",@progbits
+       .long   557b-.
+       .popsection
+       .endm
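+
+       # Example: "BR_EX %r14,%r11" expands to "jg __s390x_indirect_jump_r11use_r14",
+       # the thunk name gcc itself emits, so the comdat groups fold duplicate
+       # thunks; the .long entry in the macro records each call site in
+       # .s390_indirect_branches so that nospec_revert() can patch it back
+       # to a plain "br" when expolines are disabled at runtime.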
+
+        .macro B_EX disp,reg,ruse=%r1
+558:   __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse
+       .pushsection .s390_indirect_branches,"a",@progbits
+       .long   558b-.
+       .popsection
+       .endm
+
+       .macro BASR_EX rsave,rtarget,ruse=%r1
+559:   __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse
+       .pushsection .s390_indirect_branches,"a",@progbits
+       .long   559b-.
+       .popsection
+       .endm
+
+#else
+       .macro GEN_BR_THUNK reg,ruse=%r1
+       .endm
+
+       .macro GEN_B_THUNK disp,reg,ruse=%r1
+       .endm
+
+        .macro BR_EX reg,ruse=%r1
+       br      \reg
+       .endm
+
+        .macro B_EX disp,reg,ruse=%r1
+       b       \disp(\reg)
+       .endm
+
+       .macro BASR_EX rsave,rtarget,ruse=%r1
+       basr    \rsave,\rtarget
+       .endm
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
index e297bcfc476f65d0c41e591b9df9f43754b03aa7..6090670df51fcee21b2523fc388ba93ca5510cb0 100644 (file)
 
 int verify_sha256_digest(void);
 
+extern u64 kernel_entry;
+extern u64 kernel_type;
+
+extern u64 crash_start;
+extern u64 crash_size;
+
 #endif /* __ASSEMBLY__ */
 #endif /* _S390_PURGATORY_H_ */
index 83ba57533ce6fb63887d06046758a409fd58b1f3..3c883c368eb0587daef70a03c74010b6579f9084 100644 (file)
@@ -45,6 +45,9 @@ struct thread_info {
 void arch_release_task_struct(struct task_struct *tsk);
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+
 #endif
 
 /*
index 84ea6225efb4be999ca6c92343d69f0b138447d6..f92dd8ed3884ae4dbd4bc2f47d30248e794241e1 100644 (file)
@@ -65,6 +65,7 @@ obj-y += nospec-branch.o
 
 extra-y                                += head.o head64.o vmlinux.lds
 
+obj-$(CONFIG_SYSFS)            += nospec-sysfs.o
 CFLAGS_REMOVE_nospec-branch.o  += $(CC_FLAGS_EXPOLINE)
 
 obj-$(CONFIG_MODULES)          += module.o
index eb2a5c0443cd9c4fb6e0cfd4aa71ebdf14e163fa..11aea745a2a6ebce7ea29a894b822d3bcc560c7b 100644 (file)
@@ -181,6 +181,7 @@ int main(void)
        OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
        OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
        OFFSET(__LC_GMAP, lowcore, gmap);
+       OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
        /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
        OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
        /* hardware defined lowcore locations 0x1000 - 0x18ff */
index f6c56009e822473d701cecbc896d3693b080d6c2..b65874b0b412e40ea1baea814fb1169d04f02104 100644 (file)
@@ -9,18 +9,22 @@
 
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
 #include <asm/sigp.h>
 
+       GEN_BR_THUNK %r9
+       GEN_BR_THUNK %r14
+
 ENTRY(s390_base_mcck_handler)
        basr    %r13,0
 0:     lg      %r15,__LC_PANIC_STACK   # load panic stack
        aghi    %r15,-STACK_FRAME_OVERHEAD
        larl    %r1,s390_base_mcck_handler_fn
-       lg      %r1,0(%r1)
-       ltgr    %r1,%r1
+       lg      %r9,0(%r1)
+       ltgr    %r9,%r9
        jz      1f
-       basr    %r14,%r1
+       BASR_EX %r14,%r9
 1:     la      %r1,4095
        lmg     %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
        lpswe   __LC_MCK_OLD_PSW
@@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler)
        basr    %r13,0
 0:     aghi    %r15,-STACK_FRAME_OVERHEAD
        larl    %r1,s390_base_ext_handler_fn
-       lg      %r1,0(%r1)
-       ltgr    %r1,%r1
+       lg      %r9,0(%r1)
+       ltgr    %r9,%r9
        jz      1f
-       basr    %r14,%r1
+       BASR_EX %r14,%r9
 1:     lmg     %r0,%r15,__LC_SAVE_AREA_ASYNC
        ni      __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
        lpswe   __LC_EXT_OLD_PSW
@@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler)
        basr    %r13,0
 0:     aghi    %r15,-STACK_FRAME_OVERHEAD
        larl    %r1,s390_base_pgm_handler_fn
-       lg      %r1,0(%r1)
-       ltgr    %r1,%r1
+       lg      %r9,0(%r1)
+       ltgr    %r9,%r9
        jz      1f
-       basr    %r14,%r1
+       BASR_EX %r14,%r9
        lmg     %r0,%r15,__LC_SAVE_AREA_SYNC
        lpswe   __LC_PGM_OLD_PSW
 1:     lpswe   disabled_wait_psw-0b(%r13)
@@ -117,7 +121,7 @@ ENTRY(diag308_reset)
        larl    %r4,.Lcontinue_psw      # Restore PSW flags
        lpswe   0(%r4)
 .Lcontinue:
-       br      %r14
+       BR_EX   %r14
 .align 16
 .Lrestart_psw:
        .long   0x00080000,0x80000000 + .Lrestart_part2
index 3f22f139a0413f6880118998bbbb48f280bdad5f..f03402efab4b414eefdfd59135f4ee89dda68e8a 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/setup.h>
 #include <asm/nmi.h>
 #include <asm/export.h>
+#include <asm/nospec-insn.h>
 
 __PT_R0      = __PT_GPRS
 __PT_R1      = __PT_GPRS + 8
@@ -183,67 +184,9 @@ _LPP_OFFSET        = __LC_LPP
                    "jnz .+8; .long 0xb2e8d000", 82
        .endm
 
-#ifdef CONFIG_EXPOLINE
-
-       .macro GEN_BR_THUNK name,reg,tmp
-       .section .text.\name,"axG",@progbits,\name,comdat
-       .globl \name
-       .hidden \name
-       .type \name,@function
-\name:
-       CFI_STARTPROC
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-       exrl    0,0f
-#else
-       larl    \tmp,0f
-       ex      0,0(\tmp)
-#endif
-       j       .
-0:     br      \reg
-       CFI_ENDPROC
-       .endm
-
-       GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
-       GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
-       GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
-
-       .macro BASR_R14_R9
-0:     brasl   %r14,__s390x_indirect_jump_r1use_r9
-       .pushsection .s390_indirect_branches,"a",@progbits
-       .long   0b-.
-       .popsection
-       .endm
-
-       .macro BR_R1USE_R14
-0:     jg      __s390x_indirect_jump_r1use_r14
-       .pushsection .s390_indirect_branches,"a",@progbits
-       .long   0b-.
-       .popsection
-       .endm
-
-       .macro BR_R11USE_R14
-0:     jg      __s390x_indirect_jump_r11use_r14
-       .pushsection .s390_indirect_branches,"a",@progbits
-       .long   0b-.
-       .popsection
-       .endm
-
-#else  /* CONFIG_EXPOLINE */
-
-       .macro BASR_R14_R9
-       basr    %r14,%r9
-       .endm
-
-       .macro BR_R1USE_R14
-       br      %r14
-       .endm
-
-       .macro BR_R11USE_R14
-       br      %r14
-       .endm
-
-#endif /* CONFIG_EXPOLINE */
-
+       GEN_BR_THUNK %r9
+       GEN_BR_THUNK %r14
+       GEN_BR_THUNK %r14,%r11
 
        .section .kprobes.text, "ax"
 .Ldummy:
@@ -260,7 +203,7 @@ _LPP_OFFSET = __LC_LPP
 ENTRY(__bpon)
        .globl __bpon
        BPON
-       BR_R1USE_R14
+       BR_EX   %r14
 
 /*
  * Scheduler resume function, called by switch_to
@@ -284,7 +227,7 @@ ENTRY(__switch_to)
        mvc     __LC_CURRENT_PID(4,%r0),0(%r3)  # store pid of next
        lmg     %r6,%r15,__SF_GPRS(%r15)        # load gprs of next task
        ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
-       BR_R1USE_R14
+       BR_EX   %r14
 
 .L__critical_start:
 
@@ -351,7 +294,7 @@ sie_exit:
        xgr     %r5,%r5
        lmg     %r6,%r14,__SF_GPRS(%r15)        # restore kernel registers
        lg      %r2,__SF_SIE_REASON(%r15)       # return exit reason code
-       BR_R1USE_R14
+       BR_EX   %r14
 .Lsie_fault:
        lghi    %r14,-EFAULT
        stg     %r14,__SF_SIE_REASON(%r15)      # set exit reason code
@@ -410,7 +353,7 @@ ENTRY(system_call)
        lgf     %r9,0(%r8,%r10)                 # get system call add.
        TSTMSK  __TI_flags(%r12),_TIF_TRACE
        jnz     .Lsysc_tracesys
-       BASR_R14_R9                             # call sys_xxxx
+       BASR_EX %r14,%r9                        # call sys_xxxx
        stg     %r2,__PT_R2(%r11)               # store return value
 
 .Lsysc_return:
@@ -595,7 +538,7 @@ ENTRY(system_call)
        lmg     %r3,%r7,__PT_R3(%r11)
        stg     %r7,STACK_FRAME_OVERHEAD(%r15)
        lg      %r2,__PT_ORIG_GPR2(%r11)
-       BASR_R14_R9                     # call sys_xxx
+       BASR_EX %r14,%r9                # call sys_xxx
        stg     %r2,__PT_R2(%r11)       # store return value
 .Lsysc_tracenogo:
        TSTMSK  __TI_flags(%r12),_TIF_TRACE
@@ -619,7 +562,7 @@ ENTRY(ret_from_fork)
        lmg     %r9,%r10,__PT_R9(%r11)  # load gprs
 ENTRY(kernel_thread_starter)
        la      %r2,0(%r10)
-       BASR_R14_R9
+       BASR_EX %r14,%r9
        j       .Lsysc_tracenogo
 
 /*
@@ -701,7 +644,7 @@ ENTRY(pgm_check_handler)
        je      .Lpgm_return
        lgf     %r9,0(%r10,%r1)         # load address of handler routine
        lgr     %r2,%r11                # pass pointer to pt_regs
-       BASR_R14_R9                     # branch to interrupt-handler
+       BASR_EX %r14,%r9                # branch to interrupt-handler
 .Lpgm_return:
        LOCKDEP_SYS_EXIT
        tm      __PT_PSW+1(%r11),0x01   # returning to user ?
@@ -1019,7 +962,7 @@ ENTRY(psw_idle)
        stpt    __TIMER_IDLE_ENTER(%r2)
 .Lpsw_idle_lpsw:
        lpswe   __SF_EMPTY(%r15)
-       BR_R1USE_R14
+       BR_EX   %r14
 .Lpsw_idle_end:
 
 /*
@@ -1061,7 +1004,7 @@ ENTRY(save_fpu_regs)
 .Lsave_fpu_regs_done:
        oi      __LC_CPU_FLAGS+7,_CIF_FPU
 .Lsave_fpu_regs_exit:
-       BR_R1USE_R14
+       BR_EX   %r14
 .Lsave_fpu_regs_end:
 EXPORT_SYMBOL(save_fpu_regs)
 
@@ -1107,7 +1050,7 @@ load_fpu_regs:
 .Lload_fpu_regs_done:
        ni      __LC_CPU_FLAGS+7,255-_CIF_FPU
 .Lload_fpu_regs_exit:
-       BR_R1USE_R14
+       BR_EX   %r14
 .Lload_fpu_regs_end:
 
 .L__critical_end:
@@ -1322,7 +1265,7 @@ cleanup_critical:
        jl      0f
        clg     %r9,BASED(.Lcleanup_table+104)  # .Lload_fpu_regs_end
        jl      .Lcleanup_load_fpu_regs
-0:     BR_R11USE_R14
+0:     BR_EX   %r14
 
        .align  8
 .Lcleanup_table:
@@ -1358,7 +1301,7 @@ cleanup_critical:
        ni      __SIE_PROG0C+3(%r9),0xfe        # no longer in SIE
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
        larl    %r9,sie_exit                    # skip forward to sie_exit
-       BR_R11USE_R14
+       BR_EX   %r14
 #endif
 
 .Lcleanup_system_call:
@@ -1412,7 +1355,7 @@ cleanup_critical:
        stg     %r15,56(%r11)           # r15 stack pointer
        # set new psw address and exit
        larl    %r9,.Lsysc_do_svc
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 .Lcleanup_system_call_insn:
        .quad   system_call
        .quad   .Lsysc_stmg
@@ -1424,7 +1367,7 @@ cleanup_critical:
 
 .Lcleanup_sysc_tif:
        larl    %r9,.Lsysc_tif
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 
 .Lcleanup_sysc_restore:
        # check if stpt has been executed
@@ -1441,14 +1384,14 @@ cleanup_critical:
        mvc     0(64,%r11),__PT_R8(%r9)
        lmg     %r0,%r7,__PT_R0(%r9)
 1:     lmg     %r8,%r9,__LC_RETURN_PSW
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 .Lcleanup_sysc_restore_insn:
        .quad   .Lsysc_exit_timer
        .quad   .Lsysc_done - 4
 
 .Lcleanup_io_tif:
        larl    %r9,.Lio_tif
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 
 .Lcleanup_io_restore:
        # check if stpt has been executed
@@ -1462,7 +1405,7 @@ cleanup_critical:
        mvc     0(64,%r11),__PT_R8(%r9)
        lmg     %r0,%r7,__PT_R0(%r9)
 1:     lmg     %r8,%r9,__LC_RETURN_PSW
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 .Lcleanup_io_restore_insn:
        .quad   .Lio_exit_timer
        .quad   .Lio_done - 4
@@ -1515,17 +1458,17 @@ cleanup_critical:
        # prepare return psw
        nihh    %r8,0xfcfd              # clear irq & wait state bits
        lg      %r9,48(%r11)            # return from psw_idle
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 .Lcleanup_idle_insn:
        .quad   .Lpsw_idle_lpsw
 
 .Lcleanup_save_fpu_regs:
        larl    %r9,save_fpu_regs
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 
 .Lcleanup_load_fpu_regs:
        larl    %r9,load_fpu_regs
-       BR_R11USE_R14
+       BR_EX   %r14,%r11
 
 /*
  * Integer constants
index 94f2099bceb04cbfdc73d8f498a42e777a4b4e07..3d17c41074ca55d59fbe156c5967605912af9734 100644 (file)
@@ -176,10 +176,9 @@ void do_softirq_own_stack(void)
                new -= STACK_FRAME_OVERHEAD;
                ((struct stack_frame *) new)->back_chain = old;
                asm volatile("   la    15,0(%0)\n"
-                            "   basr  14,%2\n"
+                            "   brasl 14,__do_softirq\n"
                             "   la    15,0(%1)\n"
-                            : : "a" (new), "a" (old),
-                                "a" (__do_softirq)
+                            : : "a" (new), "a" (old)
                             : "0", "1", "2", "3", "4", "5", "14",
                               "cc", "memory" );
        } else {
index 82df7d80fab22090cb943e1d54562ee00acfaec2..27110f3294edcdf30935048d5553f712caf44116 100644 (file)
@@ -9,13 +9,17 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
 
+       GEN_BR_THUNK %r1
+       GEN_BR_THUNK %r14
+
        .section .kprobes.text, "ax"
 
 ENTRY(ftrace_stub)
-       br      %r14
+       BR_EX   %r14
 
 #define STACK_FRAME_SIZE  (STACK_FRAME_OVERHEAD + __PT_SIZE)
 #define STACK_PTREGS     (STACK_FRAME_OVERHEAD)
@@ -23,7 +27,7 @@ ENTRY(ftrace_stub)
 #define STACK_PTREGS_PSW  (STACK_PTREGS + __PT_PSW)
 
 ENTRY(_mcount)
-       br      %r14
+       BR_EX   %r14
 
 EXPORT_SYMBOL(_mcount)
 
@@ -53,7 +57,7 @@ ENTRY(ftrace_caller)
 #endif
        lgr     %r3,%r14
        la      %r5,STACK_PTREGS(%r15)
-       basr    %r14,%r1
+       BASR_EX %r14,%r1
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # The j instruction gets runtime patched to a nop instruction.
 # See ftrace_enable_ftrace_graph_caller.
@@ -68,7 +72,7 @@ ftrace_graph_caller_end:
 #endif
        lg      %r1,(STACK_PTREGS_PSW+8)(%r15)
        lmg     %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
-       br      %r1
+       BR_EX   %r1
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
@@ -81,6 +85,6 @@ ENTRY(return_to_handler)
        aghi    %r15,STACK_FRAME_OVERHEAD
        lgr     %r14,%r2
        lmg     %r2,%r5,32(%r15)
-       br      %r14
+       BR_EX   %r14
 
 #endif
index 5a83be955c70ed8f9a378ec6a47c61933c2968d9..0dc8ac8548ee3f32904038b41b5b30517516bf41 100644 (file)
@@ -465,11 +465,11 @@ int module_finalize(const Elf_Ehdr *hdr,
                        apply_alternatives(aseg, aseg + s->sh_size);
 
                if (IS_ENABLED(CONFIG_EXPOLINE) &&
-                   (!strcmp(".nospec_call_table", secname)))
+                   (!strncmp(".s390_indirect", secname, 14)))
                        nospec_revert(aseg, aseg + s->sh_size);
 
                if (IS_ENABLED(CONFIG_EXPOLINE) &&
-                   (!strcmp(".nospec_return_table", secname)))
+                   (!strncmp(".s390_return", secname, 12)))
                        nospec_revert(aseg, aseg + s->sh_size);
        }
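The relocation pass above now matches expoline sections by name prefix instead
of the two old fixed names, so every ".s390_indirect*" and ".s390_return*"
table a module carries is reverted. The magic lengths are just the prefix
sizes:

	/* strlen(".s390_indirect") == 14, strlen(".s390_return") == 12 */
	if (IS_ENABLED(CONFIG_EXPOLINE) &&
	    !strncmp(".s390_indirect", secname, 14))
		nospec_revert(aseg, aseg + s->sh_size);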
 
index 46d49a11663f5915753c42ece6c0f56870889b80..8ad6a7128b3a5eba73345e2365613284466e7489 100644 (file)
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/cpu.h>
 #include <asm/nospec-branch.h>
 
 static int __init nobp_setup_early(char *str)
@@ -44,24 +43,6 @@ static int __init nospec_report(void)
 }
 arch_initcall(nospec_report);
 
-#ifdef CONFIG_SYSFS
-ssize_t cpu_show_spectre_v1(struct device *dev,
-                           struct device_attribute *attr, char *buf)
-{
-       return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
-ssize_t cpu_show_spectre_v2(struct device *dev,
-                           struct device_attribute *attr, char *buf)
-{
-       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
-               return sprintf(buf, "Mitigation: execute trampolines\n");
-       if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
-               return sprintf(buf, "Mitigation: limited branch prediction.\n");
-       return sprintf(buf, "Vulnerable\n");
-}
-#endif
-
 #ifdef CONFIG_EXPOLINE
 
 int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
@@ -112,7 +93,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
        s32 *epo;
 
        /* Second part of the instruction replace is always a nop */
-       memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
        for (epo = start; epo < end; epo++) {
                instr = (u8 *) epo + *epo;
                if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
@@ -133,18 +113,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
                        br = thunk + (*(int *)(thunk + 2)) * 2;
                else
                        continue;
-               if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+               /* Check for unconditional branch 0x07f? or 0x47f???? */
+               if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
                        continue;
+
+               memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4);
                switch (type) {
                case BRCL_EXPOLINE:
-                       /* brcl to thunk, replace with br + nop */
                        insnbuf[0] = br[0];
                        insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+                       if (br[0] == 0x47) {
+                               /* brcl to b, replace with bc + nopr */
+                               insnbuf[2] = br[2];
+                               insnbuf[3] = br[3];
+                       } else {
+                               /* brcl to br, replace with bcr + nop */
+                       }
                        break;
                case BRASL_EXPOLINE:
-                       /* brasl to thunk, replace with basr + nop */
-                       insnbuf[0] = 0x0d;
                        insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+                       if (br[0] == 0x47) {
+                               /* brasl to b, replace with bas + nopr */
+                               insnbuf[0] = 0x4d;
+                               insnbuf[2] = br[2];
+                               insnbuf[3] = br[3];
+                       } else {
+                               /* brasl to br, replace with basr + nop */
+                               insnbuf[0] = 0x0d;
+                       }
                        break;
                }
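The widened check above accepts two thunk tails with one mask test: 0x07 is
"bcr" (branch on condition to register) and 0x47 is "bc" (branch via base plus
displacement), and the opcodes differ only in the 0x40 bit. A sketch of why
the mask works:

	/* 0x07 & 0xbf == 0x07 and 0x47 & 0xbf == 0x07, so a single test
	 * matches both "br %r?" (0x07f?) and "b disp(%r?)" (0x47f????) */
	if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
		continue;	/* not an unconditional branch: skip */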
 
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644 (file)
index 0000000..8affad5
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+               return sprintf(buf, "Mitigation: execute trampolines\n");
+       if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+               return sprintf(buf, "Mitigation: limited branch prediction\n");
+       return sprintf(buf, "Vulnerable\n");
+}
index 5ee27dc9a10cf454f08c9430935aa1363121a608..feebb294488203b77899f5415f841e12cefedf29 100644 (file)
@@ -123,7 +123,7 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
 CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
 CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
 CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
-CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080);
 CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
 CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
 CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
@@ -179,7 +179,7 @@ CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
 CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
 CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
-CPUMF_EVENT_ATTR(cf_z14, L1D_WRITES_RO_EXCL, 0x0080);
+CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080);
 CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081);
 CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082);
 CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083);
@@ -371,7 +371,7 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
 };
 
 static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
-       CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL),
+       CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES),
        CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
        CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
        CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
@@ -431,7 +431,7 @@ static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
 };
 
 static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = {
-       CPUMF_EVENT_PTR(cf_z14, L1D_WRITES_RO_EXCL),
+       CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES),
        CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES),
        CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES),
        CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES),
index 1c9ddd7aa5ec8fd32ee626d036a3c3ea6ed79362..0292d68e7dded707496b7090c9b2d7377aab8b2b 100644 (file)
@@ -753,6 +753,10 @@ static int __hw_perf_event_init(struct perf_event *event)
         */
        rate = 0;
        if (attr->freq) {
+               if (!attr->sample_freq) {
+                       err = -EINVAL;
+                       goto out;
+               }
                rate = freq_to_sample_rate(&si, attr->sample_freq);
                rate = hw_limit_rate(&si, rate);
                attr->freq = 0;
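The new guard above rejects frequency-mode events whose sample_freq is zero
before the frequency is converted into a hardware sampling interval,
presumably heading off a divide-by-zero in that conversion. Condensed, the
check is:

	/* frequency-mode sampling at 0 Hz is meaningless; fail early,
	 * before freq_to_sample_rate() is asked to convert it */
	if (attr->freq && !attr->sample_freq)
		return -EINVAL;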
index 70576a2f69cf6851ea2917197ed418de1459bf44..6e758bb6cd29b70821bec49fd69b6d8d76111d2e 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/random.h>
 #include <linux/export.h>
 #include <linux/init_task.h>
+#include <asm/cpu_mf.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/vtimer.h>
@@ -48,6 +49,15 @@ void flush_thread(void)
 {
 }
 
+void arch_setup_new_exec(void)
+{
+       if (S390_lowcore.current_pid != current->pid) {
+               S390_lowcore.current_pid = current->pid;
+               if (test_facility(40))
+                       lpp(&S390_lowcore.lpp);
+       }
+}
+
 void arch_release_task_struct(struct task_struct *tsk)
 {
        runtime_instr_release(tsk);
index 73cc3750f0d3414c56c464585866dbcf738d88f7..7f14adf512c6d229cd4d68dac2c51c8c3f1fe643 100644 (file)
@@ -7,8 +7,11 @@
 
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
 #include <asm/sigp.h>
 
+       GEN_BR_THUNK %r9
+
 #
 # Issue "store status" for the current CPU to its prefix page
 # and call passed function afterwards
@@ -67,9 +70,9 @@ ENTRY(store_status)
        st      %r4,0(%r1)
        st      %r5,4(%r1)
        stg     %r2,8(%r1)
-       lgr     %r1,%r2
+       lgr     %r9,%r2
        lgr     %r2,%r3
-       br      %r1
+       BR_EX   %r9
 
        .section .bss
        .align  8
index e99187149f1717f1ec81c94ea12cc77fa964c2fd..a049a7b9d6e893801a1ecd79d9332d3faea8d0ba 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
 #include <asm/sigp.h>
 
 /*
@@ -24,6 +25,8 @@
  * (see below) in the resume process.
  * This function runs with disabled interrupts.
  */
+       GEN_BR_THUNK %r14
+
        .section .text
 ENTRY(swsusp_arch_suspend)
        stmg    %r6,%r15,__SF_GPRS(%r15)
@@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend)
        spx     0x318(%r1)
        lmg     %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
        lghi    %r2,0
-       br      %r14
+       BR_EX   %r14
 
 /*
  * Restore saved memory image to correct place and restore register context.
@@ -197,11 +200,10 @@ pgm_check_entry:
        larl    %r15,init_thread_union
        ahi     %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)
        larl    %r2,.Lpanic_string
-       larl    %r3,sclp_early_printk
        lghi    %r1,0
        sam31
        sigp    %r1,%r0,SIGP_SET_ARCHITECTURE
-       basr    %r14,%r3
+       brasl   %r14,sclp_early_printk
        larl    %r3,.Ldisabled_wait_31
        lpsw    0(%r3)
 4:
@@ -267,7 +269,7 @@ restore_registers:
        /* Return 0 */
        lmg     %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
        lghi    %r2,0
-       br      %r14
+       BR_EX   %r14
 
        .section .data..nosave,"aw",@progbits
        .align  8
index d9d1f512f019415ed36912e7afd81156fbd284ae..5007fac01bb5e215392362768898cc829cc121d0 100644 (file)
@@ -150,6 +150,15 @@ unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
        return orig;
 }
 
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+                            struct pt_regs *regs)
+{
+       if (ctx == RP_CHECK_CHAIN_CALL)
+               return user_stack_pointer(regs) <= ret->stack;
+       else
+               return user_stack_pointer(regs) < ret->stack;
+}
+
 /* Instruction Emulation */
 
 static void adjust_psw_addr(psw_t *psw, unsigned long len)
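arch_uretprobe_is_alive() above decides whether a pending return-probe
instance is still live by comparing stack pointers, and the chained-call case
must be inclusive because a chain call can reuse exactly the stack slot
recorded in ret->stack. A sketch of the distinction:

	if (ctx == RP_CHECK_CHAIN_CALL)
		/* a chained call may sit at the very same stack address */
		return user_stack_pointer(regs) <= ret->stack;
	/* otherwise alive only while the user stack is still deeper */
	return user_stack_pointer(regs) < ret->stack;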
index 495c9c4bacc7b34bcfe4d966adeb28431b1dc032..2311f15be9cf04b3bad1f766453e6ff970d53a83 100644 (file)
@@ -7,6 +7,9 @@
 
 #include <linux/linkage.h>
 #include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+       GEN_BR_THUNK %r14
 
 /*
  * void *memmove(void *dest, const void *src, size_t n)
@@ -33,14 +36,14 @@ ENTRY(memmove)
 .Lmemmove_forward_remainder:
        larl    %r5,.Lmemmove_mvc
        ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
 .Lmemmove_reverse:
        ic      %r0,0(%r4,%r3)
        stc     %r0,0(%r4,%r1)
        brctg   %r4,.Lmemmove_reverse
        ic      %r0,0(%r4,%r3)
        stc     %r0,0(%r4,%r1)
-       br      %r14
+       BR_EX   %r14
 .Lmemmove_mvc:
        mvc     0(1,%r1),0(%r3)
 EXPORT_SYMBOL(memmove)
@@ -77,7 +80,7 @@ ENTRY(memset)
 .Lmemset_clear_remainder:
        larl    %r3,.Lmemset_xc
        ex      %r4,0(%r3)
-       br      %r14
+       BR_EX   %r14
 .Lmemset_fill:
        cghi    %r4,1
        lgr     %r1,%r2
@@ -95,10 +98,10 @@ ENTRY(memset)
        stc     %r3,0(%r1)
        larl    %r5,.Lmemset_mvc
        ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
 .Lmemset_fill_exit:
        stc     %r3,0(%r1)
-       br      %r14
+       BR_EX   %r14
 .Lmemset_xc:
        xc      0(1,%r1),0(%r1)
 .Lmemset_mvc:
@@ -121,7 +124,7 @@ ENTRY(memcpy)
 .Lmemcpy_remainder:
        larl    %r5,.Lmemcpy_mvc
        ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
 .Lmemcpy_loop:
        mvc     0(256,%r1),0(%r3)
        la      %r1,256(%r1)
@@ -159,10 +162,10 @@ ENTRY(__memset\bits)
        \insn   %r3,0(%r1)
        larl    %r5,.L__memset_mvc\bits
        ex      %r4,0(%r5)
-       br      %r14
+       BR_EX   %r14
 .L__memset_exit\bits:
        \insn   %r3,0(%r2)
-       br      %r14
+       BR_EX   %r14
 .L__memset_mvc\bits:
        mvc     \bytes(1,%r1),0(%r1)
 .endm
index e0d5f245e42bc713443d5c6d09d9034850adbec6..d4663b4bf509894e62c3b02c69726ee5717c2dd4 100644 (file)
@@ -2,4 +2,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
deleted file mode 100644 (file)
index 25bb464..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler for s390, help functions.
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *           Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include "bpf_jit.h"
-
-/*
- * Calling convention:
- * registers %r7-%r10, %r11,%r13, and %r15 are call saved
- *
- * Input (64 bit):
- *   %r3 (%b2) = offset into skb data
- *   %r6 (%b5) = return address
- *   %r7 (%b6) = skb pointer
- *   %r12      = skb data pointer
- *
- * Output:
- *   %r14= %b0 = return value (read skb value)
- *
- * Work registers: %r2,%r4,%r5,%r14
- *
- * skb_copy_bits takes 4 parameters:
- *   %r2 = skb pointer
- *   %r3 = offset into skb data
- *   %r4 = pointer to temp buffer
- *   %r5 = length to copy
- *   Return value in %r2: 0 = ok
- *
- * bpf_internal_load_pointer_neg_helper takes 3 parameters:
- *   %r2 = skb pointer
- *   %r3 = offset into data
- *   %r4 = length to copy
- *   Return value in %r2: Pointer to data
- */
-
-#define SKF_MAX_NEG_OFF        -0x200000       /* SKF_LL_OFF from filter.h */
-
-/*
- * Load SIZE bytes from SKB
- */
-#define sk_load_common(NAME, SIZE, LOAD)                               \
-ENTRY(sk_load_##NAME);                                                 \
-       ltgr    %r3,%r3;                /* Is offset negative? */       \
-       jl      sk_load_##NAME##_slow_neg;                              \
-ENTRY(sk_load_##NAME##_pos);                                           \
-       aghi    %r3,SIZE;               /* Offset + SIZE */             \
-       clg     %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */     \
-       jh      sk_load_##NAME##_slow;                                  \
-       LOAD    %r14,-SIZE(%r3,%r12);   /* Get data from skb */         \
-       b       OFF_OK(%r6);            /* Return */                    \
-                                                                       \
-sk_load_##NAME##_slow:;                                                        \
-       lgr     %r2,%r7;                /* Arg1 = skb pointer */        \
-       aghi    %r3,-SIZE;              /* Arg2 = offset */             \
-       la      %r4,STK_OFF_TMP(%r15);  /* Arg3 = temp buffer */        \
-       lghi    %r5,SIZE;               /* Arg4 = size */               \
-       brasl   %r14,skb_copy_bits;     /* Get data from skb */         \
-       LOAD    %r14,STK_OFF_TMP(%r15); /* Load from temp buffer */     \
-       ltgr    %r2,%r2;                /* Set cc to (%r2 != 0) */      \
-       br      %r6;                    /* Return */
-
-sk_load_common(word, 4, llgf)  /* r14 = *(u32 *) (skb->data+offset) */
-sk_load_common(half, 2, llgh)  /* r14 = *(u16 *) (skb->data+offset) */
-
-/*
- * Load 1 byte from SKB (optimized version)
- */
-       /* r14 = *(u8 *) (skb->data+offset) */
-ENTRY(sk_load_byte)
-       ltgr    %r3,%r3                 # Is offset negative?
-       jl      sk_load_byte_slow_neg
-ENTRY(sk_load_byte_pos)
-       clg     %r3,STK_OFF_HLEN(%r15)  # Offset >= hlen?
-       jnl     sk_load_byte_slow
-       llgc    %r14,0(%r3,%r12)        # Get byte from skb
-       b       OFF_OK(%r6)             # Return OK
-
-sk_load_byte_slow:
-       lgr     %r2,%r7                 # Arg1 = skb pointer
-                                       # Arg2 = offset
-       la      %r4,STK_OFF_TMP(%r15)   # Arg3 = pointer to temp buffer
-       lghi    %r5,1                   # Arg4 = size (1 byte)
-       brasl   %r14,skb_copy_bits      # Get data from skb
-       llgc    %r14,STK_OFF_TMP(%r15)  # Load result from temp buffer
-       ltgr    %r2,%r2                 # Set cc to (%r2 != 0)
-       br      %r6                     # Return cc
-
-#define sk_negative_common(NAME, SIZE, LOAD)                           \
-sk_load_##NAME##_slow_neg:;                                            \
-       cgfi    %r3,SKF_MAX_NEG_OFF;                                    \
-       jl      bpf_error;                                              \
-       lgr     %r2,%r7;                /* Arg1 = skb pointer */        \
-                                       /* Arg2 = offset */             \
-       lghi    %r4,SIZE;               /* Arg3 = size */               \
-       brasl   %r14,bpf_internal_load_pointer_neg_helper;              \
-       ltgr    %r2,%r2;                                                \
-       jz      bpf_error;                                              \
-       LOAD    %r14,0(%r2);            /* Get data from pointer */     \
-       xr      %r3,%r3;                /* Set cc to zero */            \
-       br      %r6;                    /* Return cc */
-
-sk_negative_common(word, 4, llgf)
-sk_negative_common(half, 2, llgh)
-sk_negative_common(byte, 1, llgc)
-
-bpf_error:
-# force a return 0 from jit handler
-       ltgr    %r15,%r15       # Set condition code
-       br      %r6
index 5e1e5133132de8f0964e3781dffd3d231ce610aa..7822ea92e54afd08ed63f8de266e535f81e41880 100644 (file)
@@ -16,9 +16,6 @@
 #include <linux/filter.h>
 #include <linux/types.h>
 
-extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-
 #endif /* __ASSEMBLY__ */
 
 /*
@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  *           |               |     |
  *           |   BPF stack   |     |
  *           |               |     |
- *           +---------------+     |
- *           | 8 byte skbp   |     |
- * R15+176 -> +---------------+     |
- *           | 8 byte hlen   |     |
- * R15+168 -> +---------------+     |
- *           | 4 byte align  |     |
- *           +---------------+     |
- *           | 4 byte temp   |     |
- *           | for bpf_jit.S |     |
  * R15+160 -> +---------------+     |
  *           | new backchain |     |
  * R15+152 -> +---------------+     |
@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  * The stack size used by the BPF program ("BPF stack" above) is passed
  * via "aux->stack_depth".
  */
-#define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160)
+#define STK_SPACE_ADD  (160)
 #define STK_160_UNUSED (160 - 12 * 8)
 #define STK_OFF                (STK_SPACE_ADD - STK_160_UNUSED)
-#define STK_OFF_TMP    160     /* Offset of tmp buffer on stack */
-#define STK_OFF_HLEN   168     /* Offset of SKB header length on stack */
-#define STK_OFF_SKBP   176     /* Offset of SKB pointer on stack */
 
 #define STK_OFF_R6     (160 - 11 * 8)  /* Offset of r6 on stack */
 #define STK_OFF_TCCNT  (160 - 12 * 8)  /* Offset of tail_call_cnt on stack */
 
-/* Offset to skip condition code check */
-#define OFF_OK         4
-
 #endif /* __ARCH_S390_NET_BPF_JIT_H */
index 78a19c93b3802ff48b4c27a90d6428100fc5566f..d2db8acb1a55480895e38fdf142c3d074610230d 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/bpf.h>
 #include <asm/cacheflush.h>
 #include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
 #include <asm/set_memory.h>
 #include "bpf_jit.h"
 
@@ -41,29 +43,29 @@ struct bpf_jit {
        int base_ip;            /* Base address for literal pool */
        int ret0_ip;            /* Address of return 0 */
        int exit_ip;            /* Address of exit */
+       int r1_thunk_ip;        /* Address of expoline thunk for 'br %r1' */
+       int r14_thunk_ip;       /* Address of expoline thunk for 'br %r14' */
        int tail_call_start;    /* Tail call start offset */
        int labels[1];          /* Labels for local jumps */
 };
 
 #define BPF_SIZE_MAX   0xffff  /* Max size for program (16 bit branches) */
 
-#define SEEN_SKB       1       /* skb access */
-#define SEEN_MEM       2       /* use mem[] for temporary storage */
-#define SEEN_RET0      4       /* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL   8       /* code uses literals */
-#define SEEN_FUNC      16      /* calls C functions */
-#define SEEN_TAIL_CALL 32      /* code uses tail calls */
-#define SEEN_REG_AX    64      /* code uses constant blinding */
-#define SEEN_STACK     (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+#define SEEN_MEM       (1 << 0)        /* use mem[] for temporary storage */
+#define SEEN_RET0      (1 << 1)        /* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL   (1 << 2)        /* code uses literals */
+#define SEEN_FUNC      (1 << 3)        /* calls C functions */
+#define SEEN_TAIL_CALL (1 << 4)        /* code uses tail calls */
+#define SEEN_REG_AX    (1 << 5)        /* code uses constant blinding */
+#define SEEN_STACK     (SEEN_FUNC | SEEN_MEM)
 
 /*
  * s390 registers
  */
 #define REG_W0         (MAX_BPF_JIT_REG + 0)   /* Work register 1 (even) */
 #define REG_W1         (MAX_BPF_JIT_REG + 1)   /* Work register 2 (odd) */
-#define REG_SKB_DATA   (MAX_BPF_JIT_REG + 2)   /* SKB data register */
-#define REG_L          (MAX_BPF_JIT_REG + 3)   /* Literal pool register */
-#define REG_15         (MAX_BPF_JIT_REG + 4)   /* Register 15 */
+#define REG_L          (MAX_BPF_JIT_REG + 2)   /* Literal pool register */
+#define REG_15         (MAX_BPF_JIT_REG + 3)   /* Register 15 */
 #define REG_0          REG_W0                  /* Register 0 */
 #define REG_1          REG_W1                  /* Register 1 */
 #define REG_2          BPF_REG_1               /* Register 2 */
@@ -88,10 +90,8 @@ static const int reg2hex[] = {
        [BPF_REG_9]     = 10,
        /* BPF stack pointer */
        [BPF_REG_FP]    = 13,
-       /* Register for blinding (shared with REG_SKB_DATA) */
+       /* Register for blinding */
        [BPF_REG_AX]    = 12,
-       /* SKB data pointer */
-       [REG_SKB_DATA]  = 12,
        /* Work registers for s390x backend */
        [REG_W0]        = 0,
        [REG_W1]        = 1,
@@ -250,6 +250,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
        REG_SET_SEEN(b2);                                       \
 })
 
+#define EMIT6_PCREL_RILB(op, b, target)                                \
+({                                                             \
+       int rel = (target - jit->prg) / 2;                      \
+       _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff);       \
+       REG_SET_SEEN(b);                                        \
+})
+
+#define EMIT6_PCREL_RIL(op, target)                            \
+({                                                             \
+       int rel = (target - jit->prg) / 2;                      \
+       _EMIT6(op | rel >> 16, rel & 0xffff);                   \
+})
+
 #define _EMIT6_IMM(op, imm)                                    \
 ({                                                             \
        unsigned int __imm = (imm);                             \
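The two EMIT6_PCREL_* macros added above encode RIL-format instructions whose immediate is a signed count of 2-byte halfwords, which is why the distance to the target is divided by 2. A minimal sketch of the offset math (hypothetical standalone helper, not kernel code):

#include <stdint.h>

/* s390 PC-relative RIL offsets are measured in halfwords (2 bytes). */
static int32_t ril_halfword_offset(uint32_t target, uint32_t pc)
{
        return (int32_t)(target - pc) / 2;
}
/* e.g. the 'exrl %r0,.+10' emitted for the expoline thunks encodes 10/2 = 5 */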
@@ -384,27 +397,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
        } while (re <= 15);
 }
 
-/*
- * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
- * we store the SKB header length on the stack and the SKB data
- * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
- */
-static void emit_load_skb_data_hlen(struct bpf_jit *jit)
-{
-       /* Header length: llgf %w1,<len>(%b1) */
-       EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
-                     offsetof(struct sk_buff, len));
-       /* s %w1,<data_len>(%b1) */
-       EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
-                  offsetof(struct sk_buff, data_len));
-       /* stg %w1,ST_OFF_HLEN(%r0,%r15) */
-       EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
-       if (!(jit->seen & SEEN_REG_AX))
-               /* lg %skb_data,data_off(%b1) */
-               EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
-                             BPF_REG_1, offsetof(struct sk_buff, data));
-}
-
 /*
  * Emit function prologue
  *
@@ -445,12 +437,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                        EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
-       if (jit->seen & SEEN_SKB) {
-               emit_load_skb_data_hlen(jit);
-               /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
-               EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
-                             STK_OFF_SKBP);
-       }
 }
 
 /*
@@ -469,8 +455,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
        EMIT4(0xb9040000, REG_2, BPF_REG_0);
        /* Restore registers */
        save_restore_regs(jit, REGS_RESTORE, stack_depth);
+       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+               jit->r14_thunk_ip = jit->prg;
+               /* Generate __s390_indirect_jump_r14 thunk */
+               if (test_facility(35)) {
+                       /* exrl %r0,.+10 */
+                       EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+               } else {
+                       /* larl %r1,.+14 */
+                       EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+                       /* ex 0,0(%r1) */
+                       EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
+               }
+               /* j . */
+               EMIT4_PCREL(0xa7f40000, 0);
+       }
        /* br %r14 */
        _EMIT2(0x07fe);
+
+       if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+           (jit->seen & SEEN_FUNC)) {
+               jit->r1_thunk_ip = jit->prg;
+               /* Generate __s390_indirect_jump_r1 thunk */
+               if (test_facility(35)) {
+                       /* exrl %r0,.+10 */
+                       EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+                       /* j . */
+                       EMIT4_PCREL(0xa7f40000, 0);
+                       /* br %r1 */
+                       _EMIT2(0x07f1);
+               } else {
+                       /* larl %r1,.+14 */
+                       EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+                       /* ex 0,S390_lowcore.br_r1_trampoline */
+                       EMIT4_DISP(0x44000000, REG_0, REG_0,
+                                  offsetof(struct lowcore, br_r1_trampoline));
+                       /* j . */
+                       EMIT4_PCREL(0xa7f40000, 0);
+               }
+       }
 }
 
 /*
@@ -483,12 +506,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 {
        struct bpf_insn *insn = &fp->insnsi[i];
        int jmp_off, last, insn_count = 1;
-       unsigned int func_addr, mask;
        u32 dst_reg = insn->dst_reg;
        u32 src_reg = insn->src_reg;
        u32 *addrs = jit->addrs;
        s32 imm = insn->imm;
        s16 off = insn->off;
+       unsigned int mask;
 
        if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
                jit->seen |= SEEN_REG_AX;
@@ -966,17 +989,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                /* lg %w1,<d(imm)>(%l) */
                EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
                              EMIT_CONST_U64(func));
-               /* basr %r14,%w1 */
-               EMIT2(0x0d00, REG_14, REG_W1);
+               if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+                       /* brasl %r14,__s390_indirect_jump_r1 */
+                       EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+               } else {
+                       /* basr %r14,%w1 */
+                       EMIT2(0x0d00, REG_14, REG_W1);
+               }
                /* lgr %b0,%r2: load return value into %b0 */
                EMIT4(0xb9040000, BPF_REG_0, REG_2);
-               if ((jit->seen & SEEN_SKB) &&
-                   bpf_helper_changes_pkt_data((void *)func)) {
-                       /* lg %b1,ST_OFF_SKBP(%r15) */
-                       EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
-                                     REG_15, STK_OFF_SKBP);
-                       emit_load_skb_data_hlen(jit);
-               }
                break;
        }
        case BPF_JMP | BPF_TAIL_CALL:
@@ -1176,73 +1197,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
                EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
                break;
-       /*
-        * BPF_LD
-        */
-       case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
-       case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
-               if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-                       func_addr = __pa(sk_load_byte_pos);
-               else
-                       func_addr = __pa(sk_load_byte);
-               goto call_fn;
-       case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
-       case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
-               if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-                       func_addr = __pa(sk_load_half_pos);
-               else
-                       func_addr = __pa(sk_load_half);
-               goto call_fn;
-       case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
-       case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
-               if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-                       func_addr = __pa(sk_load_word_pos);
-               else
-                       func_addr = __pa(sk_load_word);
-               goto call_fn;
-call_fn:
-               jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
-               REG_SET_SEEN(REG_14); /* Return address of possible func call */
-
-               /*
-                * Implicit input:
-                *  BPF_REG_6    (R7) : skb pointer
-                *  REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
-                *
-                * Calculated input:
-                *  BPF_REG_2    (R3) : offset of byte(s) to fetch in skb
-                *  BPF_REG_5    (R6) : return address
-                *
-                * Output:
-                *  BPF_REG_0    (R14): data read from skb
-                *
-                * Scratch registers (BPF_REG_1-5)
-                */
-
-               /* Call function: llilf %w1,func_addr  */
-               EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
-
-               /* Offset: lgfi %b2,imm */
-               EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
-               if (BPF_MODE(insn->code) == BPF_IND)
-                       /* agfr %b2,%src (%src is s32 here) */
-                       EMIT4(0xb9180000, BPF_REG_2, src_reg);
-
-               /* Reload REG_SKB_DATA if BPF_REG_AX is used */
-               if (jit->seen & SEEN_REG_AX)
-                       /* lg %skb_data,data_off(%b6) */
-                       EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
-                                     BPF_REG_6, offsetof(struct sk_buff, data));
-               /* basr %b5,%w1 (%b5 is call saved) */
-               EMIT2(0x0d00, BPF_REG_5, REG_W1);
-
-               /*
-                * Note: For fast access we jump directly after the
-                * jnz instruction from bpf_jit.S
-                */
-               /* jnz <ret0> */
-               EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
-               break;
        default: /* too complex, give up */
                pr_err("Unknown opcode %02x\n", insn->code);
                return -1;
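Dropping the BPF_LD | BPF_ABS/BPF_IND cases here does not lose functionality: in this cycle the BPF core started rewriting such loads into ordinary eBPF instructions before the JIT ever sees them, so each architecture JIT can delete its hand-rolled skb access glue (the same deletion appears in the sparc hunks below). As a hedged userspace model of what an LD_ABS word load means, matching the bounds-check-then-network-byte-order behaviour of the removed assembly:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

/* Model of BPF_LD | BPF_ABS | BPF_W (hypothetical; negative offsets take a
 * separate helper path in the kernel). Returns -1 when out of bounds, which
 * makes the BPF program as a whole return 0. */
static int ld_abs_w(const uint8_t *pkt, size_t len, int off, uint32_t *val)
{
        uint32_t raw;

        if (off < 0 || (size_t)off + sizeof(raw) > len)
                return -1;
        memcpy(&raw, pkt + off, sizeof(raw));
        *val = ntohl(raw);      /* LD_ABS loads are big-endian */
        return 0;
}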
index 97fe2931647648329c1b4b0bb88e8c3ac81e885f..1851eaeee13179d24dd1c362aa4c4b3e4f6c38df 100644 (file)
@@ -9,6 +9,7 @@ config SUPERH
        select HAVE_IDE if HAS_IOPORT_MAP
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
+       select NO_BOOTMEM
        select ARCH_DISCARD_MEMBLOCK
        select HAVE_OPROFILE
        select HAVE_GENERIC_DMA_COHERENT
index 4205f6d42b6938cb5a65946932672c3daf19abd4..a5bd03642678906a21e1d5fc8f853b4264bfd2e8 100644 (file)
@@ -43,7 +43,11 @@ void __ref cpu_probe(void)
 #endif
 
 #if defined(CONFIG_CPU_J2)
+#if defined(CONFIG_SMP)
        unsigned cpu = hard_smp_processor_id();
+#else
+       unsigned cpu = 0;
+#endif
        if (cpu == 0) of_scan_flat_dt(scan_cache, NULL);
        if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu);
        if (cpu != 0) return;
index d34e998b809f3461be121935359ba26e5613b8c7..c286cf5da6e770f5b7cbadcb4f815f986822e443 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
 #include <linux/console.h>
 #include <linux/root_dev.h>
 #include <linux/utsname.h>
index 8ce98691d82257fb61a99c3881506bb790e60217..f1b44697ad680253f2fe0a7236887a17eac9bae4 100644 (file)
@@ -59,7 +59,9 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 
        split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
 
-       *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
+       *dma_handle = virt_to_phys(ret);
+       if (!WARN_ON(!dev))
+               *dma_handle -= PFN_PHYS(dev->dma_pfn_offset);
 
        return ret_nocache;
 }
@@ -69,9 +71,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size,
                               unsigned long attrs)
 {
        int order = get_order(size);
-       unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset;
+       unsigned long pfn = dma_handle >> PAGE_SHIFT;
        int k;
 
+       if (!WARN_ON(!dev))
+               pfn += dev->dma_pfn_offset;
+
        for (k = 0; k < (1 << order); k++)
                __free_pages(pfn_to_page(pfn + k), 0);
 
@@ -143,7 +148,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
        if (!memsize)
                return 0;
 
-       buf = dma_alloc_coherent(NULL, memsize, &dma_handle, GFP_KERNEL);
+       buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL);
        if (!buf) {
                pr_warning("%s: unable to allocate memory\n", name);
                return -ENOMEM;
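The two WARN_ON(!dev) hunks make the translation between CPU physical addresses and DMA addresses tolerate a NULL device instead of dereferencing it, now that platform_resource_setup_memory() passes a real device. The mapping itself is just a per-device offset counted in page frames; a hedged standalone model:

#include <stdint.h>

#define PAGE_SHIFT_MODEL 12
#define PFN_PHYS_MODEL(pfn) ((uint64_t)(pfn) << PAGE_SHIFT_MODEL)

struct dev_model { uint64_t dma_pfn_offset; };  /* stand-in for struct device */

static uint64_t phys_to_dma_model(const struct dev_model *dev, uint64_t phys)
{
        /* No device means no per-device offset to subtract. */
        return dev ? phys - PFN_PHYS_MODEL(dev->dma_pfn_offset) : phys;
}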
index ce0bbaa7e40403b37d0d6dbdf107693dd24a2613..4034035fbede8ae1f4101358b6703f28462f21d4 100644 (file)
@@ -211,59 +211,15 @@ void __init allocate_pgdat(unsigned int nid)
 
        NODE_DATA(nid) = __va(phys);
        memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-
-       NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 #endif
 
        NODE_DATA(nid)->node_start_pfn = start_pfn;
        NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 }
 
-static void __init bootmem_init_one_node(unsigned int nid)
-{
-       unsigned long total_pages, paddr;
-       unsigned long end_pfn;
-       struct pglist_data *p;
-
-       p = NODE_DATA(nid);
-
-       /* Nothing to do.. */
-       if (!p->node_spanned_pages)
-               return;
-
-       end_pfn = pgdat_end_pfn(p);
-
-       total_pages = bootmem_bootmap_pages(p->node_spanned_pages);
-
-       paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
-       if (!paddr)
-               panic("Can't allocate bootmap for nid[%d]\n", nid);
-
-       init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
-
-       free_bootmem_with_active_regions(nid, end_pfn);
-
-       /*
-        * XXX Handle initial reservations for the system memory node
-        * only for the moment, we'll refactor this later for handling
-        * reservations in other nodes.
-        */
-       if (nid == 0) {
-               struct memblock_region *reg;
-
-               /* Reserve the sections we're already using. */
-               for_each_memblock(reserved, reg) {
-                       reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
-               }
-       }
-
-       sparse_memory_present_with_active_regions(nid);
-}
-
 static void __init do_init_bootmem(void)
 {
        struct memblock_region *reg;
-       int i;
 
        /* Add active regions with valid PFNs. */
        for_each_memblock(memory, reg) {
@@ -279,9 +235,12 @@ static void __init do_init_bootmem(void)
 
        plat_mem_setup();
 
-       for_each_online_node(i)
-               bootmem_init_one_node(i);
+       for_each_memblock(memory, reg) {
+               int nid = memblock_get_region_node(reg);
 
+               memory_present(nid, memblock_region_memory_base_pfn(reg),
+                       memblock_region_memory_end_pfn(reg));
+       }
        sparse_init();
 }
 
@@ -322,7 +281,6 @@ void __init paging_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        unsigned long vaddr, end;
-       int nid;
 
        sh_mv.mv_mem_init();
 
@@ -377,21 +335,7 @@ void __init paging_init(void)
        kmap_coherent_init();
 
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-
-       for_each_online_node(nid) {
-               pg_data_t *pgdat = NODE_DATA(nid);
-               unsigned long low, start_pfn;
-
-               start_pfn = pgdat->bdata->node_min_pfn;
-               low = pgdat->bdata->node_low_pfn;
-
-               if (max_zone_pfns[ZONE_NORMAL] < low)
-                       max_zone_pfns[ZONE_NORMAL] = low;
-
-               printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
-                      nid, start_pfn, low);
-       }
-
+       max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
        free_area_init_nodes(max_zone_pfns);
 }
 
index 05713d190247c6842907a35c8f72b856ab1ce2dd..830e8b3684e4248417464934b8634e4d303f4a16 100644 (file)
@@ -8,7 +8,6 @@
  * for more details.
  */
 #include <linux/module.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/numa.h>
@@ -26,9 +25,7 @@ EXPORT_SYMBOL_GPL(node_data);
  */
 void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 {
-       unsigned long bootmap_pages;
        unsigned long start_pfn, end_pfn;
-       unsigned long bootmem_paddr;
 
        /* Don't allow bogus node assignment */
        BUG_ON(nid >= MAX_NUMNODES || nid <= 0);
@@ -48,25 +45,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
                                             SMP_CACHE_BYTES, end));
        memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-       NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
        NODE_DATA(nid)->node_start_pfn = start_pfn;
        NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
-       /* Node-local bootmap */
-       bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-       bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
-                                      PAGE_SIZE, end);
-       init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
-                         start_pfn, end_pfn);
-
-       free_bootmem_with_active_regions(nid, end_pfn);
-
-       /* Reserve the pgdat and bootmap space with the bootmem allocator */
-       reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
-                            sizeof(struct pglist_data), BOOTMEM_DEFAULT);
-       reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
-                            bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
-
        /* It's up */
        node_set_online(nid);
 
index 722951908b0a45c7dafd8a5861833b221ebe28d2..4f6676fe4bcc8981384928eec5a715f4d99e5225 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
+ * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
index 1a0fa10cb6b721747b479651119b095d05893de0..32bae68e34c1b617c2e1547cbea9f609dc285d8f 100644 (file)
@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
        if (err) {
                printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
                       dev_name(&vdev->dev), err);
-               kfree(vdev);
+               put_device(&vdev->dev);
                return NULL;
        }
        if (vdev->dp)
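The sparc/vio fix follows the driver-core rule: once device_register() has been attempted, the embedded kobject is live and refcounted, so the error path must drop the reference with put_device(), which runs the release callback, rather than kfree() the containing structure. A hedged sketch of the general pattern:

/* Sketch of the error-path rule for refcounted devices: */
err = device_register(&vdev->dev);
if (err) {
        /* kfree(vdev) here would bypass ->release() and can double-free
         * or leak resources tied to the kobject. */
        put_device(&vdev->dev);
        return NULL;
}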
index 76fa8e95b721bbddcbc2f1693729bd7cfa56b0c7..d32aac3a25b853fcee81df03118f9d3c7adecd41 100644 (file)
@@ -1,4 +1,7 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
+ifeq ($(BITS),32)
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
+endif
index 428f7fd1917551c2f1235f13fd8c33255fc95d7f..fbc836f1c51cdbe702dca74c86285f42bbf63a8b 100644 (file)
 #define I5             0x1d
 #define FP             0x1e
 #define I7             0x1f
-
-#define r_SKB          L0
-#define r_HEADLEN      L4
-#define r_SKB_DATA     L5
-#define r_TMP          G1
-#define r_TMP2         G3
-
-/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
-extern u32 bpf_jit_load_word[];
-extern u32 bpf_jit_load_half[];
-extern u32 bpf_jit_load_byte[];
-extern u32 bpf_jit_load_byte_msh[];
-extern u32 bpf_jit_load_word_positive_offset[];
-extern u32 bpf_jit_load_half_positive_offset[];
-extern u32 bpf_jit_load_byte_positive_offset[];
-extern u32 bpf_jit_load_byte_msh_positive_offset[];
-extern u32 bpf_jit_load_word_negative_offset[];
-extern u32 bpf_jit_load_half_negative_offset[];
-extern u32 bpf_jit_load_byte_negative_offset[];
-extern u32 bpf_jit_load_byte_msh_negative_offset[];
-
-#else
-#define r_RESULT       %o0
-#define r_SKB          %o0
-#define r_OFF          %o1
-#define r_HEADLEN      %l4
-#define r_SKB_DATA     %l5
-#define r_TMP          %g1
-#define r_TMP2         %g3
 #endif
 
 #endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S
deleted file mode 100644 (file)
index 7177867..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/ptrace.h>
-
-#include "bpf_jit_64.h"
-
-#define SAVE_SZ                176
-#define SCRATCH_OFF    STACK_BIAS + 128
-#define BE_PTR(label)  be,pn %xcc, label
-#define SIGN_EXTEND(reg)       sra reg, 0, reg
-
-#define SKF_MAX_NEG_OFF        (-0x200000) /* SKF_LL_OFF from filter.h */
-
-       .text
-       .globl  bpf_jit_load_word
-bpf_jit_load_word:
-       cmp     r_OFF, 0
-       bl      bpf_slow_path_word_neg
-        nop
-       .globl  bpf_jit_load_word_positive_offset
-bpf_jit_load_word_positive_offset:
-       sub     r_HEADLEN, r_OFF, r_TMP
-       cmp     r_TMP, 3
-       ble     bpf_slow_path_word
-        add    r_SKB_DATA, r_OFF, r_TMP
-       andcc   r_TMP, 3, %g0
-       bne     load_word_unaligned
-        nop
-       retl
-        ld     [r_TMP], r_RESULT
-load_word_unaligned:
-       ldub    [r_TMP + 0x0], r_OFF
-       ldub    [r_TMP + 0x1], r_TMP2
-       sll     r_OFF, 8, r_OFF
-       or      r_OFF, r_TMP2, r_OFF
-       ldub    [r_TMP + 0x2], r_TMP2
-       sll     r_OFF, 8, r_OFF
-       or      r_OFF, r_TMP2, r_OFF
-       ldub    [r_TMP + 0x3], r_TMP2
-       sll     r_OFF, 8, r_OFF
-       retl
-        or     r_OFF, r_TMP2, r_RESULT
-
-       .globl  bpf_jit_load_half
-bpf_jit_load_half:
-       cmp     r_OFF, 0
-       bl      bpf_slow_path_half_neg
-        nop
-       .globl  bpf_jit_load_half_positive_offset
-bpf_jit_load_half_positive_offset:
-       sub     r_HEADLEN, r_OFF, r_TMP
-       cmp     r_TMP, 1
-       ble     bpf_slow_path_half
-        add    r_SKB_DATA, r_OFF, r_TMP
-       andcc   r_TMP, 1, %g0
-       bne     load_half_unaligned
-        nop
-       retl
-        lduh   [r_TMP], r_RESULT
-load_half_unaligned:
-       ldub    [r_TMP + 0x0], r_OFF
-       ldub    [r_TMP + 0x1], r_TMP2
-       sll     r_OFF, 8, r_OFF
-       retl
-        or     r_OFF, r_TMP2, r_RESULT
-
-       .globl  bpf_jit_load_byte
-bpf_jit_load_byte:
-       cmp     r_OFF, 0
-       bl      bpf_slow_path_byte_neg
-        nop
-       .globl  bpf_jit_load_byte_positive_offset
-bpf_jit_load_byte_positive_offset:
-       cmp     r_OFF, r_HEADLEN
-       bge     bpf_slow_path_byte
-        nop
-       retl
-        ldub   [r_SKB_DATA + r_OFF], r_RESULT
-
-#define bpf_slow_path_common(LEN)      \
-       save    %sp, -SAVE_SZ, %sp;     \
-       mov     %i0, %o0;               \
-       mov     %i1, %o1;               \
-       add     %fp, SCRATCH_OFF, %o2;  \
-       call    skb_copy_bits;          \
-        mov    (LEN), %o3;             \
-       cmp     %o0, 0;                 \
-       restore;
-
-bpf_slow_path_word:
-       bpf_slow_path_common(4)
-       bl      bpf_error
-        ld     [%sp + SCRATCH_OFF], r_RESULT
-       retl
-        nop
-bpf_slow_path_half:
-       bpf_slow_path_common(2)
-       bl      bpf_error
-        lduh   [%sp + SCRATCH_OFF], r_RESULT
-       retl
-        nop
-bpf_slow_path_byte:
-       bpf_slow_path_common(1)
-       bl      bpf_error
-        ldub   [%sp + SCRATCH_OFF], r_RESULT
-       retl
-        nop
-
-#define bpf_negative_common(LEN)                       \
-       save    %sp, -SAVE_SZ, %sp;                     \
-       mov     %i0, %o0;                               \
-       mov     %i1, %o1;                               \
-       SIGN_EXTEND(%o1);                               \
-       call    bpf_internal_load_pointer_neg_helper;   \
-        mov    (LEN), %o2;                             \
-       mov     %o0, r_TMP;                             \
-       cmp     %o0, 0;                                 \
-       BE_PTR(bpf_error);                              \
-        restore;
-
-bpf_slow_path_word_neg:
-       sethi   %hi(SKF_MAX_NEG_OFF), r_TMP
-       cmp     r_OFF, r_TMP
-       bl      bpf_error
-        nop
-       .globl  bpf_jit_load_word_negative_offset
-bpf_jit_load_word_negative_offset:
-       bpf_negative_common(4)
-       andcc   r_TMP, 3, %g0
-       bne     load_word_unaligned
-        nop
-       retl
-        ld     [r_TMP], r_RESULT
-
-bpf_slow_path_half_neg:
-       sethi   %hi(SKF_MAX_NEG_OFF), r_TMP
-       cmp     r_OFF, r_TMP
-       bl      bpf_error
-        nop
-       .globl  bpf_jit_load_half_negative_offset
-bpf_jit_load_half_negative_offset:
-       bpf_negative_common(2)
-       andcc   r_TMP, 1, %g0
-       bne     load_half_unaligned
-        nop
-       retl
-        lduh   [r_TMP], r_RESULT
-
-bpf_slow_path_byte_neg:
-       sethi   %hi(SKF_MAX_NEG_OFF), r_TMP
-       cmp     r_OFF, r_TMP
-       bl      bpf_error
-        nop
-       .globl  bpf_jit_load_byte_negative_offset
-bpf_jit_load_byte_negative_offset:
-       bpf_negative_common(1)
-       retl
-        ldub   [r_TMP], r_RESULT
-
-bpf_error:
-       /* Make the JIT program itself return zero. */
-       ret
-       restore %g0, %g0, %o0
index 48a25869349be70e4505e2f1d72d55cc4fab88a1..222785af550b46736676808b6e00d8d8cef9a286 100644 (file)
@@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_)
        }
 }
 
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG    2 /* ebx is used */
-#define SEEN_MEM     4 /* use mem[] for temporary storage */
-
 #define S13(X)         ((X) & 0x1fff)
 #define S5(X)          ((X) & 0x1f)
 #define IMMED          0x00002000
@@ -198,7 +194,6 @@ struct jit_ctx {
        bool                    tmp_1_used;
        bool                    tmp_2_used;
        bool                    tmp_3_used;
-       bool                    saw_ld_abs_ind;
        bool                    saw_frame_pointer;
        bool                    saw_call;
        bool                    saw_tail_call;
@@ -207,9 +202,7 @@ struct jit_ctx {
 
 #define TMP_REG_1      (MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2      (MAX_BPF_JIT_REG + 1)
-#define SKB_HLEN_REG   (MAX_BPF_JIT_REG + 2)
-#define SKB_DATA_REG   (MAX_BPF_JIT_REG + 3)
-#define TMP_REG_3      (MAX_BPF_JIT_REG + 4)
+#define TMP_REG_3      (MAX_BPF_JIT_REG + 2)
 
 /* Map BPF registers to SPARC registers */
 static const int bpf2sparc[] = {
@@ -238,9 +231,6 @@ static const int bpf2sparc[] = {
        [TMP_REG_1] = G1,
        [TMP_REG_2] = G2,
        [TMP_REG_3] = G3,
-
-       [SKB_HLEN_REG] = L4,
-       [SKB_DATA_REG] = L5,
 };
 
 static void emit(const u32 insn, struct jit_ctx *ctx)
@@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
        return 0;
 }
 
-static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb)
-{
-       const u8 r_headlen = bpf2sparc[SKB_HLEN_REG];
-       const u8 r_data = bpf2sparc[SKB_DATA_REG];
-       const u8 r_tmp = bpf2sparc[TMP_REG_1];
-       unsigned int off;
-
-       off = offsetof(struct sk_buff, len);
-       emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx);
-
-       off = offsetof(struct sk_buff, data_len);
-       emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx);
-
-       emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx);
-
-       off = offsetof(struct sk_buff, data);
-       emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx);
-}
-
 /* Just skip the save instruction and the ctx register move.  */
 #define BPF_TAILCALL_PROLOGUE_SKIP     16
 #define BPF_TAILCALL_CNT_SP_OFF                (STACK_BIAS + 128)
@@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx)
 
        emit_reg_move(I0, O0, ctx);
        /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
-
-       if (ctx->saw_ld_abs_ind)
-               load_skb_regs(ctx, bpf2sparc[BPF_REG_1]);
 }
 
 static void build_epilogue(struct jit_ctx *ctx)
@@ -926,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        const int i = insn - ctx->prog->insnsi;
        const s16 off = insn->off;
        const s32 imm = insn->imm;
-       u32 *func;
 
        if (insn->src_reg == BPF_REG_FP)
                ctx->saw_frame_pointer = true;
@@ -1225,16 +1192,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                u8 *func = ((u8 *)__bpf_call_base) + imm;
 
                ctx->saw_call = true;
-               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
-                       emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
                emit_call((u32 *)func, ctx);
                emit_nop(ctx);
 
                emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
-
-               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
-                       load_skb_regs(ctx, L7);
                break;
        }
 
@@ -1412,43 +1374,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                emit_nop(ctx);
                break;
        }
-#define CHOOSE_LOAD_FUNC(K, func) \
-               ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-       case BPF_LD | BPF_ABS | BPF_W:
-               func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word);
-               goto common_load;
-       case BPF_LD | BPF_ABS | BPF_H:
-               func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half);
-               goto common_load;
-       case BPF_LD | BPF_ABS | BPF_B:
-               func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte);
-               goto common_load;
-       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-       case BPF_LD | BPF_IND | BPF_W:
-               func = bpf_jit_load_word;
-               goto common_load;
-       case BPF_LD | BPF_IND | BPF_H:
-               func = bpf_jit_load_half;
-               goto common_load;
-
-       case BPF_LD | BPF_IND | BPF_B:
-               func = bpf_jit_load_byte;
-       common_load:
-               ctx->saw_ld_abs_ind = true;
-
-               emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx);
-               emit_loadimm(imm, O1, ctx);
-
-               if (BPF_MODE(code) == BPF_IND)
-                       emit_alu(ADD, src, O1, ctx);
-
-               emit_call(func, ctx);
-               emit_alu_K(SRA, O1, 0, ctx);
-
-               emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
-               break;
 
        default:
                pr_err_once("unknown opcode %02x\n", code);
@@ -1583,12 +1508,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                build_epilogue(&ctx);
 
                if (bpf_jit_enable > 1)
-                       pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass,
+                       pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
                                image_size - (ctx.idx * 4),
                                ctx.tmp_1_used ? '1' : ' ',
                                ctx.tmp_2_used ? '2' : ' ',
                                ctx.tmp_3_used ? '3' : ' ',
-                               ctx.saw_ld_abs_ind ? 'L' : ' ',
                                ctx.saw_frame_pointer ? 'F' : ' ',
                                ctx.saw_call ? 'C' : ' ',
                                ctx.saw_tail_call ? 'T' : ' ');
index 00fcf81f2c569b813a4b48f0800fc3ac72684d33..d51a71dcbac2ada69e489fc67086a3fc2d158917 100644 (file)
@@ -52,6 +52,7 @@ config X86
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
+       select ARCH_HAS_FILTER_PGPROT
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_KCOV                    if X86_64
@@ -137,7 +138,7 @@ config X86
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
-       select HAVE_EBPF_JIT                    if X86_64
+       select HAVE_EBPF_JIT
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select HAVE_EXIT_THREAD
        select HAVE_FENTRY                      if X86_64 || DYNAMIC_FTRACE
@@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HAS_CACHE_LINE_SIZE
        def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+       def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
        def_bool y
 
index 47d3efff6805b9787d4be1721cd8cb790818e10f..09f36c0d9d4fe187886bd6bb49892c7de1a434c2 100644 (file)
@@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
        if (status != EFI_SUCCESS)
                goto free_struct;
 
-       memcpy(rom->romdata, pci->romimage, pci->romsize);
+       memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
+              pci->romsize);
        return status;
 
 free_struct:
@@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
        if (status != EFI_SUCCESS)
                goto free_struct;
 
-       memcpy(rom->romdata, pci->romimage, pci->romsize);
+       memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
+              pci->romsize);
        return status;
 
 free_struct:
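In both __setup_efi_pci32() and __setup_efi_pci64() the romimage field arrives from firmware as an integer type rather than a pointer, so the copy needs an explicit conversion; casting through unsigned long first is the usual way to turn such a field into a pointer without truncation warnings. A minimal hedged sketch:

#include <stdint.h>
#include <string.h>

/* Sketch: firmware hands back the ROM image address as a plain integer. */
static void copy_option_rom(void *dst, uint64_t romimage, size_t romsize)
{
        /* Cast via unsigned long so the conversion is well-defined on both
         * 32-bit and 64-bit builds. */
        memcpy(dst, (void *)(unsigned long)romimage, romsize);
}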
index fca012baba19fff5cb9aacad170b7e2d9b5cb2d9..8169e8b7a4dc1280f40e835085b3a5e502c11466 100644 (file)
@@ -305,6 +305,25 @@ ENTRY(startup_64)
        /* Set up the stack */
        leaq    boot_stack_end(%rbx), %rsp
 
+       /*
+        * paging_prepare() and cleanup_trampoline() below can have GOT
+        * references. Adjust the table with the address we are running at.
+        *
+        * Zero RAX for adjust_got: the GOT was not adjusted before;
+        * there's no adjustment to undo.
+        */
+       xorq    %rax, %rax
+
+       /*
+        * Calculate the address the binary is loaded at and use it as
+        * a GOT adjustment.
+        */
+       call    1f
+1:     popq    %rdi
+       subq    $1b, %rdi
+
+       call    adjust_got
+
        /*
         * At this point we are in long mode with 4-level paging enabled,
         * but we might want to enable 5-level paging or vice versa.
@@ -370,10 +389,14 @@ trampoline_return:
        /*
         * cleanup_trampoline() would restore trampoline memory.
         *
+        * RDI is the address of the page table to use instead of the page
+        * table in trampoline memory (if required).
+        *
         * RSI holds real mode data and needs to be preserved across
         * this function call.
         */
        pushq   %rsi
+       leaq    top_pgtable(%rbx), %rdi
        call    cleanup_trampoline
        popq    %rsi
 
@@ -381,6 +404,21 @@ trampoline_return:
        pushq   $0
        popfq
 
+       /*
+        * Previously we've adjusted the GOT with the address the binary
+        * was loaded at. Now we need to re-adjust for the relocation address.
+        *
+        * Calculate the address the binary is loaded at, so that we can
+        * undo the previous GOT adjustment.
+        */
+       call    1f
+1:     popq    %rax
+       subq    $1b, %rax
+
+       /* The new adjustment is the relocation address */
+       movq    %rbx, %rdi
+       call    adjust_got
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
@@ -481,19 +519,6 @@ relocated:
        shrq    $3, %rcx
        rep     stosq
 
-/*
- * Adjust our own GOT
- */
-       leaq    _got(%rip), %rdx
-       leaq    _egot(%rip), %rcx
-1:
-       cmpq    %rcx, %rdx
-       jae     2f
-       addq    %rbx, (%rdx)
-       addq    $8, %rdx
-       jmp     1b
-2:
-       
 /*
  * Do the extraction, and jump to the new kernel..
  */
@@ -512,6 +537,27 @@ relocated:
  */
        jmp     *%rax
 
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch the GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+       /* Walk through the GOT adding the address to the entries */
+       leaq    _got(%rip), %rdx
+       leaq    _egot(%rip), %rcx
+1:
+       cmpq    %rcx, %rdx
+       jae     2f
+       subq    %rax, (%rdx)    /* Undo previous adjustment */
+       addq    %rdi, (%rdx)    /* Apply the new adjustment */
+       addq    $8, %rdx
+       jmp     1b
+2:
+       ret
+
        .code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +695,10 @@ boot_stack_end:
        .balign 4096
 pgtable:
        .fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+       .fill PAGE_SIZE, 1, 0
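The point of the new adjust_got helper is that the GOT fixup becomes repeatable: each entry has the previous adjustment subtracted before the new one is added, so the decompressor can fix the table once for its load address and again after relocating itself. A hedged C model of the loop:

#include <stdint.h>
#include <stddef.h>

/* Model of adjust_got: GOT entries hold absolute addresses. */
static void adjust_got_model(uint64_t *got, size_t entries,
                             uint64_t prev_adj, uint64_t new_adj)
{
        for (size_t i = 0; i < entries; i++)
                got[i] = got[i] - prev_adj + new_adj;   /* undo, then apply */
}
/* The first call passes prev_adj == 0, matching the 'xorq %rax, %rax' above. */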
index 32af1cbcd9030f64df39554d430d9be9196a53ec..a362fa0b849c70c3da1e290ea9bf536272ca62c7 100644 (file)
@@ -22,14 +22,6 @@ struct paging_config {
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
 /*
  * Trampoline address will be printed by extract_kernel() for debugging
  * purposes.
@@ -134,7 +126,7 @@ struct paging_config paging_prepare(void)
        return paging_config;
 }
 
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
 {
        void *trampoline_pgtable;
 
@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
         * if it's there.
         */
        if ((void *)__native_read_cr3() == trampoline_pgtable) {
-               memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
-               native_write_cr3((unsigned long)top_pgtable);
+               memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+               native_write_cr3((unsigned long)pgtable);
        }
 
        /* Restore trampoline memory */
index 9af927e59d49745757dd1bf07ad148df0db5a006..9de7f1e1dede7f6e6ebdc66e5f63756a173cdc0a 100644 (file)
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rdx                    /* pt_regs->dx */
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   $0                      /* pt_regs->r8  = 0 */
+       pushq   %r8                     /* pt_regs->r8 */
        xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   $0                      /* pt_regs->r9  = 0 */
+       pushq   %r9                     /* pt_regs->r9 */
        xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   $0                      /* pt_regs->r10 = 0 */
+       pushq   %r10                    /* pt_regs->r10 */
        xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   $0                      /* pt_regs->r11 = 0 */
+       pushq   %r11                    /* pt_regs->r11 */
        xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
deleted file mode 100644 (file)
index 541468e..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso-fakesections.c"
index a6006e7bb729a808d5a58829bb2524db98135c2f..45b2b1c93d042231979e621970167f583cb444f5 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/nospec.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 
        config = attr->config;
 
-       cache_type = (config >>  0) & 0xff;
+       cache_type = (config >> 0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;
+       cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
 
        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;
+       cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
 
        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;
+       cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
 
        val = hw_cache_event_ids[cache_type][cache_op][cache_result];
 
@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
        if (attr->config >= x86_pmu.max_events)
                return -EINVAL;
 
+       attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
+
        /*
         * The generic map:
         */
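Every hunk in this file applies the same Spectre-v1 hardening idiom: bounds-check the user-controllable index first, then clamp it with array_index_nospec() so a mis-speculated path cannot use an out-of-range value. The kernel implements the clamp with arch-specific branchless code; a hedged userspace model of the semantics:

#include <stddef.h>

/* Model of array_index_nospec(): yields idx when idx < size, else 0,
 * computed without a data-dependent branch on the result. */
static size_t index_nospec_model(size_t idx, size_t size)
{
        size_t mask = (size_t)0 - (size_t)(idx < size); /* all-ones iff in bounds */
        return idx & mask;
}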
index 607bf565a90c97c3d0d72bf979fdf88cf10c8e28..707b2a96e516b5579c9321cbf0822a627bc8a8d8 100644 (file)
@@ -3339,7 +3339,8 @@ static void intel_pmu_cpu_starting(int cpu)
 
        cpuc->lbr_sel = NULL;
 
-       flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+       if (x86_pmu.version > 1)
+               flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
 
        if (!cpuc->shared_regs)
                return;
@@ -3502,6 +3503,8 @@ static __initconst const struct x86_pmu core_pmu = {
        .cpu_dying              = intel_pmu_cpu_dying,
 };
 
+static struct attribute *intel_pmu_attrs[];
+
 static __initconst const struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
@@ -3533,6 +3536,8 @@ static __initconst const struct x86_pmu intel_pmu = {
        .format_attrs           = intel_arch3_formats_attr,
        .events_sysfs_show      = intel_event_sysfs_show,
 
+       .attrs                  = intel_pmu_attrs,
+
        .cpu_prepare            = intel_pmu_cpu_prepare,
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
@@ -3911,8 +3916,6 @@ __init int intel_pmu_init(void)
 
        x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
 
-
-       x86_pmu.attrs                   = intel_pmu_attrs;
        /*
         * Quirk: v2 perfmon does not report fixed-purpose events, so
         * assume at least 3 events, when not running in a hypervisor:
index 9aca448bb8e63323738147cbc1c6e95d62ecdd58..9f8084f18d58e440931f593d42cc825bbc00cfb8 100644 (file)
@@ -92,6 +92,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
+#include <linux/nospec.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include "../perf_event.h"
@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
        } else if (event->pmu == &cstate_pkg_pmu) {
                if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
                        return -EINVAL;
+               cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
                if (!pkg_msr[cfg].attr)
                        return -EINVAL;
                event->hw.event_base = pkg_msr[cfg].msr;
index c98b943e58b4fca0a5144a20342229b013fe1ee1..77076a102e34a46252a9758d16f6e8f4fefc4012 100644 (file)
@@ -3028,10 +3028,27 @@ static struct intel_uncore_type bdx_uncore_cbox = {
        .format_group           = &hswep_uncore_cbox_format_group,
 };
 
+static struct intel_uncore_type bdx_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 4,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
+       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
+       .ops                    = &hswep_uncore_sbox_msr_ops,
+       .format_group           = &hswep_uncore_sbox_format_group,
+};
+
+#define BDX_MSR_UNCORE_SBOX    3
+
 static struct intel_uncore_type *bdx_msr_uncores[] = {
        &bdx_uncore_ubox,
        &bdx_uncore_cbox,
        &hswep_uncore_pcu,
+       &bdx_uncore_sbox,
        NULL,
 };
 
@@ -3043,10 +3060,25 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
 
 void bdx_uncore_cpu_init(void)
 {
+       int pkg = topology_phys_to_logical_pkg(0);
+
        if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
                bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
        uncore_msr_uncores = bdx_msr_uncores;
 
+       /* BDX-DE doesn't have SBOX */
+       if (boot_cpu_data.x86_model == 86) {
+               uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+       /* Detect systems with no SBOXes */
+       } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
+               struct pci_dev *pdev;
+               u32 capid4;
+
+               pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3];
+               pci_read_config_dword(pdev, 0x94, &capid4);
+               if (((capid4 >> 6) & 0x3) == 0)
+                       bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+       }
        hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
 }
 
@@ -3264,6 +3296,11 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
                .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
        },
+       { /* PCU.3 (for Capability registers) */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  HSWEP_PCI_PCU_3),
+       },
        { /* end: all zeroes */ }
 };
 
index e7edf19e64c27c5ecb5eacfb06d139ea28b290f4..b4771a6ddbc1b6686549ee306d3299d68a8b2e3e 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/perf_event.h>
+#include <linux/nospec.h>
 #include <asm/intel-family.h>
 
 enum perf_msr_id {
@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
 
-       if (cfg >= PERF_MSR_EVENT_MAX)
-               return -EINVAL;
-
        /* unsupported modes and filters */
        if (event->attr.exclude_user   ||
            event->attr.exclude_kernel ||
@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
            event->attr.sample_period) /* no sampling */
                return -EINVAL;
 
+       if (cfg >= PERF_MSR_EVENT_MAX)
+               return -EINVAL;
+
+       cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
+
        if (!msr[cfg].attr)
                return -EINVAL;
 
index 386a6900e206f6578e3b38ee7f085d36ac50a928..219faaec51dfa192f69d8893c8844219c0c89029 100644 (file)
 #endif
 
 #ifndef __ASSEMBLY__
-#ifndef __BPF__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
-#endif
 
 #endif /* _ASM_X86_ASM_H */
index b27da9602a6dfdf406180214b669a712a126f701..aced6c9290d6f96cdaf4eaadab3dd3835d80b94a 100644 (file)
@@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 
 #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
 
+#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO)
+
+/*
+ * Workaround for the sake of BPF compilation which utilizes kernel
+ * headers, but clang does not support ASM GOTO and fails the build.
+ */
+#ifndef __BPF_TRACING__
+#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
+#endif
+
+#define static_cpu_has(bit)            boot_cpu_has(bit)
+
+#else
+
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These will statically patch the target code for additional
@@ -195,6 +209,7 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
                boot_cpu_has(bit) :                             \
                _static_cpu_has(bit)                            \
 )
+#endif
 
 #define cpu_has_bug(c, bit)            cpu_has(c, (bit))
 #define set_cpu_bug(c, bit)            set_cpu_cap(c, (bit))
index d554c11e01ff46742d53148df0ffb9c3476e8d6e..578793e97431da25b0d5f3cbc20ae4c0655db075 100644 (file)
 #define X86_FEATURE_AVX512_VPOPCNTDQ   (16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57               (16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID              (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_CLDEMOTE           (16*32+25) /* CLDEMOTE instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV     (17*32+ 0) /* MCA overflow recovery support */
index 09ad8857274692b88942404a8f4e1d5a01624cd0..cc8f8fcf9b4aad1031b80a37717e3fcbb7aced98 100644 (file)
@@ -46,7 +46,21 @@ int ftrace_int3_handler(struct pt_regs *regs);
 #endif /* CONFIG_FUNCTION_TRACER */
 
 
-#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS)
+#ifndef __ASSEMBLY__
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
+{
+       /*
+        * Compare the symbol name with the system call name. Skip the
+        * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+        */
+       return !strcmp(sym + 3, name + 3) ||
+               (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
+               (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+}
+
+#ifndef COMPILE_OFFSETS
 
 #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
 #include <asm/compat.h>
@@ -67,6 +81,7 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
        return false;
 }
 #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
-#endif /* !__ASSEMBLY__  && !COMPILE_OFFSETS */
+#endif /* !COMPILE_OFFSETS */
+#endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_FTRACE_H */
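The new arch_syscall_match_sym_name() exists because the x86 syscall-wrapper rework renamed syscall symbols: "__x64_sys_read" and "__ia32_sys_read" must still match the generic "sys_read" that the ftrace syscall tracer looks up. The offsets 3, 9 and 10 skip "sys", "__x64_sys" and "__ia32_sys" respectively; a standalone check of the string arithmetic:

#include <assert.h>
#include <string.h>

static int match(const char *sym, const char *name)
{
        return !strcmp(sym + 3, name + 3) ||
               (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
               (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
}

int main(void)
{
        assert(match("sys_read", "sys_read"));
        assert(match("__x64_sys_read", "sys_read"));
        assert(match("__ia32_sys_read", "sys_read"));
        return 0;
}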
index b3e32b010ab194ed613034234c403c4067502776..c2c01f84df75f1f9b35a3c898686a82973026d88 100644 (file)
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
        return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+       return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+               (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+                X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
 #endif /* _ASM_X86_INSN_H */
index 404c5fdff859ded0e55325ac601ced0332910aac..548d90bbf919e8d75f9983a2cb97235637708bd8 100644 (file)
  * (0x80 is the syscall vector, 0x30-0x3f are for ISA)
  */
 #define FIRST_EXTERNAL_VECTOR          0x20
-/*
- * We start allocating at 0x21 to spread out vectors evenly between
- * priority levels. (0x80 is the syscall vector)
- */
-#define VECTOR_OFFSET_START            1
 
 /*
  * Reserve the lowest usable vector (and hence lowest priority)  0x20 for
 #define FIRST_SYSTEM_VECTOR            NR_VECTORS
 #endif
 
-#define FPU_IRQ                                  13
-
 /*
  * Size the maximum number of interrupts.
  *
index b885a961a150f079e38d4e5dbb8670ef9fde0b57..a34897aef2c295a8100716263d06153f1a1811e7 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL2.0 */
+/* SPDX-License-Identifier: GPL-2.0 */
 
 /*
  * Jailhouse paravirt detection
index 57e3785d0d26e19f9134485399d1b3aac01b3a2b..cf9911b5a53cb1de017a27cf20f866e42d2199fd 100644 (file)
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
-               /* pkey 0 is the default and always allocated */
+               /* pkey 0 is the default and allocated implicitly */
                mm->context.pkey_allocation_map = 0x1;
                /* -1 means unallocated or invalid */
                mm->context.execute_only_pkey = -1;
index f928ad9b143fedea1085dedc508658fa745b4ceb..2f700a1db851ea3ad907a81a8b595dcba213d773 100644 (file)
@@ -291,16 +291,20 @@ do {                                                                      \
  *    lfence
  *    jmp spec_trap
  *  do_rop:
- *    mov %rax,(%rsp)
+ *    mov %rax,(%rsp) for x86_64
+ *    mov %edx,(%esp) for x86_32
  *    retq
  *
  * Without retpolines configured:
  *
- *    jmp *%rax
+ *    jmp *%rax for x86_64
+ *    jmp *%edx for x86_32
  */
 #ifdef CONFIG_RETPOLINE
-# define RETPOLINE_RAX_BPF_JIT_SIZE    17
-# define RETPOLINE_RAX_BPF_JIT()                               \
+# ifdef CONFIG_X86_64
+#  define RETPOLINE_RAX_BPF_JIT_SIZE   17
+#  define RETPOLINE_RAX_BPF_JIT()                              \
+do {                                                           \
        EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
        /* spec_trap: */                                        \
        EMIT2(0xF3, 0x90);       /* pause */                    \
@@ -308,11 +312,30 @@ do {                                                                      \
        EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
        /* do_rop: */                                           \
        EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */    \
-       EMIT1(0xC3);             /* retq */
-#else
-# define RETPOLINE_RAX_BPF_JIT_SIZE    2
-# define RETPOLINE_RAX_BPF_JIT()                               \
-       EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+       EMIT1(0xC3);             /* retq */                     \
+} while (0)
+# else /* !CONFIG_X86_64 */
+#  define RETPOLINE_EDX_BPF_JIT()                              \
+do {                                                           \
+       EMIT1_off32(0xE8, 7);    /* call do_rop */              \
+       /* spec_trap: */                                        \
+       EMIT2(0xF3, 0x90);       /* pause */                    \
+       EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
+       EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
+       /* do_rop: */                                           \
+       EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */          \
+       EMIT1(0xC3);             /* ret */                      \
+} while (0)
+# endif
+#else /* !CONFIG_RETPOLINE */
+# ifdef CONFIG_X86_64
+#  define RETPOLINE_RAX_BPF_JIT_SIZE   2
+#  define RETPOLINE_RAX_BPF_JIT()                              \
+       EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+# else /* !CONFIG_X86_64 */
+#  define RETPOLINE_EDX_BPF_JIT()                              \
+       EMIT2(0xFF, 0xE2)        /* jmp *%edx */
+# endif
 #endif
 
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
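
For readers decoding the EMIT sequences above: the 32-bit retpoline thunk occupies sixteen bytes, laid out below as an illustrative standalone table (not kernel source) with the instruction each group encodes. The call pushes the address of spec_trap as the return slot, do_rop overwrites that slot with the branch target held in %edx, and ret jumps there; mispredicted speculation is parked in the pause/lfence loop.

    #include <stdint.h>

    /* The 16 bytes RETPOLINE_EDX_BPF_JIT() writes into the JIT image: */
    static const uint8_t retpoline_edx[] = {
            0xE8, 0x07, 0x00, 0x00, 0x00,   /* call do_rop   (rel32 = +7) */
            /* spec_trap: */
            0xF3, 0x90,                     /* pause                      */
            0x0F, 0xAE, 0xE8,               /* lfence                     */
            0xEB, 0xF9,                     /* jmp spec_trap (rel8 = -7)  */
            /* do_rop: */
            0x89, 0x14, 0x24,               /* mov %edx,(%esp)            */
            0xC3,                           /* ret                        */
    };
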
index 5f49b4ff0c248493f77cc12cadee0c2b6261c25d..f1633de5a675925b7b250e5b2e9e16b2ae6dd5e5 100644 (file)
@@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+       return canon_pgprot(prot);
+}
+
 static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
                                         enum page_cache_mode pcm,
                                         enum page_cache_mode new_pcm)
index d5c21a38247570cf025b33ef2c6c203a9d3a4aef..adb47552e6bb700158788b4d42094f2d83931c86 100644 (file)
@@ -105,14 +105,14 @@ extern unsigned int ptrs_per_p4d;
 #define LDT_PGD_ENTRY          (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
 #define LDT_BASE_ADDR          (LDT_PGD_ENTRY << PGDIR_SHIFT)
 
-#define __VMALLOC_BASE_L4      0xffffc90000000000
-#define __VMALLOC_BASE_L5      0xffa0000000000000
+#define __VMALLOC_BASE_L4      0xffffc90000000000UL
+#define __VMALLOC_BASE_L5      0xffa0000000000000UL
 
 #define VMALLOC_SIZE_TB_L4     32UL
 #define VMALLOC_SIZE_TB_L5     12800UL
 
-#define __VMEMMAP_BASE_L4      0xffffea0000000000
-#define __VMEMMAP_BASE_L5      0xffd4000000000000
+#define __VMEMMAP_BASE_L4      0xffffea0000000000UL
+#define __VMEMMAP_BASE_L5      0xffd4000000000000UL
 
 #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
 # define VMALLOC_START         vmalloc_base
index a0ba1ffda0dfd3dc9ee335f04cee58aafdea6fb8..851c04b7a0922cd3ea79f3a574246b6a94262d40 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_PKEYS_H
 #define _ASM_X86_PKEYS_H
 
+#define ARCH_DEFAULT_PKEY      0
+
 #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
 
 extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
 static inline int execute_only_pkey(struct mm_struct *mm)
 {
        if (!boot_cpu_has(X86_FEATURE_OSPKE))
-               return 0;
+               return ARCH_DEFAULT_PKEY;
 
        return __execute_only_pkey(mm);
 }
@@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
        /*
         * "Allocated" pkeys are those that have been returned
-        * from pkey_alloc().  pkey 0 is special, and never
-        * returned from pkey_alloc().
+        * from pkey_alloc() or pkey 0 which is allocated
+        * implicitly when the mm is created.
         */
-       if (pkey <= 0)
+       if (pkey < 0)
                return false;
        if (pkey >= arch_max_pkey())
                return false;
+       /*
+        * The exec-only pkey is set in the allocation map, but
+        * is not available to any of the user interfaces like
+        * mprotect_pkey().
+        */
+       if (pkey == mm->context.execute_only_pkey)
+               return false;
+
        return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
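
Seen from userspace, the rules above mean pkey_alloc() never returns 0 (taken implicitly by every mm) and never returns the hidden exec-only key. A minimal sketch, assuming glibc's pkey wrappers and OSPKE-capable hardware:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            /* Ask for a fresh key, with writes disabled by default. */
            int pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);

            if (pkey < 0) {
                    perror("pkey_alloc");   /* no OSPKE, or keys exhausted */
                    return 1;
            }
            printf("got pkey %d\n", pkey);  /* never 0: pkey 0 is implicit */
            pkey_free(pkey);
            return 0;
    }
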
 
index 4fa4206029e3e126abc671ff0af71d6660c737fc..21a114914ba435e9f56b1c6df941bca1d72b9588 100644 (file)
@@ -749,13 +749,11 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
-extern void early_trap_init(void);
 void early_trap_pf_init(void);
 
 /* Defined in head.S */
 extern struct desc_ptr         early_gdt_descr;
 
-extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
index 4c851ebb3cebd4654f9dabed1e9c19520b072c0d..0ede697c396119be61eb223a40607ff6e6e985ef 100644 (file)
@@ -29,7 +29,7 @@
 #define KVM_FEATURE_PV_TLB_FLUSH       9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT    10
 
-#define KVM_HINTS_DEDICATED      0
+#define KVM_HINTS_REALTIME      0
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
index 809134c644a677032e9eb2ae5d5a844806bc347e..90ab9a795b49329b5d06cdb45e15cb800a04ff63 100644 (file)
@@ -1 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_X64_MSGBUF_H
+#define __ASM_X64_MSGBUF_H
+
+#if !defined(__x86_64__) || !defined(__ILP32__)
 #include <asm-generic/msgbuf.h>
+#else
+/*
+ * The msqid64_ds structure for x86 architecture with x32 ABI.
+ *
+ * On x86-32 and x86-64 we can just use the generic definition, but
+ * x32 uses the same binary layout as x86_64, which is different
+ * from other 32-bit architectures.
+ */
+
+struct msqid64_ds {
+       struct ipc64_perm msg_perm;
+       __kernel_time_t msg_stime;      /* last msgsnd time */
+       __kernel_time_t msg_rtime;      /* last msgrcv time */
+       __kernel_time_t msg_ctime;      /* last change time */
+       __kernel_ulong_t msg_cbytes;    /* current number of bytes on queue */
+       __kernel_ulong_t msg_qnum;      /* number of messages in queue */
+       __kernel_ulong_t msg_qbytes;    /* max number of bytes on queue */
+       __kernel_pid_t msg_lspid;       /* pid of last msgsnd */
+       __kernel_pid_t msg_lrpid;       /* last receive pid */
+       __kernel_ulong_t __unused4;
+       __kernel_ulong_t __unused5;
+};
+
+#endif
+
+#endif /* __ASM_X64_MSGBUF_H */
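
The guard at the top of this header singles x32 out by the one macro combination unique to it. As a quick sketch (X86_ABI is an illustrative name, not a kernel define):

    #if defined(__x86_64__) && defined(__ILP32__)
    # define X86_ABI "x32"     /* 64-bit instruction set, 32-bit longs/pointers */
    #elif defined(__x86_64__)
    # define X86_ABI "x86-64"  /* LP64 */
    #else
    # define X86_ABI "ia32"    /* classic 32-bit */
    #endif
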
index 83c05fc2de385c3260286bf12fed846617c8c095..644421f3823beefb16ddc83979fbdf01c15c6a7a 100644 (file)
@@ -1 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_X86_SHMBUF_H
+#define __ASM_X86_SHMBUF_H
+
+#if !defined(__x86_64__) || !defined(__ILP32__)
 #include <asm-generic/shmbuf.h>
+#else
+/*
+ * The shmid64_ds structure for x86 architecture with x32 ABI.
+ *
+ * On x86-32 and x86-64 we can just use the generic definition, but
+ * x32 uses the same binary layout as x86_64, which is different
+ * from other 32-bit architectures.
+ */
+
+struct shmid64_ds {
+       struct ipc64_perm       shm_perm;       /* operation perms */
+       size_t                  shm_segsz;      /* size of segment (bytes) */
+       __kernel_time_t         shm_atime;      /* last attach time */
+       __kernel_time_t         shm_dtime;      /* last detach time */
+       __kernel_time_t         shm_ctime;      /* last change time */
+       __kernel_pid_t          shm_cpid;       /* pid of creator */
+       __kernel_pid_t          shm_lpid;       /* pid of last operator */
+       __kernel_ulong_t        shm_nattch;     /* no. of current attaches */
+       __kernel_ulong_t        __unused4;
+       __kernel_ulong_t        __unused5;
+};
+
+struct shminfo64 {
+       __kernel_ulong_t        shmmax;
+       __kernel_ulong_t        shmmin;
+       __kernel_ulong_t        shmmni;
+       __kernel_ulong_t        shmseg;
+       __kernel_ulong_t        shmall;
+       __kernel_ulong_t        __unused1;
+       __kernel_ulong_t        __unused2;
+       __kernel_ulong_t        __unused3;
+       __kernel_ulong_t        __unused4;
+};
+
+#endif
+
+#endif /* __ASM_X86_SHMBUF_H */
index dde444f932c135d11f93b22188689e3e97e8e046..3b20607d581b5340fcf5e0345b7e70f9e83d95ad 100644 (file)
@@ -215,6 +215,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
        apic_id = processor->local_apic_id;
        enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
 
+       /* Ignore invalid ID */
+       if (apic_id == 0xffffffff)
+               return 0;
+
        /*
         * We need to register disabled CPU as well to permit
         * counting disabled CPUs. This allows us to size
index c88e0b127810f22b15b53eb150d11e9584201885..b481b95bd8f6b9e439c5d72e42af21b18d250af4 100644 (file)
 #include <asm/amd_nb.h>
 
 #define PCI_DEVICE_ID_AMD_17H_ROOT     0x1450
+#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT        0x15d0
 #define PCI_DEVICE_ID_AMD_17H_DF_F3    0x1463
 #define PCI_DEVICE_ID_AMD_17H_DF_F4    0x1464
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
 
 /* Protect the PCI config register pairs used for SMN and DF indirect access. */
 static DEFINE_MUTEX(smn_mutex);
@@ -24,6 +27,7 @@ static u32 *flush_words;
 
 static const struct pci_device_id amd_root_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
        {}
 };
 
@@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
        {}
 };
@@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
        {}
 };
index 8b04234e010b2616e42bff6cbeabdc3c2087c13c..7685444a106bb29a3994a5d85066e60b2b4c0d09 100644 (file)
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
                        goto update;
        }
        cmsk = cluster_hotplug_mask;
+       cmsk->clusterid = cluster;
        cluster_hotplug_mask = NULL;
 update:
        this_cpu_write(cluster_masks, cmsk);
index 8a5b185735e1c5a49a1eafd19b74316d291efb7a..ce243f7d2d4e0879c9cbff297ff112e236a6802c 100644 (file)
@@ -848,6 +848,11 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
                c->x86_power = edx;
        }
 
+       if (c->extended_cpuid_level >= 0x80000008) {
+               cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+               c->x86_capability[CPUID_8000_0008_EBX] = ebx;
+       }
+
        if (c->extended_cpuid_level >= 0x8000000a)
                c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
@@ -871,7 +876,6 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 
                c->x86_virt_bits = (eax >> 8) & 0xff;
                c->x86_phys_bits = eax & 0xff;
-               c->x86_capability[CPUID_8000_0008_EBX] = ebx;
        }
 #ifdef CONFIG_X86_32
        else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
index b9693b80fc219c74b29800d226777de3e6f3130e..60d1897041da89c19b97f03ca2c577102ec3186c 100644 (file)
@@ -835,6 +835,9 @@ static const struct _tlb_table intel_tlb_table[] = {
        { 0x5d, TLB_DATA_4K_4M,         256,    " TLB_DATA 4 KByte and 4 MByte pages" },
        { 0x61, TLB_INST_4K,            48,     " TLB_INST 4 KByte pages, full associative" },
        { 0x63, TLB_DATA_1G,            4,      " TLB_DATA 1 GByte pages, 4-way set associative" },
+       { 0x6b, TLB_DATA_4K,            256,    " TLB_DATA 4 KByte pages, 8-way associative" },
+       { 0x6c, TLB_DATA_2M_4M,         128,    " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
+       { 0x6d, TLB_DATA_1G,            16,     " TLB_DATA 1 GByte pages, fully associative" },
        { 0x76, TLB_INST_2M_4M,         8,      " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
        { 0xb0, TLB_INST_4K,            128,    " TLB_INST 4 KByte pages, 4-way set associative" },
        { 0xb1, TLB_INST_2M_4M,         4,      " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
index f7666eef4a879b8e4b4da6a33ad941bc371babd4..c8e038800591674021c84bc66cae2fe4f02fcfc9 100644 (file)
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
        [SMCA_SMU]      = { "smu",              "System Management Unit" },
 };
 
+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+       [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
 const char *smca_get_name(enum smca_bank_types t)
 {
        if (t >= N_SMCA_BANK_TYPES)
@@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
        if (!block)
                return MSR_AMD64_SMCA_MCx_MISC(bank);
 
+       /* Check our cache first: */
+       if (smca_bank_addrs[bank][block] != -1)
+               return smca_bank_addrs[bank][block];
+
        /*
         * For SMCA enabled processors, BLKPTR field of the first MISC register
         * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
         */
        if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-               return addr;
+               goto out;
 
        if (!(low & MCI_CONFIG_MCAX))
-               return addr;
+               goto out;
 
        if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
            (low & MASK_BLKPTR_LO))
-               return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+               addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
 
+out:
+       smca_bank_addrs[bank][block] = addr;
        return addr;
 }
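
The new smca_bank_addrs[][] table is plain memoization: the rdmsr round-trips are paid once per (bank, block) pair instead of on every CPU that comes online. The shape of the pattern, with illustrative names (the GNU range-designator initializer mirrors the one used above):

    #include <stdint.h>

    #define NBANKS  32
    #define NBLOCKS 5

    static uint32_t cache[NBANKS][NBLOCKS] = {
            [0 ... NBANKS - 1] = { [0 ... NBLOCKS - 1] = UINT32_MAX }
    };

    static uint32_t slow_lookup(int bank, int block)
    {
            return (uint32_t)(bank * NBLOCKS + block);  /* stand-in for rdmsr */
    }

    static uint32_t cached_lookup(int bank, int block)
    {
            if (cache[bank][block] != UINT32_MAX)       /* cache hit */
                    return cache[bank][block];
            return cache[bank][block] = slow_lookup(bank, block);
    }
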
 
@@ -468,18 +479,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
                return addr;
 
-       /* Get address from already initialized block. */
-       if (per_cpu(threshold_banks, cpu)) {
-               struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
-               if (bankp && bankp->blocks) {
-                       struct threshold_block *blockp = &bankp->blocks[block];
-
-                       if (blockp)
-                               return blockp->address;
-               }
-       }
-
        if (mce_flags.smca)
                return smca_get_block_address(cpu, bank, block);
 
index 10c4fc2c91f8ed1dd6879c8b0b8aa24d7275cd3b..77e201301528817e32537194cf5e075b29aa9686 100644 (file)
@@ -564,14 +564,12 @@ static int __reload_late(void *info)
        apply_microcode_local(&err);
        spin_unlock(&update_lock);
 
+       /* siblings return UCODE_OK because their engine got updated already */
        if (err > UCODE_NFOUND) {
                pr_warn("Error reloading microcode on CPU %d\n", cpu);
-               return -1;
-       /* siblings return UCODE_OK because their engine got updated already */
+               ret = -1;
        } else if (err == UCODE_UPDATED || err == UCODE_OK) {
                ret = 1;
-       } else {
-               return ret;
        }
 
        /*
index 32b8e5724f966abbc67153065dd17b5ddcfd6d70..1c2cfa0644aa979c97cc01a42925a44c25f9f852 100644 (file)
@@ -485,7 +485,6 @@ static void show_saved_mc(void)
  */
 static void save_mc_for_early(u8 *mc, unsigned int size)
 {
-#ifdef CONFIG_HOTPLUG_CPU
        /* Synchronization during CPU hotplug. */
        static DEFINE_MUTEX(x86_cpu_microcode_mutex);
 
@@ -495,7 +494,6 @@ static void save_mc_for_early(u8 *mc, unsigned int size)
        show_saved_mc();
 
        mutex_unlock(&x86_cpu_microcode_mutex);
-#endif
 }
 
 static bool load_builtin_intel_microcode(struct cpio_data *cp)
index 0c408f8c4ed465722ed2411a25e451a439f4fcfa..2d29e47c056ea17d7c24103f41187b494a611da7 100644 (file)
@@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr)
 }
 #endif
 
+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be adjusted using fixup_pointer().
+ */
 unsigned long __head __startup_64(unsigned long physaddr,
                                  struct boot_params *bp)
 {
@@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
        p4dval_t *p4d;
        pudval_t *pud;
        pmdval_t *pmd, pmd_entry;
+       pteval_t *mask_ptr;
        bool la57;
        int i;
        unsigned int *next_pgt_ptr;
@@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 
        pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
        /* Filter out unsupported __PAGE_KERNEL_* bits: */
-       pmd_entry &= __supported_pte_mask;
+       mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
+       pmd_entry &= *mask_ptr;
        pmd_entry += sme_get_me_mask();
        pmd_entry +=  physaddr;
 
index fa183a131edc7b493dd86f47cf637ba42f15d274..a15fe0e92cf994be8a0d84284e6ee458c0f91e5a 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL2.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Jailhouse paravirt_ops implementation
  *
index 3182908b7e6c70c621f8e5f8446cb0cfbc850de9..7326078eaa7a6518d30cb98d6443d507299c70c0 100644 (file)
@@ -398,11 +398,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
         * little bit simple
         */
        efi_map_sz = efi_get_runtime_map_size();
-       efi_map_sz = ALIGN(efi_map_sz, 16);
        params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
                                MAX_ELFCOREHDR_STR_LEN;
        params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
-       kbuf.bufsz = params_cmdline_sz + efi_map_sz +
+       kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) +
                                sizeof(struct setup_data) +
                                sizeof(struct efi_setup_data);
 
@@ -410,7 +409,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
        if (!params)
                return ERR_PTR(-ENOMEM);
        efi_map_offset = params_cmdline_sz;
-       efi_setup_data_offset = efi_map_offset + efi_map_sz;
+       efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
 
        /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
        setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
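
The bug being fixed is an alignment bookkeeping slip: the raw efi_map_sz is still needed later, so only the places that lay out the buffer round it up. Kernel-style power-of-two round-up behaves like this (a worked sketch; ALIGN_UP is an illustrative stand-in for the kernel's ALIGN()):

    #include <assert.h>

    #define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))  /* a: power of two */

    int main(void)
    {
            assert(ALIGN_UP(52, 16) == 64);   /* rounds up    */
            assert(ALIGN_UP(64, 16) == 64);   /* already fits */
            return 0;
    }
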
index 0715f827607c4a2742e140f8d9a656ed4514d226..6f4d42377fe520c52a1c67ea0f25b27fc3eae2e1 100644 (file)
@@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
        if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                return 0;
 
+       /* We should not singlestep on the exception masking instructions */
+       if (insn_masking_exception(insn))
+               return 0;
+
 #ifdef CONFIG_X86_64
        /* Only x86_64 has RIP relative instructions */
        if (insn_rip_relative(insn)) {
index 7867417cfaff2b59ee2c2531072a632a9e810f41..5b2300b818af9333f8d57f6b082f426b8556b606 100644 (file)
@@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void)
 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
        native_smp_prepare_cpus(max_cpus);
-       if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+       if (kvm_para_has_hint(KVM_HINTS_REALTIME))
                static_branch_disable(&virt_spin_lock_key);
 }
 
@@ -553,7 +553,7 @@ static void __init kvm_guest_init(void)
        }
 
        if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-           !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+           !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
            kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
                pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
 
@@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
        int cpu;
 
        if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-           !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+           !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
            kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                for_each_possible_cpu(cpu) {
                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void)
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                return;
 
-       if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+       if (kvm_para_has_hint(KVM_HINTS_REALTIME))
                return;
 
        __pv_init_lock_hash();
index 60cdec6628b0d33a9ef87abb6a575a17f2263718..d1ab07ec8c9aca2090f42153efd1b6ad93d68ffe 100644 (file)
@@ -57,12 +57,17 @@ static void load_segments(void)
 static void machine_kexec_free_page_tables(struct kimage *image)
 {
        free_page((unsigned long)image->arch.pgd);
+       image->arch.pgd = NULL;
 #ifdef CONFIG_X86_PAE
        free_page((unsigned long)image->arch.pmd0);
+       image->arch.pmd0 = NULL;
        free_page((unsigned long)image->arch.pmd1);
+       image->arch.pmd1 = NULL;
 #endif
        free_page((unsigned long)image->arch.pte0);
+       image->arch.pte0 = NULL;
        free_page((unsigned long)image->arch.pte1);
+       image->arch.pte1 = NULL;
 }
 
 static int machine_kexec_alloc_page_tables(struct kimage *image)
@@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
            !image->arch.pmd0 || !image->arch.pmd1 ||
 #endif
            !image->arch.pte0 || !image->arch.pte1) {
-               machine_kexec_free_page_tables(image);
                return -ENOMEM;
        }
        return 0;
index a5e55d832d0a4d25309766057ee86ee9f1d282da..6010449ca6d2951197c533e35c50a36a2d27f99b 100644 (file)
@@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
 static void free_transition_pgtable(struct kimage *image)
 {
        free_page((unsigned long)image->arch.p4d);
+       image->arch.p4d = NULL;
        free_page((unsigned long)image->arch.pud);
+       image->arch.pud = NULL;
        free_page((unsigned long)image->arch.pmd);
+       image->arch.pmd = NULL;
        free_page((unsigned long)image->arch.pte);
+       image->arch.pte = NULL;
 }
 
 static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
@@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
        set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
        return 0;
 err:
-       free_transition_pgtable(image);
        return result;
 }
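
Both kexec teardown paths now follow the same pattern: free_page() tolerates a zero address, and clearing each pointer immediately after freeing makes the routine idempotent, so the allocation error paths can drop their local cleanup calls (the source of a potential double free) and leave teardown to the caller. A generic sketch:

    #include <stdlib.h>

    struct ctx { void *a, *b; };

    static void ctx_free(struct ctx *c)
    {
            free(c->a); c->a = NULL;        /* free(NULL) is a no-op...     */
            free(c->b); c->b = NULL;        /* ...so calling twice is safe  */
    }

    static int ctx_alloc(struct ctx *c)
    {
            c->a = malloc(64);
            c->b = malloc(64);
            if (!c->a || !c->b)
                    return -1;              /* caller runs ctx_free() once  */
            return 0;
    }

    int main(void)
    {
            struct ctx c = { 0, 0 };

            ctx_alloc(&c);                  /* partial allocs still freed   */
            ctx_free(&c);
            return 0;
    }
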
 
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
deleted file mode 100644 (file)
index ac7ea3a..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Fallback functions when the main IOMMU code is not compiled in. This
-   code is roughly equivalent to i386. */
-#include <linux/dma-direct.h>
-#include <linux/scatterlist.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/pci.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/iommu.h>
-#include <asm/dma.h>
-
-#define NOMMU_MAPPING_ERROR            0
-
-static int
-check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
-{
-       if (hwdev && !dma_capable(hwdev, bus, size)) {
-               if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
-                       printk(KERN_ERR
-                           "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
-                               name, (long long)bus, size,
-                               (long long)*hwdev->dma_mask);
-               return 0;
-       }
-       return 1;
-}
-
-static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
-                                unsigned long offset, size_t size,
-                                enum dma_data_direction dir,
-                                unsigned long attrs)
-{
-       dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
-       WARN_ON(size == 0);
-       if (!check_addr("map_single", dev, bus, size))
-               return NOMMU_MAPPING_ERROR;
-       return bus;
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA.  This is the scatter-gather version of the
- * above pci_map_single interface.  Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length.  They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
-                       int nents, enum dma_data_direction dir,
-                       unsigned long attrs)
-{
-       struct scatterlist *s;
-       int i;
-
-       WARN_ON(nents == 0 || sg[0].length == 0);
-
-       for_each_sg(sg, s, nents, i) {
-               BUG_ON(!sg_page(s));
-               s->dma_address = sg_phys(s);
-               if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
-                       return 0;
-               s->dma_length = s->length;
-       }
-       return nents;
-}
-
-static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-       return dma_addr == NOMMU_MAPPING_ERROR;
-}
-
-const struct dma_map_ops nommu_dma_ops = {
-       .alloc                  = dma_generic_alloc_coherent,
-       .free                   = dma_generic_free_coherent,
-       .map_sg                 = nommu_map_sg,
-       .map_page               = nommu_map_page,
-       .is_phys                = 1,
-       .mapping_error          = nommu_mapping_error,
-       .dma_supported          = x86_dma_supported,
-};
index 4b100fe0f5087f3f1a2364c9ffd48e5c1327ab6d..12bb445fb98d6618013be3b78a07aee02ac4d01a 100644 (file)
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
        clear_thread_flag(TIF_X32);
        /* Pretend that this comes from a 64bit execve */
        task_pt_regs(current)->orig_ax = __NR_execve;
+       current_thread_info()->status &= ~TS_COMPAT;
 
        /* Ensure the corresponding mm is not marked. */
        if (current->mm)
index 6285697b6e565c61611787f0a07274c2f3308630..5c623dfe39d159822f12602e2f1f7ae4d9024f6d 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/init_ohci1394_dma.h>
 #include <linux/kvm_para.h>
 #include <linux/dma-contiguous.h>
+#include <xen/xen.h>
 
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -534,6 +535,11 @@ static void __init reserve_crashkernel(void)
                high = true;
        }
 
+       if (xen_pv_domain()) {
+               pr_info("Ignoring crashkernel for a Xen PV domain\n");
+               return;
+       }
+
        /* 0 means: find the address automatically */
        if (crash_base <= 0) {
                /*
index ff99e2b6fc541a0faf8afaa17328679533b7838a..0f1cbb042f49b82e7a01b7ace0bce08061a8e6f1 100644 (file)
@@ -77,6 +77,8 @@
 #include <asm/i8259.h>
 #include <asm/misc.h>
 #include <asm/qspinlock.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -390,15 +392,47 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
        return false;
 }
 
+/*
+ * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
+ *
+ * These are Intel CPUs that enumerate an LLC that is shared by
+ * multiple NUMA nodes. The LLC on these systems is shared for
+ * off-package data access but private to the NUMA node (half
+ * of the package) for on-package access.
+ *
+ * CPUID (the source of the information about the LLC) can only
+ * enumerate the cache as being shared *or* unshared, but not
+ * this particular configuration. The CPU in this case enumerates
+ * the cache to be shared across the entire package (spanning both
+ * NUMA nodes).
+ */
+
+static const struct x86_cpu_id snc_cpu[] = {
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X },
+       {}
+};
+
 static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
        int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
-       if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
-           per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
-               return topology_sane(c, o, "llc");
+       /* Do not match if we do not have a valid APICID for cpu: */
+       if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
+               return false;
 
-       return false;
+       /* Do not match if LLC id does not match: */
+       if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
+               return false;
+
+       /*
+        * Allow the SNC topology without warning. Return of false
+        * means 'c' does not share the LLC of 'o'. This will be
+        * reflected to userspace.
+        */
+       if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
+               return false;
+
+       return topology_sane(c, o, "llc");
 }
 
 /*
@@ -456,7 +490,8 @@ static struct sched_domain_topology_level x86_topology[] = {
 
 /*
  * Set if a package/die has multiple NUMA nodes inside.
- * AMD Magny-Cours and Intel Cluster-on-Die have this.
+ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
+ * Sub-NUMA Clustering have this.
  */
 static bool x86_has_numa_in_package;
 
@@ -1536,6 +1571,8 @@ static inline void mwait_play_dead(void)
        void *mwait_ptr;
        int i;
 
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+               return;
        if (!this_cpu_has(X86_FEATURE_MWAIT))
                return;
        if (!this_cpu_has(X86_FEATURE_CLFLUSH))
index ef32297ff17e5bdc6a852bb38f88546f90e0af22..74392d9d51e0a799a7f4fcb974a55e7989ed400a 100644 (file)
@@ -317,7 +317,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
        hpet2 -= hpet1;
        tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
        do_div(tmp, 1000000);
-       do_div(deltatsc, tmp);
+       deltatsc = div64_u64(deltatsc, tmp);
 
        return (unsigned long) deltatsc;
 }
@@ -1067,6 +1067,7 @@ static struct clocksource clocksource_tsc_early = {
        .resume                 = tsc_resume,
        .mark_unstable          = tsc_cs_mark_unstable,
        .tick_stable            = tsc_cs_tick_stable,
+       .list                   = LIST_HEAD_INIT(clocksource_tsc_early.list),
 };
 
 /*
@@ -1086,6 +1087,7 @@ static struct clocksource clocksource_tsc = {
        .resume                 = tsc_resume,
        .mark_unstable          = tsc_cs_mark_unstable,
        .tick_stable            = tsc_cs_tick_stable,
+       .list                   = LIST_HEAD_INIT(clocksource_tsc.list),
 };
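
The new .list initializers are what make it legal to call clocksource_mark_unstable() or clocksource_unregister() on a clocksource that was never registered: a self-pointing list_head looks exactly like an already-removed node. The kernel macro reduces to:

    struct list_head {
            struct list_head *next, *prev;
    };

    /* An empty list is a node whose links point back at itself. */
    #define LIST_HEAD_INIT(name) { &(name), &(name) }

    static struct list_head demo = LIST_HEAD_INIT(demo);
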
 
 void mark_tsc_unstable(char *reason)
@@ -1098,13 +1100,9 @@ void mark_tsc_unstable(char *reason)
                clear_sched_clock_stable();
        disable_sched_clock_irqtime();
        pr_info("Marking TSC unstable due to %s\n", reason);
-       /* Change only the rating, when not registered */
-       if (clocksource_tsc.mult) {
-               clocksource_mark_unstable(&clocksource_tsc);
-       } else {
-               clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
-               clocksource_tsc.rating = 0;
-       }
+
+       clocksource_mark_unstable(&clocksource_tsc_early);
+       clocksource_mark_unstable(&clocksource_tsc);
 }
 
 EXPORT_SYMBOL_GPL(mark_tsc_unstable);
@@ -1244,7 +1242,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 
        /* Don't bother refining TSC on unstable systems */
        if (tsc_unstable)
-               return;
+               goto unreg;
 
        /*
         * Since the work is started early in boot, we may be
@@ -1297,11 +1295,12 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 
 out:
        if (tsc_unstable)
-               return;
+               goto unreg;
 
        if (boot_cpu_has(X86_FEATURE_ART))
                art_related_clocksource = &clocksource_tsc;
        clocksource_register_khz(&clocksource_tsc, tsc_khz);
+unreg:
        clocksource_unregister(&clocksource_tsc_early);
 }
 
@@ -1311,8 +1310,8 @@ static int __init init_tsc_clocksource(void)
        if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
                return 0;
 
-       if (check_tsc_unstable())
-               return 0;
+       if (tsc_unstable)
+               goto unreg;
 
        if (tsc_clocksource_reliable)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
@@ -1328,6 +1327,7 @@ static int __init init_tsc_clocksource(void)
                if (boot_cpu_has(X86_FEATURE_ART))
                        art_related_clocksource = &clocksource_tsc;
                clocksource_register_khz(&clocksource_tsc, tsc_khz);
+unreg:
                clocksource_unregister(&clocksource_tsc_early);
                return 0;
        }
index 85c7ef23d99f7f9b7373e54a6af423aab38ed445..c84bb539695828328f728ffc42aecf0a23f0c06a 100644 (file)
@@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
        if (is_prefix_bad(insn))
                return -ENOTSUPP;
 
+       /* We should not singlestep on the exception masking instructions */
+       if (insn_masking_exception(insn))
+               return -ENOTSUPP;
+
        if (x86_64)
                good_insns = good_insns_64;
        else
index 98618e397342297cdff01a94975147ee543419c0..5708e951a5c69664895a7b9a21ffe3c1bdc669bb 100644 (file)
@@ -1265,7 +1265,7 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
        struct kvm_run *run = vcpu->run;
 
        kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
@@ -1296,8 +1296,10 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
        if (param & ~KVM_HYPERV_CONN_ID_MASK)
                return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-       /* conn_to_evt is protected by vcpu->kvm->srcu */
+       /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
+       rcu_read_lock();
        eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+       rcu_read_unlock();
        if (!eventfd)
                return HV_STATUS_INVALID_PORT_ID;
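
The locking note deserves a gloss: idr_find() walks an RCU-protected tree, so the lookup itself must sit inside rcu_read_lock()/rcu_read_unlock(); the srcu the old comment mentioned only keeps the eventfd object alive once found. A hedged kernel-style sketch of the pattern (illustrative function, not from the patch):

    #include <linux/idr.h>
    #include <linux/rcupdate.h>

    static void *rcu_idr_lookup(struct idr *idr, unsigned long id)
    {
            void *obj;

            rcu_read_lock();
            obj = idr_find(idr, id);        /* pointer stable only under RCU */
            rcu_read_unlock();

            /* obj may only be used past this point if some other mechanism
             * (a refcount, srcu, ...) guarantees its lifetime. */
            return obj;
    }
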
 
index 70dcb554802289910382762f44e7a20b07d3317a..b74c9c1405b9978331653daa9093e95c36be0252 100644 (file)
@@ -1463,23 +1463,6 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
        local_irq_restore(flags);
 }
 
-static void start_sw_period(struct kvm_lapic *apic)
-{
-       if (!apic->lapic_timer.period)
-               return;
-
-       if (apic_lvtt_oneshot(apic) &&
-           ktime_after(ktime_get(),
-                       apic->lapic_timer.target_expiration)) {
-               apic_timer_expired(apic);
-               return;
-       }
-
-       hrtimer_start(&apic->lapic_timer.timer,
-               apic->lapic_timer.target_expiration,
-               HRTIMER_MODE_ABS_PINNED);
-}
-
 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
 {
        ktime_t now, remaining;
@@ -1546,6 +1529,26 @@ static void advance_periodic_target_expiration(struct kvm_lapic *apic)
                                apic->lapic_timer.period);
 }
 
+static void start_sw_period(struct kvm_lapic *apic)
+{
+       if (!apic->lapic_timer.period)
+               return;
+
+       if (ktime_after(ktime_get(),
+                       apic->lapic_timer.target_expiration)) {
+               apic_timer_expired(apic);
+
+               if (apic_lvtt_oneshot(apic))
+                       return;
+
+               advance_periodic_target_expiration(apic);
+       }
+
+       hrtimer_start(&apic->lapic_timer.timer,
+               apic->lapic_timer.target_expiration,
+               HRTIMER_MODE_ABS_PINNED);
+}
+
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 {
        if (!lapic_in_kernel(vcpu))
index aa66ccd6ed6cc573b42a2584635f7f525dbe0713..3f1696570b41475848ae7684565da935888e6363 100644 (file)
@@ -1494,6 +1494,12 @@ static inline bool cpu_has_vmx_vmfunc(void)
                SECONDARY_EXEC_ENABLE_VMFUNC;
 }
 
+static bool vmx_umip_emulated(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_DESC;
+}
+
 static inline bool report_flexpriority(void)
 {
        return flexpriority_enabled;
@@ -4544,12 +4550,6 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
        __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
 }
 
-static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
-{
-       if (enable_ept)
-               vmx_flush_tlb(vcpu, true);
-}
-
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
        ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -4767,14 +4767,16 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        else
                hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
 
-       if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
-               vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
-                             SECONDARY_EXEC_DESC);
-               hw_cr4 &= ~X86_CR4_UMIP;
-       } else if (!is_guest_mode(vcpu) ||
-                  !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
-               vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+       if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+               if (cr4 & X86_CR4_UMIP) {
+                       vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
                                SECONDARY_EXEC_DESC);
+                       hw_cr4 &= ~X86_CR4_UMIP;
+               } else if (!is_guest_mode(vcpu) ||
+                       !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
+                       vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+                                       SECONDARY_EXEC_DESC);
+       }
 
        if (cr4 & X86_CR4_VMXE) {
                /*
@@ -9278,7 +9280,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
        } else {
                sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
                sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-               vmx_flush_tlb_ept_only(vcpu);
+               vmx_flush_tlb(vcpu, true);
        }
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
 
@@ -9306,7 +9308,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
            !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
                             SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
                vmcs_write64(APIC_ACCESS_ADDR, hpa);
-               vmx_flush_tlb_ept_only(vcpu);
+               vmx_flush_tlb(vcpu, true);
        }
 }
 
@@ -9503,12 +9505,6 @@ static bool vmx_xsaves_supported(void)
                SECONDARY_EXEC_XSAVES;
 }
 
-static bool vmx_umip_emulated(void)
-{
-       return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_DESC;
-}
-
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
        u32 exit_intr_info;
@@ -11220,7 +11216,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                }
        } else if (nested_cpu_has2(vmcs12,
                                   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-               vmx_flush_tlb_ept_only(vcpu);
+               vmx_flush_tlb(vcpu, true);
        }
 
        /*
@@ -12073,7 +12069,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        } else if (!nested_cpu_has_ept(vmcs12) &&
                   nested_cpu_has2(vmcs12,
                                   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-               vmx_flush_tlb_ept_only(vcpu);
+               vmx_flush_tlb(vcpu, true);
        }
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
index 51ecd381793b4f281563779b97a04e5063572fdf..59371de5d722d9687f72e39bd32161ddd75fa04d 100644 (file)
@@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 static bool __read_mostly report_ignored_msrs = true;
 module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
 
-unsigned int min_timer_period_us = 500;
+unsigned int min_timer_period_us = 200;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
 
 static bool __read_mostly kvmclock_periodic_sync = true;
@@ -843,7 +843,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 #ifdef CONFIG_X86_64
-       cr3 &= ~CR3_PCID_INVD;
+       bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+       if (pcid_enabled)
+               cr3 &= ~CR3_PCID_INVD;
 #endif
 
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
@@ -6671,12 +6674,13 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
-       int op_64_bit, r;
-
-       r = kvm_skip_emulated_instruction(vcpu);
+       int op_64_bit;
 
-       if (kvm_hv_hypercall_enabled(vcpu->kvm))
-               return kvm_hv_hypercall(vcpu);
+       if (kvm_hv_hypercall_enabled(vcpu->kvm)) {
+               if (!kvm_hv_hypercall(vcpu))
+                       return 0;
+               goto out;
+       }
 
        nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
        a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
@@ -6697,7 +6701,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
        if (kvm_x86_ops->get_cpl(vcpu) != 0) {
                ret = -KVM_EPERM;
-               goto out;
+               goto out_error;
        }
 
        switch (nr) {
@@ -6717,12 +6721,14 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                ret = -KVM_ENOSYS;
                break;
        }
-out:
+out_error:
        if (!op_64_bit)
                ret = (u32)ret;
        kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+
+out:
        ++vcpu->stat.hypercalls;
-       return r;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
index 7d35ce672989ddcbc719db96081d37607c79734f..c9492f7649020e2990b420e545e91470ec744e73 100644 (file)
@@ -302,13 +302,6 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
            __rem;                                              \
         })
 
-#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
-#define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
-#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
-#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
-                                              KVM_X86_DISABLE_EXITS_HTL | \
-                                              KVM_X86_DISABLE_EXITS_PAUSE)
-
 static inline bool kvm_mwait_in_guest(struct kvm *kvm)
 {
        return kvm->arch.mwait_in_guest;
index 62a7e9f65decfcc5c447c01a9b5f986079f1de6f..cc7ff59571943fbcd09295e4d0c83c5be551cb80 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
+#include <linux/highmem.h>
 
 #include <asm/pgtable.h>
 
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
                           pgprotval_t eff_in, unsigned long P)
 {
        int i;
-       pte_t *start;
+       pte_t *pte;
        pgprotval_t prot, eff;
 
-       start = (pte_t *)pmd_page_vaddr(addr);
        for (i = 0; i < PTRS_PER_PTE; i++) {
-               prot = pte_flags(*start);
-               eff = effective_prot(eff_in, prot);
                st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+               pte = pte_offset_map(&addr, st->current_address);
+               prot = pte_flags(*pte);
+               eff = effective_prot(eff_in, prot);
                note_page(m, st, __pgprot(prot), eff, 5);
-               start++;
+               pte_unmap(pte);
        }
 }
 #ifdef CONFIG_KASAN
index 0f3d50f4c48c5bc7e96f43b007870262245666e4..3bded76e8d5c5676bd972b0217e96fb84753ff7f 100644 (file)
@@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file *m)
 static inline void split_page_count(int level) { }
 #endif
 
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+       return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+       return addr >= start && addr <= end;
+}
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
@@ -106,20 +118,25 @@ static inline unsigned long highmap_end_pfn(void)
        return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
 }
 
-#endif
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
 {
-       return addr >= start && addr < end;
+       /*
+        * Kernel text has an alias mapping at a high address, known
+        * here as "highmap".
+        */
+       return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
 }
 
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
 {
-       return addr >= start && addr <= end;
+       /* There is no highmap on 32-bit */
+       return false;
 }
 
+#endif
+
 /*
  * Flushing functions
  */
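
The distinction between the two range helpers matters here because highmap_end_pfn() computes the last valid pfn, not one past it, so the highmap test must use the inclusive variant. A self-contained check:

    #include <assert.h>

    static int within(unsigned long a, unsigned long s, unsigned long e)
    {
            return a >= s && a < e;         /* end exclusive */
    }

    static int within_inclusive(unsigned long a, unsigned long s, unsigned long e)
    {
            return a >= s && a <= e;        /* end inclusive */
    }

    int main(void)
    {
            assert(!within(5, 0, 5));
            assert(within_inclusive(5, 0, 5));
            return 0;
    }
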
@@ -172,7 +189,7 @@ static void __cpa_flush_all(void *arg)
 
 static void cpa_flush_all(unsigned long cache)
 {
-       BUG_ON(irqs_disabled());
+       BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
        on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
@@ -236,7 +253,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
        unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 #endif
 
-       BUG_ON(irqs_disabled());
+       BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
        on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
@@ -1183,6 +1200,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
                cpa->numpages = 1;
                cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
                return 0;
+
+       } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+               /* Faults in the highmap are OK, so do not warn: */
+               return -EFAULT;
        } else {
                WARN(1, KERN_WARNING "CPA: called for zero pte. "
                        "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
@@ -1335,8 +1356,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
         * to touch the high mapped kernel as well:
         */
        if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-           within_inclusive(cpa->pfn, highmap_start_pfn(),
-                            highmap_end_pfn())) {
+           __cpa_pfn_in_highmap(cpa->pfn)) {
                unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
                                               __START_KERNEL_map - phys_base;
                alias_cpa = *cpa;
index d7bc0eea20a5ed2fc8ec43ebc06429517cbb362b..6e98e0a7c92315c2a819ee396bf78ae3104688c4 100644 (file)
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
         */
        if (pkey != -1)
                return pkey;
-       /*
-        * Look for a protection-key-drive execute-only mapping
-        * which is now being given permissions that are not
-        * execute-only.  Move it back to the default pkey.
-        */
-       if (vma_is_pkey_exec_only(vma) &&
-           (prot & (PROT_READ|PROT_WRITE))) {
-               return 0;
-       }
+
        /*
         * The mapping is execute-only.  Go try to get the
         * execute-only protection key.  If we fail to do that,
         * fall through as if we do not have execute-only
-        * support.
+        * support in this mm.
         */
        if (prot == PROT_EXEC) {
                pkey = execute_only_pkey(vma->vm_mm);
                if (pkey > 0)
                        return pkey;
+       } else if (vma_is_pkey_exec_only(vma)) {
+               /*
+                * Protections are *not* PROT_EXEC, but the mapping
+                * is using the exec-only pkey.  This mapping was
+                * PROT_EXEC and will no longer be.  Move back to
+                * the default pkey.
+                */
+               return ARCH_DEFAULT_PKEY;
        }
+
        /*
         * This is a vanilla, non-pkey mprotect (or we failed to
         * setup execute-only), inherit the pkey from the VMA we
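
In userspace terms: a mapping created PROT_EXEC-only may have been given the special exec-only pkey, and any later mprotect() that adds read or write quietly moves it back to ARCH_DEFAULT_PKEY. A hedged sketch of exercising that path (assumes protection-keys hardware):

    #define _GNU_SOURCE
    #include <sys/mman.h>

    int main(void)
    {
            char *p = mmap(NULL, 4096, PROT_EXEC,   /* may get the exec-only pkey */
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            /* No longer exec-only: the kernel drops the exec-only pkey and
             * the mapping returns to the default pkey (pkey 0). */
            mprotect(p, 4096, PROT_READ | PROT_EXEC);
            return 0;
    }
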
index f1fd52f449e00ce717f360fb833a26cd02769681..4d418e70587802b05aff3ae49e80db7faf47a64c 100644 (file)
@@ -421,6 +421,16 @@ static inline bool pti_kernel_image_global_ok(void)
        if (boot_cpu_has(X86_FEATURE_K8))
                return false;
 
+       /*
+        * RANDSTRUCT derives its hardening benefits from the
+        * attacker's lack of knowledge about the layout of kernel
+        * data structures.  Keep the kernel image non-global in
+        * cases where RANDSTRUCT is in use to help keep the layout a
+        * secret.
+        */
+       if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+               return false;
+
        return true;
 }
 
@@ -430,12 +440,24 @@ static inline bool pti_kernel_image_global_ok(void)
  */
 void pti_clone_kernel_text(void)
 {
+       /*
+        * rodata is part of the kernel image and is normally
+        * readable on the filesystem or on the web.  But, do not
+        * clone the areas past rodata, they might contain secrets.
+        */
        unsigned long start = PFN_ALIGN(_text);
-       unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+       unsigned long end = (unsigned long)__end_rodata_hpage_align;
 
        if (!pti_kernel_image_global_ok())
                return;
 
+       pr_debug("mapping partial kernel image into user address space\n");
+
+       /*
+        * Note that this will undo _some_ of the work that
+        * pti_set_kernel_image_nonglobal() did to clear the
+        * global bit.
+        */
        pti_clone_pmds(start, end, _PAGE_RW);
 }
 
@@ -458,8 +480,6 @@ void pti_set_kernel_image_nonglobal(void)
        if (pti_kernel_image_global_ok())
                return;
 
-       pr_debug("set kernel image non-global\n");
-
        set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
index fefb4b619598c0f34a5994ea9bcc5ba5a70dc9dd..59e123da580cba98d41445a0732bf84bddf95668 100644 (file)
@@ -1,6 +1,9 @@
 #
 # Arch-specific network modules
 #
-OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
 
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+ifeq ($(CONFIG_X86_32),y)
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
+else
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+endif
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
deleted file mode 100644 (file)
index b33093f..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-/* bpf_jit.S : BPF JIT helper functions
- *
- * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-/*
- * Calling convention :
- * rbx : skb pointer (callee saved)
- * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r10 : copy of skb->data
- * r9d : hlen = skb->len - skb->data_len
- */
-#define SKBDATA        %r10
-#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
-
-#define FUNC(name) \
-       .globl name; \
-       .type name, @function; \
-       name:
-
-FUNC(sk_load_word)
-       test    %esi,%esi
-       js      bpf_slow_path_word_neg
-
-FUNC(sk_load_word_positive_offset)
-       mov     %r9d,%eax               # hlen
-       sub     %esi,%eax               # hlen - offset
-       cmp     $3,%eax
-       jle     bpf_slow_path_word
-       mov     (SKBDATA,%rsi),%eax
-       bswap   %eax                    /* ntohl() */
-       ret
-
-FUNC(sk_load_half)
-       test    %esi,%esi
-       js      bpf_slow_path_half_neg
-
-FUNC(sk_load_half_positive_offset)
-       mov     %r9d,%eax
-       sub     %esi,%eax               #       hlen - offset
-       cmp     $1,%eax
-       jle     bpf_slow_path_half
-       movzwl  (SKBDATA,%rsi),%eax
-       rol     $8,%ax                  # ntohs()
-       ret
-
-FUNC(sk_load_byte)
-       test    %esi,%esi
-       js      bpf_slow_path_byte_neg
-
-FUNC(sk_load_byte_positive_offset)
-       cmp     %esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
-       jle     bpf_slow_path_byte
-       movzbl  (SKBDATA,%rsi),%eax
-       ret
-
-/* rsi contains offset and can be scratched */
-#define bpf_slow_path_common(LEN)              \
-       lea     32(%rbp), %rdx;\
-       FRAME_BEGIN;                            \
-       mov     %rbx, %rdi; /* arg1 == skb */   \
-       push    %r9;                            \
-       push    SKBDATA;                        \
-/* rsi already has offset */                   \
-       mov     $LEN,%ecx;      /* len */       \
-       call    skb_copy_bits;                  \
-       test    %eax,%eax;                      \
-       pop     SKBDATA;                        \
-       pop     %r9;                            \
-       FRAME_END
-
-
-bpf_slow_path_word:
-       bpf_slow_path_common(4)
-       js      bpf_error
-       mov     32(%rbp),%eax
-       bswap   %eax
-       ret
-
-bpf_slow_path_half:
-       bpf_slow_path_common(2)
-       js      bpf_error
-       mov     32(%rbp),%ax
-       rol     $8,%ax
-       movzwl  %ax,%eax
-       ret
-
-bpf_slow_path_byte:
-       bpf_slow_path_common(1)
-       js      bpf_error
-       movzbl  32(%rbp),%eax
-       ret
-
-#define sk_negative_common(SIZE)                               \
-       FRAME_BEGIN;                                            \
-       mov     %rbx, %rdi; /* arg1 == skb */                   \
-       push    %r9;                                            \
-       push    SKBDATA;                                        \
-/* rsi already has offset */                                   \
-       mov     $SIZE,%edx;     /* size */                      \
-       call    bpf_internal_load_pointer_neg_helper;           \
-       test    %rax,%rax;                                      \
-       pop     SKBDATA;                                        \
-       pop     %r9;                                            \
-       FRAME_END;                                              \
-       jz      bpf_error
-
-bpf_slow_path_word_neg:
-       cmp     SKF_MAX_NEG_OFF, %esi   /* test range */
-       jl      bpf_error       /* offset lower -> error  */
-
-FUNC(sk_load_word_negative_offset)
-       sk_negative_common(4)
-       mov     (%rax), %eax
-       bswap   %eax
-       ret
-
-bpf_slow_path_half_neg:
-       cmp     SKF_MAX_NEG_OFF, %esi
-       jl      bpf_error
-
-FUNC(sk_load_half_negative_offset)
-       sk_negative_common(2)
-       mov     (%rax),%ax
-       rol     $8,%ax
-       movzwl  %ax,%eax
-       ret
-
-bpf_slow_path_byte_neg:
-       cmp     SKF_MAX_NEG_OFF, %esi
-       jl      bpf_error
-
-FUNC(sk_load_byte_negative_offset)
-       sk_negative_common(1)
-       movzbl  (%rax), %eax
-       ret
-
-bpf_error:
-# force a return 0 from jit handler
-       xor     %eax,%eax
-       mov     (%rbp),%rbx
-       mov     8(%rbp),%r13
-       mov     16(%rbp),%r14
-       mov     24(%rbp),%r15
-       add     $40, %rbp
-       leaveq
-       ret
index b725154182cc331e2fdd51c5118a9623f4b4d6d2..8fca446aaef62b92a79a5b76ac3a708aa249b935 100644 (file)
@@ -1,4 +1,5 @@
-/* bpf_jit_comp.c : BPF JIT compiler
+/*
+ * bpf_jit_comp.c: BPF JIT compiler
  *
  * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
  * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 
-/*
- * assembly code in arch/x86/net/bpf_jit.S
- */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[];
-extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[];
-
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
        if (len == 1)
@@ -45,14 +37,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 #define EMIT2(b1, b2)          EMIT((b1) + ((b2) << 8), 2)
 #define EMIT3(b1, b2, b3)      EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
 #define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+
 #define EMIT1_off32(b1, off) \
-       do {EMIT1(b1); EMIT(off, 4); } while (0)
+       do { EMIT1(b1); EMIT(off, 4); } while (0)
 #define EMIT2_off32(b1, b2, off) \
-       do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
+       do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
 #define EMIT3_off32(b1, b2, b3, off) \
-       do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+       do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
 #define EMIT4_off32(b1, b2, b3, b4, off) \
-       do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
+       do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
 
 static bool is_imm8(int value)
 {
@@ -70,9 +63,10 @@ static bool is_uimm32(u64 value)
 }
 
 /* mov dst, src */
-#define EMIT_mov(DST, SRC) \
-       do {if (DST != SRC) \
-               EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
+#define EMIT_mov(DST, SRC)                                                              \
+       do {                                                                             \
+               if (DST != SRC)                                                          \
+                       EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
        } while (0)
 
 static int bpf_size_to_x86_bytes(int bpf_size)
@@ -89,7 +83,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
                return 0;
 }
 
-/* list of x86 cond jumps opcodes (. + s8)
+/*
+ * List of x86 conditional jump opcodes (. + s8)
  * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
  */
 #define X86_JB  0x72
@@ -103,38 +98,37 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JLE 0x7E
 #define X86_JG  0x7F
 
-#define CHOOSE_LOAD_FUNC(K, func) \
-       ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-/* pick a register outside of BPF range for JIT internal work */
+/* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
 
-/* The following table maps BPF registers to x64 registers.
+/*
+ * The following table maps BPF registers to x86-64 registers.
  *
- * x64 register r12 is unused, since if used as base address
+ * x86-64 register R12 is unused, since if used as base address
  * register in load/store instructions, it always needs an
  * extra byte of encoding and is callee saved.
  *
- *  r9 caches skb->len - skb->data_len
- * r10 caches skb->data, and used for blinding (if enabled)
+ * Also x86-64 register R9 is unused. x86-64 register R10 is
+ * used for blinding (if enabled).
  */
 static const int reg2hex[] = {
-       [BPF_REG_0] = 0,  /* rax */
-       [BPF_REG_1] = 7,  /* rdi */
-       [BPF_REG_2] = 6,  /* rsi */
-       [BPF_REG_3] = 2,  /* rdx */
-       [BPF_REG_4] = 1,  /* rcx */
-       [BPF_REG_5] = 0,  /* r8 */
-       [BPF_REG_6] = 3,  /* rbx callee saved */
-       [BPF_REG_7] = 5,  /* r13 callee saved */
-       [BPF_REG_8] = 6,  /* r14 callee saved */
-       [BPF_REG_9] = 7,  /* r15 callee saved */
-       [BPF_REG_FP] = 5, /* rbp readonly */
-       [BPF_REG_AX] = 2, /* r10 temp register */
-       [AUX_REG] = 3,    /* r11 temp register */
+       [BPF_REG_0] = 0,  /* RAX */
+       [BPF_REG_1] = 7,  /* RDI */
+       [BPF_REG_2] = 6,  /* RSI */
+       [BPF_REG_3] = 2,  /* RDX */
+       [BPF_REG_4] = 1,  /* RCX */
+       [BPF_REG_5] = 0,  /* R8  */
+       [BPF_REG_6] = 3,  /* RBX callee saved */
+       [BPF_REG_7] = 5,  /* R13 callee saved */
+       [BPF_REG_8] = 6,  /* R14 callee saved */
+       [BPF_REG_9] = 7,  /* R15 callee saved */
+       [BPF_REG_FP] = 5, /* RBP readonly */
+       [BPF_REG_AX] = 2, /* R10 temp register */
+       [AUX_REG] = 3,    /* R11 temp register */
 };
 
-/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
+/*
+ * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
  * which need extra byte of encoding.
  * rax,rcx,...,rbp have simpler encoding
  */
@@ -153,7 +147,7 @@ static bool is_axreg(u32 reg)
        return reg == BPF_REG_0;
 }
 
-/* add modifiers if 'reg' maps to x64 registers r8..r15 */
+/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
 static u8 add_1mod(u8 byte, u32 reg)
 {
        if (is_ereg(reg))
@@ -170,13 +164,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
        return byte;
 }
 
-/* encode 'dst_reg' register into x64 opcode 'byte' */
+/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
 static u8 add_1reg(u8 byte, u32 dst_reg)
 {
        return byte + reg2hex[dst_reg];
 }
 
-/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
+/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
 static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 {
        return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
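
As a quick worked example of how reg2hex[] and these helpers compose an
instruction: a minimal user-space sketch (not part of the patch) of the
three bytes EMIT_mov(BPF_REG_6, BPF_REG_1) produces, i.e. 'mov rbx, rdi':

    #include <stdio.h>

    int main(void)
    {
            int dst = 3;    /* reg2hex[BPF_REG_6], RBX */
            int src = 7;    /* reg2hex[BPF_REG_1], RDI */
            /* add_2mod(0x48, ...): neither reg is R8..R15, so plain REX.W */
            unsigned char rex = 0x48;
            /* add_2reg(0xC0, dst, src): ModRM with mod=11, reg=src, rm=dst */
            unsigned char modrm = 0xC0 + dst + (src << 3);

            printf("%02x 89 %02x\n", rex, modrm);   /* 48 89 fb = mov rbx, rdi */
            return 0;
    }
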
@@ -184,27 +178,24 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 
 static void jit_fill_hole(void *area, unsigned int size)
 {
-       /* fill whole space with int3 instructions */
+       /* Fill whole space with INT3 instructions */
        memset(area, 0xcc, size);
 }
 
 struct jit_context {
-       int cleanup_addr; /* epilogue code offset */
-       bool seen_ld_abs;
-       bool seen_ax_reg;
+       int cleanup_addr; /* Epilogue code offset */
 };
 
-/* maximum number of bytes emitted while JITing one eBPF insn */
+/* Maximum number of bytes emitted while JITing one eBPF insn */
 #define BPF_MAX_INSN_SIZE      128
 #define BPF_INSN_SAFETY                64
 
-#define AUX_STACK_SPACE \
-       (32 /* space for rbx, r13, r14, r15 */ + \
-        8 /* space for skb_copy_bits() buffer */)
+#define AUX_STACK_SPACE                40 /* Space for RBX, R13, R14, R15, tailcnt */
 
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE          37
 
-/* emit x64 prologue code for BPF program and check it's size.
+/*
+ * Emit x86-64 prologue code for BPF program and check its size.
  * bpf_tail_call helper will skip it while jumping into another program
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
@@ -212,8 +203,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
        u8 *prog = *pprog;
        int cnt = 0;
 
-       EMIT1(0x55); /* push rbp */
-       EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
+       /* push rbp */
+       EMIT1(0x55);
+
+       /* mov rbp,rsp */
+       EMIT3(0x48, 0x89, 0xE5);
 
        /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
        EMIT3_off32(0x48, 0x81, 0xEC,
@@ -222,19 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
        /* sub rbp, AUX_STACK_SPACE */
        EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
 
-       /* all classic BPF filters use R6(rbx) save it */
-
        /* mov qword ptr [rbp+0],rbx */
        EMIT4(0x48, 0x89, 0x5D, 0);
-
-       /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
-        * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
-        * R8(r14). R9(r15) spill could be made conditional, but there is only
-        * one 'bpf_error' return path out of helper functions inside bpf_jit.S
-        * The overhead of extra spill is negligible for any filter other
-        * than synthetic ones. Therefore not worth adding complexity.
-        */
-
        /* mov qword ptr [rbp+8],r13 */
        EMIT4(0x4C, 0x89, 0x6D, 8);
        /* mov qword ptr [rbp+16],r14 */
@@ -243,9 +226,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
        EMIT4(0x4C, 0x89, 0x7D, 24);
 
        if (!ebpf_from_cbpf) {
-               /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+               /*
+                * Clear the tail call counter (tail_call_cnt): for eBPF tail
                 * calls we need to reset the counter to 0. It's done in two
-                * instructions, resetting rax register to 0, and moving it
+                * instructions, resetting RAX register to 0, and moving it
                 * to the counter location.
                 */
 
@@ -260,7 +244,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
        *pprog = prog;
 }
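
The auxiliary frame this prologue lays out at the adjusted rbp can be
pictured with the following sketch (a hypothetical struct for illustration
only, assuming AUX_STACK_SPACE == 40 as defined above; it is not kernel
code):

    #include <stdint.h>

    struct bpf_jit_aux_frame {
            uint64_t saved_rbx;             /* [rbp +  0] */
            uint64_t saved_r13;             /* [rbp +  8] */
            uint64_t saved_r14;             /* [rbp + 16] */
            uint64_t saved_r15;             /* [rbp + 24] */
            uint64_t tail_call_cnt;         /* [rbp + 32..39]; the counter
                                             * dword is read back at
                                             * [rbp + 36] below */
    };
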
 
-/* generate the following code:
+/*
+ * Generate the following code:
+ *
  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
  *   if (index >= array->map.max_entries)
  *     goto out;
@@ -278,23 +264,26 @@ static void emit_bpf_tail_call(u8 **pprog)
        int label1, label2, label3;
        int cnt = 0;
 
-       /* rdi - pointer to ctx
+       /*
+        * rdi - pointer to ctx
         * rsi - pointer to bpf_array
         * rdx - index in bpf_array
         */
 
-       /* if (index >= array->map.max_entries)
-        *   goto out;
+       /*
+        * if (index >= array->map.max_entries)
+        *      goto out;
         */
        EMIT2(0x89, 0xD2);                        /* mov edx, edx */
        EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
              offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
        EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
        label1 = cnt;
 
-       /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
-        *   goto out;
+       /*
+        * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+        *      goto out;
         */
        EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
        EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
@@ -308,8 +297,9 @@ static void emit_bpf_tail_call(u8 **pprog)
        EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
                    offsetof(struct bpf_array, ptrs));
 
-       /* if (prog == NULL)
-        *   goto out;
+       /*
+        * if (prog == NULL)
+        *      goto out;
         */
        EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
 #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
@@ -321,7 +311,8 @@ static void emit_bpf_tail_call(u8 **pprog)
              offsetof(struct bpf_prog, bpf_func));
        EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */
 
-       /* now we're ready to jump into next BPF program
+       /*
+        * Now we're ready to jump into the next BPF program
         * rdi == ctx (1st arg)
         * rax == prog->bpf_func + prologue_size
         */
@@ -334,26 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
        *pprog = prog;
 }
 
-
-static void emit_load_skb_data_hlen(u8 **pprog)
-{
-       u8 *prog = *pprog;
-       int cnt = 0;
-
-       /* r9d = skb->len - skb->data_len (headlen)
-        * r10 = skb->data
-        */
-       /* mov %r9d, off32(%rdi) */
-       EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
-
-       /* sub %r9d, off32(%rdi) */
-       EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
-
-       /* mov %r10, off32(%rdi) */
-       EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
-       *pprog = prog;
-}
-
 static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
                           u32 dst_reg, const u32 imm32)
 {
@@ -361,7 +332,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
        u8 b1, b2, b3;
        int cnt = 0;
 
-       /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+       /*
+        * Optimization: if imm32 is positive, use 'mov %eax, imm32'
         * (which zero-extends imm32) to save 2 bytes.
         */
        if (sign_propagate && (s32)imm32 < 0) {
@@ -373,7 +345,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
                goto done;
        }
 
-       /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+       /*
+        * Optimization: if imm32 is zero, use 'xor %eax, %eax'
         * to save 3 bytes.
         */
        if (imm32 == 0) {
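
Taken together, emit_mov_imm32() picks between three encodings; a small
sketch of the byte counts involved (assumed sizes for a non-extended
register such as rax; mov_imm32_len() is an illustrative helper, not a
kernel function):

    #include <stdint.h>

    unsigned int mov_imm32_len(int sign_propagate, int32_t imm32)
    {
            if (sign_propagate && imm32 < 0)
                    return 7;       /* 48 C7 C0 imm32: mov rax, imm32 (sign-extends) */
            if (imm32 == 0)
                    return 2;       /* 31 C0: xor eax, eax (3 more bytes saved) */
            return 5;               /* B8 imm32: mov eax, imm32 (2 bytes saved) */
    }
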
@@ -400,7 +373,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
        int cnt = 0;
 
        if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
-               /* For emitting plain u32, where sign bit must not be
+               /*
+                * For emitting plain u32, where sign bit must not be
                 * propagated LLVM tends to load imm64 over mov32
                 * directly, so save couple of bytes by just doing
                 * 'mov %eax, imm32' instead.
@@ -439,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 {
        struct bpf_insn *insn = bpf_prog->insnsi;
        int insn_cnt = bpf_prog->len;
-       bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
-       bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
        int i, cnt = 0;
@@ -450,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        emit_prologue(&prog, bpf_prog->aux->stack_depth,
                      bpf_prog_was_classic(bpf_prog));
 
-       if (seen_ld_abs)
-               emit_load_skb_data_hlen(&prog);
-
        for (i = 0; i < insn_cnt; i++, insn++) {
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
@@ -460,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                u8 b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
-               bool reload_skb_data;
                int ilen;
                u8 *func;
 
-               if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
-                       ctx->seen_ax_reg = seen_ax_reg = true;
-
                switch (insn->code) {
                        /* ALU */
                case BPF_ALU | BPF_ADD | BPF_X:
@@ -525,7 +490,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));
 
-                       /* b3 holds 'normal' opcode, b2 short form only valid
+                       /*
+                        * b3 holds 'normal' opcode, b2 short form only valid
                         * in case dst is eax/rax.
                         */
                        switch (BPF_OP(insn->code)) {
@@ -593,7 +559,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        /* mov rax, dst_reg */
                        EMIT_mov(BPF_REG_0, dst_reg);
 
-                       /* xor edx, edx
+                       /*
+                        * xor edx, edx
                         * equivalent to 'xor rdx, rdx', but one byte less
                         */
                        EMIT2(0x31, 0xd2);
@@ -655,7 +622,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        }
                        break;
                }
-                       /* shifts */
+                       /* Shifts */
                case BPF_ALU | BPF_LSH | BPF_K:
                case BPF_ALU | BPF_RSH | BPF_K:
                case BPF_ALU | BPF_ARSH | BPF_K:
@@ -686,7 +653,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU64 | BPF_RSH | BPF_X:
                case BPF_ALU64 | BPF_ARSH | BPF_X:
 
-                       /* check for bad case when dst_reg == rcx */
+                       /* Check for bad case when dst_reg == rcx */
                        if (dst_reg == BPF_REG_4) {
                                /* mov r11, dst_reg */
                                EMIT_mov(AUX_REG, dst_reg);
@@ -724,13 +691,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU | BPF_END | BPF_FROM_BE:
                        switch (imm32) {
                        case 16:
-                               /* emit 'ror %ax, 8' to swap lower 2 bytes */
+                               /* Emit 'ror %ax, 8' to swap lower 2 bytes */
                                EMIT1(0x66);
                                if (is_ereg(dst_reg))
                                        EMIT1(0x41);
                                EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
 
-                               /* emit 'movzwl eax, ax' */
+                               /* Emit 'movzwl eax, ax' */
                                if (is_ereg(dst_reg))
                                        EMIT3(0x45, 0x0F, 0xB7);
                                else
@@ -738,7 +705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                                EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                                break;
                        case 32:
-                               /* emit 'bswap eax' to swap lower 4 bytes */
+                               /* Emit 'bswap eax' to swap lower 4 bytes */
                                if (is_ereg(dst_reg))
                                        EMIT2(0x41, 0x0F);
                                else
@@ -746,7 +713,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                                EMIT1(add_1reg(0xC8, dst_reg));
                                break;
                        case 64:
-                               /* emit 'bswap rax' to swap 8 bytes */
+                               /* Emit 'bswap rax' to swap 8 bytes */
                                EMIT3(add_1mod(0x48, dst_reg), 0x0F,
                                      add_1reg(0xC8, dst_reg));
                                break;
@@ -756,7 +723,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU | BPF_END | BPF_FROM_LE:
                        switch (imm32) {
                        case 16:
-                               /* emit 'movzwl eax, ax' to zero extend 16-bit
+                               /*
+                                * Emit 'movzwl eax, ax' to zero extend 16-bit
                                 * into 64 bit
                                 */
                                if (is_ereg(dst_reg))
@@ -766,7 +734,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                                EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                                break;
                        case 32:
-                               /* emit 'mov eax, eax' to clear upper 32-bits */
+                               /* Emit 'mov eax, eax' to clear upper 32-bits */
                                if (is_ereg(dst_reg))
                                        EMIT1(0x45);
                                EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
@@ -809,9 +777,9 @@ st:                 if (is_imm8(insn->off))
 
                        /* STX: *(u8*)(dst_reg + off) = src_reg */
                case BPF_STX | BPF_MEM | BPF_B:
-                       /* emit 'mov byte ptr [rax + off], al' */
+                       /* Emit 'mov byte ptr [rax + off], al' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg) ||
-                           /* have to add extra byte for x86 SIL, DIL regs */
+                           /* We have to add an extra byte for x86 SIL, DIL regs */
                            src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
                                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
                        else
@@ -840,25 +808,26 @@ stx:                      if (is_imm8(insn->off))
 
                        /* LDX: dst_reg = *(u8*)(src_reg + off) */
                case BPF_LDX | BPF_MEM | BPF_B:
-                       /* emit 'movzx rax, byte ptr [rax + off]' */
+                       /* Emit 'movzx rax, byte ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_H:
-                       /* emit 'movzx rax, word ptr [rax + off]' */
+                       /* Emit 'movzx rax, word ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_W:
-                       /* emit 'mov eax, dword ptr [rax+0x14]' */
+                       /* Emit 'mov eax, dword ptr [rax+0x14]' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
                        else
                                EMIT1(0x8B);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_DW:
-                       /* emit 'mov rax, qword ptr [rax+0x14]' */
+                       /* Emit 'mov rax, qword ptr [rax+0x14]' */
                        EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx:                   /* if insn->off == 0 we can save one extra byte, but
-                        * special case of x86 r13 which always needs an offset
+ldx:                   /*
+                        * If insn->off == 0 we can save one extra byte, but
+                        * special case of x86 R13 which always needs an offset
                         * is not worth the hassle
                         */
                        if (is_imm8(insn->off))
@@ -870,7 +839,7 @@ stx:                        if (is_imm8(insn->off))
 
                        /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
                case BPF_STX | BPF_XADD | BPF_W:
-                       /* emit 'lock add dword ptr [rax + off], eax' */
+                       /* Emit 'lock add dword ptr [rax + off], eax' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
                        else
@@ -889,35 +858,12 @@ xadd:                     if (is_imm8(insn->off))
                case BPF_JMP | BPF_CALL:
                        func = (u8 *) __bpf_call_base + imm32;
                        jmp_offset = func - (image + addrs[i]);
-                       if (seen_ld_abs) {
-                               reload_skb_data = bpf_helper_changes_pkt_data(func);
-                               if (reload_skb_data) {
-                                       EMIT1(0x57); /* push %rdi */
-                                       jmp_offset += 22; /* pop, mov, sub, mov */
-                               } else {
-                                       EMIT2(0x41, 0x52); /* push %r10 */
-                                       EMIT2(0x41, 0x51); /* push %r9 */
-                                       /* need to adjust jmp offset, since
-                                        * pop %r9, pop %r10 take 4 bytes after call insn
-                                        */
-                                       jmp_offset += 4;
-                               }
-                       }
                        if (!imm32 || !is_simm32(jmp_offset)) {
-                               pr_err("unsupported bpf func %d addr %p image %p\n",
+                               pr_err("unsupported BPF func %d addr %p image %p\n",
                                       imm32, func, image);
                                return -EINVAL;
                        }
                        EMIT1_off32(0xE8, jmp_offset);
-                       if (seen_ld_abs) {
-                               if (reload_skb_data) {
-                                       EMIT1(0x5F); /* pop %rdi */
-                                       emit_load_skb_data_hlen(&prog);
-                               } else {
-                                       EMIT2(0x41, 0x59); /* pop %r9 */
-                                       EMIT2(0x41, 0x5A); /* pop %r10 */
-                               }
-                       }
                        break;
 
                case BPF_JMP | BPF_TAIL_CALL:
@@ -970,7 +916,7 @@ xadd:                       if (is_imm8(insn->off))
                        else
                                EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
 
-emit_cond_jmp:         /* convert BPF opcode to x86 */
+emit_cond_jmp:         /* Convert BPF opcode to x86 */
                        switch (BPF_OP(insn->code)) {
                        case BPF_JEQ:
                                jmp_cond = X86_JE;
@@ -996,22 +942,22 @@ xadd:                     if (is_imm8(insn->off))
                                jmp_cond = X86_JBE;
                                break;
                        case BPF_JSGT:
-                               /* signed '>', GT in x86 */
+                               /* Signed '>', GT in x86 */
                                jmp_cond = X86_JG;
                                break;
                        case BPF_JSLT:
-                               /* signed '<', LT in x86 */
+                               /* Signed '<', LT in x86 */
                                jmp_cond = X86_JL;
                                break;
                        case BPF_JSGE:
-                               /* signed '>=', GE in x86 */
+                               /* Signed '>=', GE in x86 */
                                jmp_cond = X86_JGE;
                                break;
                        case BPF_JSLE:
-                               /* signed '<=', LE in x86 */
+                               /* Signed '<=', LE in x86 */
                                jmp_cond = X86_JLE;
                                break;
-                       default: /* to silence gcc warning */
+                       default: /* to silence GCC warning */
                                return -EFAULT;
                        }
                        jmp_offset = addrs[i + insn->off] - addrs[i];
@@ -1027,9 +973,19 @@ xadd:                     if (is_imm8(insn->off))
                        break;
 
                case BPF_JMP | BPF_JA:
-                       jmp_offset = addrs[i + insn->off] - addrs[i];
+                       if (insn->off == -1)
+                               /*
+                                * An insn->off of -1 always jumps backwards
+                                * two bytes, i.e. to the jmp itself.
+                                * Explicitly handling this case avoids
+                                * wasting too many passes when there are
+                                * long sequences of replaced dead code.
+                                */
+                               jmp_offset = -2;
+                       else
+                               jmp_offset = addrs[i + insn->off] - addrs[i];
+
                        if (!jmp_offset)
-                               /* optimize out nop jumps */
+                               /* Optimize out nop jumps */
                                break;
 emit_jmp:
                        if (is_imm8(jmp_offset)) {
@@ -1042,66 +998,13 @@ xadd:                    if (is_imm8(insn->off))
                        }
                        break;
 
-               case BPF_LD | BPF_IND | BPF_W:
-                       func = sk_load_word;
-                       goto common_load;
-               case BPF_LD | BPF_ABS | BPF_W:
-                       func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
-common_load:
-                       ctx->seen_ld_abs = seen_ld_abs = true;
-                       jmp_offset = func - (image + addrs[i]);
-                       if (!func || !is_simm32(jmp_offset)) {
-                               pr_err("unsupported bpf func %d addr %p image %p\n",
-                                      imm32, func, image);
-                               return -EINVAL;
-                       }
-                       if (BPF_MODE(insn->code) == BPF_ABS) {
-                               /* mov %esi, imm32 */
-                               EMIT1_off32(0xBE, imm32);
-                       } else {
-                               /* mov %rsi, src_reg */
-                               EMIT_mov(BPF_REG_2, src_reg);
-                               if (imm32) {
-                                       if (is_imm8(imm32))
-                                               /* add %esi, imm8 */
-                                               EMIT3(0x83, 0xC6, imm32);
-                                       else
-                                               /* add %esi, imm32 */
-                                               EMIT2_off32(0x81, 0xC6, imm32);
-                               }
-                       }
-                       /* skb pointer is in R6 (%rbx), it will be copied into
-                        * %rdi if skb_copy_bits() call is necessary.
-                        * sk_load_* helpers also use %r10 and %r9d.
-                        * See bpf_jit.S
-                        */
-                       if (seen_ax_reg)
-                               /* r10 = skb->data, mov %r10, off32(%rbx) */
-                               EMIT3_off32(0x4c, 0x8b, 0x93,
-                                           offsetof(struct sk_buff, data));
-                       EMIT1_off32(0xE8, jmp_offset); /* call */
-                       break;
-
-               case BPF_LD | BPF_IND | BPF_H:
-                       func = sk_load_half;
-                       goto common_load;
-               case BPF_LD | BPF_ABS | BPF_H:
-                       func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
-                       goto common_load;
-               case BPF_LD | BPF_IND | BPF_B:
-                       func = sk_load_byte;
-                       goto common_load;
-               case BPF_LD | BPF_ABS | BPF_B:
-                       func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
-                       goto common_load;
-
                case BPF_JMP | BPF_EXIT:
                        if (seen_exit) {
                                jmp_offset = ctx->cleanup_addr - addrs[i];
                                goto emit_jmp;
                        }
                        seen_exit = true;
-                       /* update cleanup_addr */
+                       /* Update cleanup_addr */
                        ctx->cleanup_addr = proglen;
                        /* mov rbx, qword ptr [rbp+0] */
                        EMIT4(0x48, 0x8B, 0x5D, 0);
@@ -1119,10 +1022,11 @@ xadd:                   if (is_imm8(insn->off))
                        break;
 
                default:
-                       /* By design x64 JIT should support all BPF instructions
+                       /*
+                        * By design x86-64 JIT should support all BPF instructions.
                         * This error will be seen if new instruction was added
-                        * to interpreter, but not to JIT
-                        * or if there is junk in bpf_prog
+                        * to the interpreter, but not to the JIT, or if there is
+                        * junk in bpf_prog.
                         */
                        pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
                        return -EINVAL;
@@ -1174,7 +1078,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                return orig_prog;
 
        tmp = bpf_jit_blind_constants(prog);
-       /* If blinding was requested and we failed during blinding,
+       /*
+        * If blinding was requested and we failed during blinding,
         * we must fall back to the interpreter.
         */
        if (IS_ERR(tmp))
@@ -1208,8 +1113,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                goto out_addrs;
        }
 
-       /* Before first pass, make a rough estimation of addrs[]
-        * each bpf instruction is translated to less than 64 bytes
+       /*
+        * Before first pass, make a rough estimation of addrs[]
+        * each BPF instruction is translated to less than 64 bytes
         */
        for (proglen = 0, i = 0; i < prog->len; i++) {
                proglen += 64;
@@ -1218,14 +1124,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
        ctx.cleanup_addr = proglen;
 skip_init_addrs:
 
-       /* JITed image shrinks with every pass and the loop iterates
-        * until the image stops shrinking. Very large bpf programs
+       /*
+        * JITed image shrinks with every pass and the loop iterates
+        * until the image stops shrinking. Very large BPF programs
         * may converge on the last pass. In such case do one more
-        * pass to emit the final image
+        * pass to emit the final image.
         */
        for (pass = 0; pass < 20 || image; pass++) {
                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
                if (proglen <= 0) {
+out_image:
                        image = NULL;
                        if (header)
                                bpf_jit_binary_free(header);
@@ -1236,8 +1144,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                        if (proglen != oldproglen) {
                                pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
                                       proglen, oldproglen);
-                               prog = orig_prog;
-                               goto out_addrs;
+                               goto out_image;
                        }
                        break;
                }
@@ -1273,7 +1180,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                prog = orig_prog;
        }
 
-       if (!prog->is_func || extra_pass) {
+       if (!image || !prog->is_func || extra_pass) {
 out_addrs:
                kfree(addrs);
                kfree(jit_data);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
new file mode 100644 (file)
index 0000000..0cc04e3
--- /dev/null
@@ -0,0 +1,2419 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Just-In-Time compiler for eBPF filters on IA32 (32-bit x86)
+ *
+ * Author: Wang YanQing (udknight@gmail.com)
+ * The code is based on code and ideas from:
+ * Eric Dumazet (eric.dumazet@gmail.com)
+ * and from:
+ * Shubham Bansal <illusionist.neo@gmail.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+#include <linux/bpf.h>
+
+/*
+ * eBPF prog stack layout:
+ *
+ *                         high
+ * original ESP =>        +-----+
+ *                        |     | callee saved registers
+ *                        +-----+
+ *                        | ... | eBPF JIT scratch space
+ * BPF_FP,IA32_EBP  =>    +-----+
+ *                        | ... | eBPF prog stack
+ *                        +-----+
+ *                        |RSVD | JIT scratchpad
+ * current ESP =>         +-----+
+ *                        |     |
+ *                        | ... | Function call stack
+ *                        |     |
+ *                        +-----+
+ *                          low
+ *
+ * The callee saved registers:
+ *
+ *                                high
+ * original ESP =>        +------------------+ \
+ *                        |        ebp       | |
+ * current EBP =>         +------------------+ } callee saved registers
+ *                        |    ebx,esi,edi   | |
+ *                        +------------------+ /
+ *                                low
+ */
+
+static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+{
+       if (len == 1)
+               *ptr = bytes;
+       else if (len == 2)
+               *(u16 *)ptr = bytes;
+       else {
+               *(u32 *)ptr = bytes;
+               barrier();
+       }
+       return ptr + len;
+}
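
The EMIT*() macros below pack up to four opcode bytes little-endian into a
single u32 before emit_code() stores them; a stand-alone sketch of the same
packing (illustrative only, assuming a little-endian host as the x86 JIT
does):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* What EMIT3(0x66, 0xC1, 0xC8) hands to emit_code() */
            uint32_t bytes = 0x66 + (0xC1 << 8) + (0xC8 << 16);
            uint8_t buf[4];

            memcpy(buf, &bytes, sizeof(bytes)); /* emit_code() stores a u32 for len == 3 */
            printf("%02x %02x %02x\n", buf[0], buf[1], buf[2]);     /* 66 c1 c8 */
            return 0;
    }
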
+
+#define EMIT(bytes, len) \
+       do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
+
+#define EMIT1(b1)              EMIT(b1, 1)
+#define EMIT2(b1, b2)          EMIT((b1) + ((b2) << 8), 2)
+#define EMIT3(b1, b2, b3)      EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
+#define EMIT4(b1, b2, b3, b4)   \
+       EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+
+#define EMIT1_off32(b1, off) \
+       do { EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+       do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+       do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+       do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
+
+#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
+
+static bool is_imm8(int value)
+{
+       return value <= 127 && value >= -128;
+}
+
+static bool is_simm32(s64 value)
+{
+       return value == (s64) (s32) value;
+}
+
+#define STACK_OFFSET(k)        (k)
+#define TCALL_CNT      (MAX_BPF_JIT_REG + 0)   /* Tail Call Count */
+
+#define IA32_EAX       (0x0)
+#define IA32_EBX       (0x3)
+#define IA32_ECX       (0x1)
+#define IA32_EDX       (0x2)
+#define IA32_ESI       (0x6)
+#define IA32_EDI       (0x7)
+#define IA32_EBP       (0x5)
+#define IA32_ESP       (0x4)
+
+/*
+ * List of x86 conditional jump opcodes (. + s8)
+ * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
+ */
+#define IA32_JB  0x72
+#define IA32_JAE 0x73
+#define IA32_JE  0x74
+#define IA32_JNE 0x75
+#define IA32_JBE 0x76
+#define IA32_JA  0x77
+#define IA32_JL  0x7C
+#define IA32_JGE 0x7D
+#define IA32_JLE 0x7E
+#define IA32_JG  0x7F
+
+/*
+ * Map eBPF registers to IA32 32-bit registers or stack scratch space.
+ *
+ * 1. All the registers, R0-R10, are mapped to scratch space on stack.
+ * 2. We need two 64-bit temp registers to do complex operations on eBPF
+ *    registers.
+ * 3. For performance reasons, BPF_REG_AX, used for blinding constants,
+ *    is mapped to a real hardware register pair, IA32_ESI and IA32_EDI.
+ *
+ * As eBPF registers are 64-bit and IA32 has only 32-bit registers, each
+ * eBPF register is mapped to a pair of IA32 32-bit registers or scratch
+ * memory slots, and the 64-bit eBPF value is built from those two halves.
+ *
+ * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
+ */
+static const u8 bpf2ia32[][2] = {
+       /* Return value from in-kernel function, and exit value from eBPF */
+       [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+
+       /* The arguments from eBPF program to in-kernel function */
+       /* Stored on stack scratch space */
+       [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+       [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+       [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+       [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+       [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+
+       /* Callee saved registers that in-kernel function will preserve */
+       /* Stored on stack scratch space */
+       [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+       [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+       [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+       [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+
+       /* Read only Frame Pointer to access Stack */
+       [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
+
+       /* Temporary register for blinding constants. */
+       [BPF_REG_AX] = {IA32_ESI, IA32_EDI},
+
+       /* Tail call count. Stored on stack scratch space. */
+       [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
+};
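
To make the mapping concrete: a sketch (read_bpf_reg1() is a hypothetical
helper, not part of the patch) of how a stacked 64-bit eBPF register is
reassembled from its two 32-bit slots, using BPF_REG_1 at
STACK_OFFSET(8)/STACK_OFFSET(12) as the example:

    #include <stdint.h>
    #include <string.h>

    uint64_t read_bpf_reg1(const uint8_t *ebp)
    {
            uint32_t lo, hi;

            memcpy(&lo, ebp + 8, 4);        /* bpf2ia32[BPF_REG_1][0] */
            memcpy(&hi, ebp + 12, 4);       /* bpf2ia32[BPF_REG_1][1] */

            return ((uint64_t)hi << 32) | lo;
    }
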
+
+#define dst_lo dst[0]
+#define dst_hi dst[1]
+#define src_lo src[0]
+#define src_hi src[1]
+
+#define STACK_ALIGNMENT        8
+/*
+ * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP, BPF_REG_AX and Tail call counts.
+ */
+#define SCRATCH_SIZE 96
+
+/* Total stack size used in JITed code */
+#define _STACK_SIZE    (stack_depth + SCRATCH_SIZE)
+
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+
+/* Get the offset of an eBPF register stored in the scratch space. */
+#define STACK_VAR(off) (off)
+
+/* Encode 'dst_reg' register into IA32 opcode 'byte' */
+static u8 add_1reg(u8 byte, u32 dst_reg)
+{
+       return byte + dst_reg;
+}
+
+/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
+static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
+{
+       return byte + dst_reg + (src_reg << 3);
+}
+
+static void jit_fill_hole(void *area, unsigned int size)
+{
+       /* Fill whole space with int3 instructions */
+       memset(area, 0xcc, size);
+}
+
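+/* dst = imm (4 bytes) */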
+static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
+                                  u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (dstk) {
+               if (val == 0) {
+                       /* xor eax,eax */
+                       EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
+                       /* mov dword ptr [ebp+off],eax */
+                       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                             STACK_VAR(dst));
+               } else {
+                       EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
+                                   STACK_VAR(dst), val);
+               }
+       } else {
+               if (val == 0)
+                       EMIT2(0x33, add_2reg(0xC0, dst, dst));
+               else
+                       EMIT2_off32(0xC7, add_1reg(0xC0, dst),
+                                   val);
+       }
+       *pprog = prog;
+}
+
+/* dst = src (4 bytes) */
+static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
+                                  bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 sreg = sstk ? IA32_EAX : src;
+
+       if (sstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
+       if (dstk)
+               /* mov dword ptr [ebp+off],eax */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
+       else
+               /* mov dst,sreg */
+               EMIT2(0x89, add_2reg(0xC0, dst, sreg));
+
+       *pprog = prog;
+}
+
+/* dst = src */
+static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
+                                    const u8 src[], bool dstk,
+                                    bool sstk, u8 **pprog)
+{
+       emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
+       if (is64)
+               /* complete 8 byte move */
+               emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
+       else
+               /* zero out high 4 bytes */
+               emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
+}
+
+/* Sign extended move */
+static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
+                                    const u32 val, bool dstk, u8 **pprog)
+{
+       u32 hi = 0;
+
+       if (is64 && (val & (1<<31)))
+               hi = (u32)~0;
+       emit_ia32_mov_i(dst_lo, val, dstk, pprog);
+       emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
+}
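
A worked example of the sign extension computed above at JIT time: a
negative 32-bit immediate gets an all-ones high word, so the 64-bit value
round-trips (sketch, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t val = (uint32_t)-5;
            uint32_t hi = (val & (1U << 31)) ? (uint32_t)~0 : 0;    /* the is64 path */

            /* hi=0xffffffff, lo=0xfffffffb reassemble to -5 */
            printf("%lld\n", (long long)(((uint64_t)hi << 32) | val));
            return 0;
    }
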
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst * src
+ */
+static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
+                                  bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 sreg = sstk ? IA32_ECX : src;
+
+       if (sstk)
+               /* mov ecx,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+       else
+               /* mov eax,dst */
+               EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
+
+       /* mul sreg */
+       EMIT2(0xF7, add_1reg(0xE0, sreg));
+
+       if (dstk)
+               /* mov dword ptr [ebp+off],eax */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst));
+       else
+               /* mov dst,eax */
+               EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
+
+       *pprog = prog;
+}
+
+static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
+                                        bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk && val != 64) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+       switch (val) {
+       case 16:
+               /*
+                * Emit 'movzwl eax,ax' to zero extend 16-bit
+                * into 64 bit
+                */
+               EMIT2(0x0F, 0xB7);
+               EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+               break;
+       case 32:
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+               break;
+       case 64:
+               /* nop */
+               break;
+       }
+
+       if (dstk && val != 64) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       *pprog = prog;
+}
+
+static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
+                                      bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+       switch (val) {
+       case 16:
+               /* Emit 'ror %ax, 8' to swap lower 2 bytes */
+               EMIT1(0x66);
+               EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
+
+               EMIT2(0x0F, 0xB7);
+               EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
+
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+               break;
+       case 32:
+               /* Emit 'bswap eax' to swap lower 4 bytes */
+               EMIT1(0x0F);
+               EMIT1(add_1reg(0xC8, dreg_lo));
+
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+               break;
+       case 64:
+               /* Emit 'bswap eax' to swap lower 4 bytes */
+               EMIT1(0x0F);
+               EMIT1(add_1reg(0xC8, dreg_lo));
+
+               /* Emit 'bswap edx' to swap lower 4 bytes */
+               EMIT1(0x0F);
+               EMIT1(add_1reg(0xC8, dreg_hi));
+
+               /* mov ecx,dreg_hi */
+               EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
+               /* mov dreg_hi,dreg_lo */
+               EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+               /* mov dreg_lo,ecx */
+               EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
+
+               break;
+       }
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       *pprog = prog;
+}
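
A C model of the 64-bit case above (bswap64_model() is an illustrative
helper using the GCC/Clang bswap builtin, not kernel code): byte-swap each
32-bit half, then exchange the halves, which together reverse all eight
bytes:

    #include <stdint.h>

    void bswap64_model(uint32_t *lo, uint32_t *hi)
    {
            uint32_t l = __builtin_bswap32(*lo);    /* bswap dreg_lo */
            uint32_t h = __builtin_bswap32(*hi);    /* bswap dreg_hi */

            *lo = h;        /* the three-way move through ecx above */
            *hi = l;
    }
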
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (div|mod) src
+ */
+static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
+                                      bool dstk, bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (sstk)
+               /* mov ecx,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                     STACK_VAR(src));
+       else if (src != IA32_ECX)
+               /* mov ecx,src */
+               EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst));
+       else
+               /* mov eax,dst */
+               EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
+
+       /* xor edx,edx */
+       EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
+       /* div ecx */
+       EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
+
+       if (op == BPF_MOD) {
+               if (dstk)
+                       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                             STACK_VAR(dst));
+               else
+                       EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
+       } else {
+               if (dstk)
+                       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                             STACK_VAR(dst));
+               else
+                       EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
+       }
+       *pprog = prog;
+}
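
The run-time effect of this sequence as a plain C sketch (div_mod_model()
is hypothetical; 'div ecx' divides edx:eax by ecx, leaving the quotient in
eax and the remainder in edx, and edx was just zeroed, so this is a 32-bit
unsigned divide; src != 0 is assumed):

    #include <stdint.h>

    uint32_t div_mod_model(int op_is_mod, uint32_t dst, uint32_t src)
    {
            uint32_t quot = dst / src;      /* eax after 'div ecx' */
            uint32_t rem  = dst % src;      /* edx after 'div ecx' */

            return op_is_mod ? rem : quot;
    }
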
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (shift) src
+ */
+static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
+                                    bool dstk, bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg = dstk ? IA32_EAX : dst;
+       u8 b2;
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+
+       if (sstk)
+               /* mov ecx,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
+       else if (src != IA32_ECX)
+               /* mov ecx,src */
+               EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
+
+       switch (op) {
+       case BPF_LSH:
+               b2 = 0xE0; break;
+       case BPF_RSH:
+               b2 = 0xE8; break;
+       case BPF_ARSH:
+               b2 = 0xF8; break;
+       default:
+               return;
+       }
+       EMIT2(0xD3, add_1reg(b2, dreg));
+
+       if (dstk)
+               /* mov dword ptr [ebp+off],dreg */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
+       *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
+                                  const u8 dst, const u8 src, bool dstk,
+                                  bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 sreg = sstk ? IA32_EAX : src;
+       u8 dreg = dstk ? IA32_EDX : dst;
+
+       if (sstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
+
+       switch (BPF_OP(op)) {
+       /* dst = dst + src */
+       case BPF_ADD:
+               if (hi && is64)
+                       EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
+               else
+                       EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
+               break;
+       /* dst = dst - src */
+       case BPF_SUB:
+               if (hi && is64)
+                       EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
+               else
+                       EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
+               break;
+       /* dst = dst | src */
+       case BPF_OR:
+               EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
+               break;
+       /* dst = dst & src */
+       case BPF_AND:
+               EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
+               break;
+       /* dst = dst ^ src */
+       case BPF_XOR:
+               EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
+               break;
+       }
+
+       if (dstk)
+               /* mov dword ptr [ebp+off],dreg */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
+                     STACK_VAR(dst));
+       *pprog = prog;
+}
+
+/* ALU operation (64 bit) */
+static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
+                                    const u8 dst[], const u8 src[],
+                                    bool dstk,  bool sstk,
+                                    u8 **pprog)
+{
+       u8 *prog = *pprog;
+
+       emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
+       if (is64)
+               emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
+                               &prog);
+       else
+               emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+       *pprog = prog;
+}
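
For BPF_ADD with is64 set, the pair of calls above emits 'add' (0x01) on
the low words and 'adc' (0x11) on the high words; a C sketch of the carry
propagation (add64_model() is illustrative, not kernel code):

    #include <stdint.h>

    void add64_model(uint32_t *dlo, uint32_t *dhi, uint32_t slo, uint32_t shi)
    {
            uint32_t old = *dlo;

            *dlo += slo;                    /* add: low words */
            *dhi += shi + (*dlo < old);     /* adc: high words plus carry */
    }
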
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (op) val
+ */
+static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
+                                  const u8 dst, const s32 val, bool dstk,
+                                  u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg = dstk ? IA32_EAX : dst;
+       u8 sreg = IA32_EDX;
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+
+       if (!is_imm8(val))
+               /* mov edx,imm32*/
+               EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
+
+       switch (op) {
+       /* dst = dst + val */
+       case BPF_ADD:
+               if (hi && is64) {
+                       if (is_imm8(val))
+                               EMIT3(0x83, add_1reg(0xD0, dreg), val);
+                       else
+                               EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
+               } else {
+                       if (is_imm8(val))
+                               EMIT3(0x83, add_1reg(0xC0, dreg), val);
+                       else
+                               EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
+               }
+               break;
+       /* dst = dst - val */
+       case BPF_SUB:
+               if (hi && is64) {
+                       if (is_imm8(val))
+                               EMIT3(0x83, add_1reg(0xD8, dreg), val);
+                       else
+                               EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
+               } else {
+                       if (is_imm8(val))
+                               EMIT3(0x83, add_1reg(0xE8, dreg), val);
+                       else
+                               EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
+               }
+               break;
+       /* dst = dst | val */
+       case BPF_OR:
+               if (is_imm8(val))
+                       EMIT3(0x83, add_1reg(0xC8, dreg), val);
+               else
+                       EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
+               break;
+       /* dst = dst & val */
+       case BPF_AND:
+               if (is_imm8(val))
+                       EMIT3(0x83, add_1reg(0xE0, dreg), val);
+               else
+                       EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
+               break;
+       /* dst = dst ^ val */
+       case BPF_XOR:
+               if (is_imm8(val))
+                       EMIT3(0x83, add_1reg(0xF0, dreg), val);
+               else
+                       EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
+               break;
+       case BPF_NEG:
+               EMIT2(0xF7, add_1reg(0xD8, dreg));
+               break;
+       }
+
+       if (dstk)
+               /* mov dword ptr [ebp+off],dreg */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
+                     STACK_VAR(dst));
+       *pprog = prog;
+}
+
+/* ALU operation (64 bit) */
+static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
+                                    const u8 dst[], const u32 val,
+                                    bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       u32 hi = 0;
+
+       if (is64 && (val & (1<<31)))
+               hi = (u32)~0;
+
+       emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
+       if (is64)
+               emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
+       else
+               emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+
+       *pprog = prog;
+}
+
+/* dst = -dst (64 bit) */
+static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+
+       /* neg dreg_lo */
+       EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
+       /* adc dreg_hi,0x0 */
+       EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
+       /* neg dreg_hi */
+       EMIT2(0xF7, add_1reg(0xD8, dreg_hi));
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       *pprog = prog;
+}
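+
+/*
+ * Two's-complement negation of a register pair, as computed by the
+ * neg/adc/neg sequence above. A C sketch (illustrative only; neg64_ref
+ * is a hypothetical name):
+ *
+ *        static u64 neg64_ref(u32 lo, u32 hi)
+ *        {
+ *                u32 new_lo = -lo;
+ *                u32 new_hi = -(hi + (lo != 0));
+ *
+ *                return ((u64)new_hi << 32) | new_lo;
+ *        }
+ */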
+
+/* dst = dst << src */
+static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
+                                    bool dstk, bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       static int jmp_label1 = -1;
+       static int jmp_label2 = -1;
+       static int jmp_label3 = -1;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+
+       if (sstk)
+               /* mov ecx,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                     STACK_VAR(src_lo));
+       else
+               /* mov ecx,src_lo */
+               EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+       /* cmp ecx,32 */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+       /* Jumps when >= 32 */
+       if (is_imm8(jmp_label(jmp_label1, 2)))
+               EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+       else
+               EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+       /* < 32 */
+       /* shl dreg_hi,cl */
+       EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
+       /* mov ebx,dreg_lo */
+       EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+       /* shl dreg_lo,cl */
+       EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
+
+       /* IA32_ECX = -IA32_ECX + 32 */
+       /* neg ecx */
+       EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+       /* add ecx,32 */
+       EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+       /* shr ebx,cl */
+       EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
+       /* or dreg_hi,ebx */
+       EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
+
+       /* goto out; */
+       if (is_imm8(jmp_label(jmp_label3, 2)))
+               EMIT2(0xEB, jmp_label(jmp_label3, 2));
+       else
+               EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+       /* >= 32 */
+       if (jmp_label1 == -1)
+               jmp_label1 = cnt;
+
+       /* cmp ecx,64 */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+       /* Jumps when >= 64 */
+       if (is_imm8(jmp_label(jmp_label2, 2)))
+               EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+       else
+               EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+       /* >= 32 && < 64 */
+       /* sub ecx,32 */
+       EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+       /* shl dreg_lo,cl */
+       EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
+       /* mov dreg_hi,dreg_lo */
+       EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+
+       /* xor dreg_lo,dreg_lo */
+       EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+
+       /* goto out; */
+       if (is_imm8(jmp_label(jmp_label3, 2)))
+               EMIT2(0xEB, jmp_label(jmp_label3, 2));
+       else
+               EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+       /* >= 64 */
+       if (jmp_label2 == -1)
+               jmp_label2 = cnt;
+       /* xor dreg_lo,dreg_lo */
+       EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+       /* xor dreg_hi,dreg_hi */
+       EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+       if (jmp_label3 == -1)
+               jmp_label3 = cnt;
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       /* out: */
+       *pprog = prog;
+}
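+
+/*
+ * The three ranges above build a 64-bit left shift out of 32-bit
+ * halves. A C sketch (illustrative only; lsh64_ref is a hypothetical
+ * name; BPF leaves shifts >= 64 undefined, and the zero result is
+ * simply what is emitted here):
+ *
+ *        static u64 lsh64_ref(u32 lo, u32 hi, u32 cnt)
+ *        {
+ *                if (cnt == 0)
+ *                        return ((u64)hi << 32) | lo;
+ *                if (cnt < 32)
+ *                        return ((u64)((hi << cnt) | (lo >> (32 - cnt))) << 32)
+ *                                | (lo << cnt);
+ *                if (cnt < 64)
+ *                        return (u64)(lo << (cnt - 32)) << 32;
+ *                return 0;
+ *        }
+ */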
+
+/* dst = dst >> src (signed) */
+static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
+                                     bool dstk, bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       static int jmp_label1 = -1;
+       static int jmp_label2 = -1;
+       static int jmp_label3 = -1;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+
+       if (sstk)
+               /* mov ecx,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                     STACK_VAR(src_lo));
+       else
+               /* mov ecx,src_lo */
+               EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+       /* cmp ecx,32 */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+       /* Jumps when >= 32 */
+       if (is_imm8(jmp_label(jmp_label1, 2)))
+               EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+       else
+               EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+       /* < 32 */
+       /* shr dreg_lo,cl */
+       EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
+       /* mov ebx,dreg_hi */
+       EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+       /* ashr dreg_hi,cl */
+       EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
+
+       /* IA32_ECX = -IA32_ECX + 32 */
+       /* neg ecx */
+       EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+       /* add ecx,32 */
+       EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+       /* shl ebx,cl */
+       EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+       /* or dreg_lo,ebx */
+       EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+
+       /* goto out; */
+       if (is_imm8(jmp_label(jmp_label3, 2)))
+               EMIT2(0xEB, jmp_label(jmp_label3, 2));
+       else
+               EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+       /* >= 32 */
+       if (jmp_label1 == -1)
+               jmp_label1 = cnt;
+
+       /* cmp ecx,64 */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+       /* Jumps when >= 64 */
+       if (is_imm8(jmp_label(jmp_label2, 2)))
+               EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+       else
+               EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+       /* >= 32 && < 64 */
+       /* sub ecx,32 */
+       EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+       /* ashr dreg_hi,cl */
+       EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
+       /* mov dreg_lo,dreg_hi */
+       EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+       /* ashr dreg_hi,imm8 */
+       EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+
+       /* goto out; */
+       if (is_imm8(jmp_label(jmp_label3, 2)))
+               EMIT2(0xEB, jmp_label(jmp_label3, 2));
+       else
+               EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+       /* >= 64 */
+       if (jmp_label2 == -1)
+               jmp_label2 = cnt;
+       /* ashr dreg_hi,imm8 */
+       EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+       /* mov dreg_lo,dreg_hi */
+       EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+       if (jmp_label3 == -1)
+               jmp_label3 = cnt;
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       /* out: */
+       *pprog = prog;
+}
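+
+/*
+ * Arithmetic right shift differs from the logical one only in what
+ * fills the vacated high bits: copies of the sign bit. A C sketch of
+ * the ranges handled above (illustrative only; arsh64_ref is a
+ * hypothetical name):
+ *
+ *        static u64 arsh64_ref(u32 lo, u32 hi, u32 cnt)
+ *        {
+ *                u32 sign = (u32)((s32)hi >> 31);
+ *
+ *                if (cnt == 0)
+ *                        return ((u64)hi << 32) | lo;
+ *                if (cnt < 32)
+ *                        return ((u64)(u32)((s32)hi >> cnt) << 32) |
+ *                               ((lo >> cnt) | (hi << (32 - cnt)));
+ *                if (cnt < 64)
+ *                        return ((u64)sign << 32) |
+ *                               (u32)((s32)hi >> (cnt - 32));
+ *                return ((u64)sign << 32) | sign;
+ *        }
+ */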
+
+/* dst = dst >> src */
+static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
+                                    bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       static int jmp_label1 = -1;
+       static int jmp_label2 = -1;
+       static int jmp_label3 = -1;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+
+       if (sstk)
+               /* mov ecx,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                     STACK_VAR(src_lo));
+       else
+               /* mov ecx,src_lo */
+               EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+       /* cmp ecx,32 */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+       /* Jumps when >= 32 */
+       if (is_imm8(jmp_label(jmp_label1, 2)))
+               EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+       else
+               EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+       /* < 32 */
+       /* shr dreg_lo,cl */
+       EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
+       /* mov ebx,dreg_hi */
+       EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+       /* shr dreg_hi,cl */
+       EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
+
+       /* IA32_ECX = -IA32_ECX + 32 */
+       /* neg ecx */
+       EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+       /* add ecx,32 */
+       EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+       /* shl ebx,cl */
+       EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+       /* or dreg_lo,ebx */
+       EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+
+       /* goto out; */
+       if (is_imm8(jmp_label(jmp_label3, 2)))
+               EMIT2(0xEB, jmp_label(jmp_label3, 2));
+       else
+               EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+       /* >= 32 */
+       if (jmp_label1 == -1)
+               jmp_label1 = cnt;
+       /* cmp ecx,64 */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+       /* Jumps when >= 64 */
+       if (is_imm8(jmp_label(jmp_label2, 2)))
+               EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+       else
+               EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+       /* >= 32 && < 64 */
+       /* sub ecx,32 */
+       EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+       /* shr dreg_hi,cl */
+       EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
+       /* mov dreg_lo,dreg_hi */
+       EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+       /* xor dreg_hi,dreg_hi */
+       EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+       /* goto out; */
+       if (is_imm8(jmp_label(jmp_label3, 2)))
+               EMIT2(0xEB, jmp_label(jmp_label3, 2));
+       else
+               EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+       /* >= 64 */
+       if (jmp_label2 == -1)
+               jmp_label2 = cnt;
+       /* xor dreg_lo,dreg_lo */
+       EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+       /* xor dreg_hi,dreg_hi */
+       EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+       if (jmp_label3 == -1)
+               jmp_label3 = cnt;
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       /* out: */
+       *pprog = prog;
+}
+
+/* dst = dst << val */
+static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
+                                    bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+       /* Do LSH operation */
+       if (val < 32) {
+               /* shl dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
+               /* mov ebx,dreg_lo */
+               EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+               /* shl dreg_lo,imm8 */
+               EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
+
+               /* IA32_ECX = 32 - val */
+               /* mov cl,val */
+               EMIT2(0xB1, val);
+               /* movzx ecx,cl */
+               EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+               /* neg ecx */
+               EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+               /* add ecx,32 */
+               EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+               /* shr ebx,cl */
+               EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
+               /* or dreg_hi,ebx */
+               EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
+       } else if (val >= 32 && val < 64) {
+               u32 value = val - 32;
+
+               /* shl dreg_lo,imm8 */
+               EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
+               /* mov dreg_hi,dreg_lo */
+               EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+               /* xor dreg_lo,dreg_lo */
+               EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+       } else {
+               /* xor dreg_lo,dreg_lo */
+               EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+       }
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       *pprog = prog;
+}
+
+/* dst = dst >> val */
+static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
+                                    bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+
+       /* Do RSH operation */
+       if (val < 32) {
+               /* shr dreg_lo,imm8 */
+               EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
+               /* mov ebx,dreg_hi */
+               EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+               /* shr dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
+
+               /* IA32_ECX = 32 - val */
+               /* mov cl,val */
+               EMIT2(0xB1, val);
+               /* movzx ecx,cl */
+               EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+               /* neg ecx */
+               EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+               /* add ecx,32 */
+               EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+               /* shl ebx,cl */
+               EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+               /* or dreg_lo,ebx */
+               EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+       } else if (val >= 32 && val < 64) {
+               u32 value = val - 32;
+
+               /* shr dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
+               /* mov dreg_lo,dreg_hi */
+               EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+       } else {
+               /* xor dreg_lo,dreg_lo */
+               EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+               /* xor dreg_hi,dreg_hi */
+               EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+       }
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       *pprog = prog;
+}
+
+/* dst = dst >> val (signed) */
+static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
+                                     bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+       if (dstk) {
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                     STACK_VAR(dst_hi));
+       }
+       /* Do RSH operation */
+       if (val < 32) {
+               /* shr dreg_lo,imm8 */
+               EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
+               /* mov ebx,dreg_hi */
+               EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+               /* ashr dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
+
+               /* IA32_ECX = 32 - val */
+               /* mov cl,val */
+               EMIT2(0xB1, val);
+               /* movzx ecx,cl */
+               EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+               /* neg ecx */
+               EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+               /* add ecx,32 */
+               EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+               /* shl ebx,cl */
+               EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+               /* or dreg_lo,ebx */
+               EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+       } else if (val >= 32 && val < 64) {
+               u32 value = val - 32;
+
+               /* ashr dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
+               /* mov dreg_lo,dreg_hi */
+               EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+               /* ashr dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+       } else {
+               /* ashr dreg_hi,imm8 */
+               EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+               /* mov dreg_lo,dreg_hi */
+               EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+       }
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],dreg_lo */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],dreg_hi */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+                     STACK_VAR(dst_hi));
+       }
+       *pprog = prog;
+}
+
+static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
+                                    bool sstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_hi));
+       else
+               /* mov eax,dst_hi */
+               EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
+
+       if (sstk)
+               /* mul dword ptr [ebp+off] */
+               EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
+       else
+               /* mul src_lo */
+               EMIT2(0xF7, add_1reg(0xE0, src_lo));
+
+       /* mov ecx,eax */
+       EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+       else
+               /* mov eax,dst_lo */
+               EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+       if (sstk)
+               /* mul dword ptr [ebp+off] */
+               EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
+       else
+               /* mul src_hi */
+               EMIT2(0xF7, add_1reg(0xE0, src_hi));
+
+       /* add ecx,eax */
+       EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+       if (dstk)
+               /* mov eax,dword ptr [ebp+off] */
+               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+       else
+               /* mov eax,dst_lo */
+               EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+       if (sstk)
+               /* mul dword ptr [ebp+off] */
+               EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
+       else
+               /* mul src_lo */
+               EMIT2(0xF7, add_1reg(0xE0, src_lo));
+
+       /* add ecx,edx */
+       EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],eax */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],ecx */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                     STACK_VAR(dst_hi));
+       } else {
+               /* mov dst_lo,eax */
+               EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
+               /* mov dst_hi,ecx */
+               EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
+       }
+
+       *pprog = prog;
+}
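+
+/*
+ * The three mul instructions above are the schoolbook 32x32
+ * decomposition with everything above bit 63 discarded. A C sketch
+ * (illustrative only; mul64_ref is a hypothetical name):
+ *
+ *        static u64 mul64_ref(u32 dlo, u32 dhi, u32 slo, u32 shi)
+ *        {
+ *                u64 prod = (u64)dlo * slo;
+ *                u32 cross = dhi * slo + dlo * shi;
+ *
+ *                return prod + ((u64)cross << 32);
+ *        }
+ */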
+
+static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
+                                    bool dstk, u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       u32 hi;
+
+       hi = val & (1<<31) ? (u32)~0 : 0;
+       /* movl eax,imm32 */
+       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
+       if (dstk)
+               /* mul dword ptr [ebp+off] */
+               EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
+       else
+               /* mul dst_hi */
+               EMIT2(0xF7, add_1reg(0xE0, dst_hi));
+
+       /* mov ecx,eax */
+       EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+       /* movl eax,imm32 */
+       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
+       if (dstk)
+               /* mul dword ptr [ebp+off] */
+               EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
+       else
+               /* mul dst_lo */
+               EMIT2(0xF7, add_1reg(0xE0, dst_lo));
+       /* add ecx,eax */
+       EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+       /* movl eax,imm32 */
+       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
+       if (dstk)
+               /* mul dword ptr [ebp+off] */
+               EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
+       else
+               /* mul dst_lo */
+               EMIT2(0xF7, add_1reg(0xE0, dst_lo));
+
+       /* add ecx,edx */
+       EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
+
+       if (dstk) {
+               /* mov dword ptr [ebp+off],eax */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                     STACK_VAR(dst_lo));
+               /* mov dword ptr [ebp+off],ecx */
+               EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                     STACK_VAR(dst_hi));
+       } else {
+               /* mov dst_lo,eax */
+               EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
+               /* mov dst_hi,ecx */
+               EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
+       }
+
+       *pprog = prog;
+}
+
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+       if (bpf_size == BPF_W)
+               return 4;
+       else if (bpf_size == BPF_H)
+               return 2;
+       else if (bpf_size == BPF_B)
+               return 1;
+       else if (bpf_size == BPF_DW)
+               return 4; /* imm32: a DW store is split into two 32-bit words */
+       else
+               return 0;
+}
+
+struct jit_context {
+       int cleanup_addr; /* Epilogue code offset */
+};
+
+/* Maximum number of bytes emitted while JITing one eBPF insn */
+#define BPF_MAX_INSN_SIZE      128
+#define BPF_INSN_SAFETY                64
+
+#define PROLOGUE_SIZE 35
+
+/*
+ * Emit prologue code for BPF program and check its size.
+ * bpf_tail_call helper will skip it while jumping into another program.
+ */
+static void emit_prologue(u8 **pprog, u32 stack_depth)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       const u8 *r1 = bpf2ia32[BPF_REG_1];
+       const u8 fplo = bpf2ia32[BPF_REG_FP][0];
+       const u8 fphi = bpf2ia32[BPF_REG_FP][1];
+       const u8 *tcc = bpf2ia32[TCALL_CNT];
+
+       /* push ebp */
+       EMIT1(0x55);
+       /* mov ebp,esp */
+       EMIT2(0x89, 0xE5);
+       /* push edi */
+       EMIT1(0x57);
+       /* push esi */
+       EMIT1(0x56);
+       /* push ebx */
+       EMIT1(0x53);
+
+       /* sub esp,STACK_SIZE */
+       EMIT2_off32(0x81, 0xEC, STACK_SIZE);
+       /* sub ebp,SCRATCH_SIZE+4+12 */
+       EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
+       /* xor ebx,ebx */
+       EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
+
+       /* Set up BPF prog stack base register */
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
+
+       /* Move BPF_CTX (EAX) to BPF_REG_R1 */
+       /* mov dword ptr [ebp+off],eax */
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
+
+       /* Initialize Tail Count */
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+       BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+       *pprog = prog;
+}
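+
+/*
+ * Resulting frame layout, a sketch derived from the instructions above
+ * (STACK_SIZE and SCRATCH_SIZE are defined earlier in this file):
+ *
+ *        ebp_orig +  0 : saved ebp
+ *        ebp_orig -  4 : saved edi
+ *        ebp_orig -  8 : saved esi
+ *        ebp_orig - 12 : saved ebx
+ *
+ * The adjusted ebp is ebp_orig - SCRATCH_SIZE - 16; the STACK_VAR()
+ * scratch slots for the BPF registers live at positive offsets from
+ * it, with the BPF program stack below them.
+ */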
+
+/* Emit epilogue code for BPF program */
+static void emit_epilogue(u8 **pprog, u32 stack_depth)
+{
+       u8 *prog = *pprog;
+       const u8 *r0 = bpf2ia32[BPF_REG_0];
+       int cnt = 0;
+
+       /* mov eax,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
+       /* mov edx,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
+
+       /* add ebp,SCRATCH_SIZE+4+12 */
+       EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
+
+       /* mov ebx,dword ptr [ebp-12] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
+       /* mov esi,dword ptr [ebp-8] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
+       /* mov edi,dword ptr [ebp-4] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
+
+       EMIT1(0xC9); /* leave */
+       EMIT1(0xC3); /* ret */
+       *pprog = prog;
+}
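+
+/*
+ * On 32-bit x86 a 64-bit return value is delivered in the edx:eax
+ * pair (low word in eax), which is why BPF_REG_0's two scratch slots
+ * are loaded into eax and edx before leave/ret. From the caller's
+ * side, a sketch of the assumed convention:
+ *
+ *        u64 ret = prog->bpf_func(ctx);
+ *
+ * with the low word arriving in eax and the high word in edx.
+ */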
+
+/*
+ * Generate the following code:
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+ *   if (index >= array->map.max_entries)
+ *     goto out;
+ *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
+ *     goto out;
+ *   prog = array->ptrs[index];
+ *   if (prog == NULL)
+ *     goto out;
+ *   goto *(prog->bpf_func + prologue_size);
+ * out:
+ */
+static void emit_bpf_tail_call(u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+       const u8 *r1 = bpf2ia32[BPF_REG_1];
+       const u8 *r2 = bpf2ia32[BPF_REG_2];
+       const u8 *r3 = bpf2ia32[BPF_REG_3];
+       const u8 *tcc = bpf2ia32[TCALL_CNT];
+       u32 lo, hi;
+       static int jmp_label1 = -1;
+
+       /*
+        * if (index >= array->map.max_entries)
+        *     goto out;
+        */
+       /* mov eax,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
+       /* mov edx,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
+
+       /* cmp dword ptr [eax+off],edx */
+       EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
+             offsetof(struct bpf_array, map.max_entries));
+       /* jbe out */
+       EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
+
+       /*
+        * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+        *     goto out;
+        */
+       lo = (u32)MAX_TAIL_CALL_CNT;
+       hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+       /* cmp ebx,hi */
+       EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
+       EMIT2(IA32_JNE, 3);
+       /* cmp ecx,lo */
+       EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
+
+       /* jae out */
+       EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+
+       /* add ecx,0x1 */
+       EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
+       /* adc ebx,0x0 */
+       EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
+
+       /* mov dword ptr [ebp+off],ecx */
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
+       /* mov dword ptr [ebp+off],ebx */
+       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+       /* prog = array->ptrs[index]; */
+       /* mov edx, [eax + edx * 4 + offsetof(...)] */
+       EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
+
+       /*
+        * if (prog == NULL)
+        *     goto out;
+        */
+       /* test edx,edx */
+       EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
+       /* je out */
+       EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
+
+       /* goto *(prog->bpf_func + prologue_size); */
+       /* mov edx, dword ptr [edx + off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
+             offsetof(struct bpf_prog, bpf_func));
+       /* add edx,prologue_size */
+       EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
+
+       /* mov eax,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
+
+       /*
+        * Now we're ready to jump into next BPF program:
+        * eax == ctx (1st arg)
+        * edx == prog->bpf_func + prologue_size
+        */
+       RETPOLINE_EDX_BPF_JIT();
+
+       if (jmp_label1 == -1)
+               jmp_label1 = cnt;
+
+       /* out: */
+       *pprog = prog;
+}
+
+/* Push the 64-bit scratch register pair onto the native stack. */
+static inline void emit_push_r64(const u8 src[], u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       /* mov ecx,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
+       /* push ecx */
+       EMIT1(0x51);
+
+       /* mov ecx,dword ptr [ebp+off] */
+       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
+       /* push ecx */
+       EMIT1(0x51);
+
+       *pprog = prog;
+}
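+
+/*
+ * Used by the BPF_CALL case below. Assuming the kernel's 32-bit
+ * -mregparm=3 build, the first u64 argument (R1) travels in the
+ * eax:edx register pair while R2..R5 go on the stack: each pair is
+ * pushed high word first so the low word ends up at the lower
+ * address, and the call site reclaims all four pairs with a single
+ * add esp,32. Callee-side view, as a sketch:
+ *
+ *        u64 helper(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+ */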
+
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+                 int oldproglen, struct jit_context *ctx)
+{
+       struct bpf_insn *insn = bpf_prog->insnsi;
+       int insn_cnt = bpf_prog->len;
+       bool seen_exit = false;
+       u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+       int i, cnt = 0;
+       int proglen = 0;
+       u8 *prog = temp;
+
+       emit_prologue(&prog, bpf_prog->aux->stack_depth);
+
+       for (i = 0; i < insn_cnt; i++, insn++) {
+               const s32 imm32 = insn->imm;
+               const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+               const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
+               const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
+               const u8 code = insn->code;
+               const u8 *dst = bpf2ia32[insn->dst_reg];
+               const u8 *src = bpf2ia32[insn->src_reg];
+               const u8 *r0 = bpf2ia32[BPF_REG_0];
+               s64 jmp_offset;
+               u8 jmp_cond;
+               int ilen;
+               u8 *func;
+
+               switch (code) {
+               /* ALU operations */
+               /* dst = src */
+               case BPF_ALU | BPF_MOV | BPF_K:
+               case BPF_ALU | BPF_MOV | BPF_X:
+               case BPF_ALU64 | BPF_MOV | BPF_K:
+               case BPF_ALU64 | BPF_MOV | BPF_X:
+                       switch (BPF_SRC(code)) {
+                       case BPF_X:
+                               emit_ia32_mov_r64(is64, dst, src, dstk,
+                                                 sstk, &prog);
+                               break;
+                       case BPF_K:
+                               /* Sign-extend immediate value to dst reg */
+                               emit_ia32_mov_i64(is64, dst, imm32,
+                                                 dstk, &prog);
+                               break;
+                       }
+                       break;
+               /* dst = dst + src/imm */
+               /* dst = dst - src/imm */
+               /* dst = dst | src/imm */
+               /* dst = dst & src/imm */
+               /* dst = dst ^ src/imm */
+               case BPF_ALU | BPF_ADD | BPF_K:
+               case BPF_ALU | BPF_ADD | BPF_X:
+               case BPF_ALU | BPF_SUB | BPF_K:
+               case BPF_ALU | BPF_SUB | BPF_X:
+               case BPF_ALU | BPF_OR | BPF_K:
+               case BPF_ALU | BPF_OR | BPF_X:
+               case BPF_ALU | BPF_AND | BPF_K:
+               case BPF_ALU | BPF_AND | BPF_X:
+               case BPF_ALU | BPF_XOR | BPF_K:
+               case BPF_ALU | BPF_XOR | BPF_X:
+               case BPF_ALU64 | BPF_ADD | BPF_K:
+               case BPF_ALU64 | BPF_ADD | BPF_X:
+               case BPF_ALU64 | BPF_SUB | BPF_K:
+               case BPF_ALU64 | BPF_SUB | BPF_X:
+               case BPF_ALU64 | BPF_OR | BPF_K:
+               case BPF_ALU64 | BPF_OR | BPF_X:
+               case BPF_ALU64 | BPF_AND | BPF_K:
+               case BPF_ALU64 | BPF_AND | BPF_X:
+               case BPF_ALU64 | BPF_XOR | BPF_K:
+               case BPF_ALU64 | BPF_XOR | BPF_X:
+                       switch (BPF_SRC(code)) {
+                       case BPF_X:
+                               emit_ia32_alu_r64(is64, BPF_OP(code), dst,
+                                                 src, dstk, sstk, &prog);
+                               break;
+                       case BPF_K:
+                               emit_ia32_alu_i64(is64, BPF_OP(code), dst,
+                                                 imm32, dstk, &prog);
+                               break;
+                       }
+                       break;
+               case BPF_ALU | BPF_MUL | BPF_K:
+               case BPF_ALU | BPF_MUL | BPF_X:
+                       switch (BPF_SRC(code)) {
+                       case BPF_X:
+                               emit_ia32_mul_r(dst_lo, src_lo, dstk,
+                                               sstk, &prog);
+                               break;
+                       case BPF_K:
+                               /* mov ecx,imm32 */
+                               EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+                                           imm32);
+                               emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
+                                               false, &prog);
+                               break;
+                       }
+                       emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+                       break;
+               case BPF_ALU | BPF_LSH | BPF_X:
+               case BPF_ALU | BPF_RSH | BPF_X:
+               case BPF_ALU | BPF_ARSH | BPF_K:
+               case BPF_ALU | BPF_ARSH | BPF_X:
+                       switch (BPF_SRC(code)) {
+                       case BPF_X:
+                               emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
+                                                 dstk, sstk, &prog);
+                               break;
+                       case BPF_K:
+                               /* mov ecx,imm32 */
+                               EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+                                           imm32);
+                               emit_ia32_shift_r(BPF_OP(code), dst_lo,
+                                                 IA32_ECX, dstk, false,
+                                                 &prog);
+                               break;
+                       }
+                       emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+                       break;
+               /* dst = dst / src(imm) */
+               /* dst = dst % src(imm) */
+               case BPF_ALU | BPF_DIV | BPF_K:
+               case BPF_ALU | BPF_DIV | BPF_X:
+               case BPF_ALU | BPF_MOD | BPF_K:
+               case BPF_ALU | BPF_MOD | BPF_X:
+                       switch (BPF_SRC(code)) {
+                       case BPF_X:
+                               emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
+                                                   src_lo, dstk, sstk, &prog);
+                               break;
+                       case BPF_K:
+                               /* mov ecx,imm32 */
+                               EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+                                           imm32);
+                               emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
+                                                   IA32_ECX, dstk, false,
+                                                   &prog);
+                               break;
+                       }
+                       emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+                       break;
+               case BPF_ALU64 | BPF_DIV | BPF_K:
+               case BPF_ALU64 | BPF_DIV | BPF_X:
+               case BPF_ALU64 | BPF_MOD | BPF_K:
+               case BPF_ALU64 | BPF_MOD | BPF_X:
+                       goto notyet;
+               /* dst = dst >> imm */
+               /* dst = dst << imm */
+               case BPF_ALU | BPF_RSH | BPF_K:
+               case BPF_ALU | BPF_LSH | BPF_K:
+                       if (unlikely(imm32 > 31))
+                               return -EINVAL;
+                       /* mov ecx,imm32 */
+                       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+                       emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
+                                         false, &prog);
+                       emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+                       break;
+               /* dst = dst << imm */
+               case BPF_ALU64 | BPF_LSH | BPF_K:
+                       if (unlikely(imm32 > 63))
+                               return -EINVAL;
+                       emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
+                       break;
+               /* dst = dst >> imm */
+               case BPF_ALU64 | BPF_RSH | BPF_K:
+                       if (unlikely(imm32 > 63))
+                               return -EINVAL;
+                       emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
+                       break;
+               /* dst = dst << src */
+               case BPF_ALU64 | BPF_LSH | BPF_X:
+                       emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
+                       break;
+               /* dst = dst >> src */
+               case BPF_ALU64 | BPF_RSH | BPF_X:
+                       emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
+                       break;
+               /* dst = dst >> src (signed) */
+               case BPF_ALU64 | BPF_ARSH | BPF_X:
+                       emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
+                       break;
+               /* dst = dst >> imm (signed) */
+               case BPF_ALU64 | BPF_ARSH | BPF_K:
+                       if (unlikely(imm32 > 63))
+                               return -EINVAL;
+                       emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
+                       break;
+               /* dst = -dst */
+               case BPF_ALU | BPF_NEG:
+                       emit_ia32_alu_i(is64, false, BPF_OP(code),
+                                       dst_lo, 0, dstk, &prog);
+                       emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+                       break;
+               /* dst = -dst (64 bit) */
+               case BPF_ALU64 | BPF_NEG:
+                       emit_ia32_neg64(dst, dstk, &prog);
+                       break;
+               /* dst = dst * src/imm */
+               case BPF_ALU64 | BPF_MUL | BPF_X:
+               case BPF_ALU64 | BPF_MUL | BPF_K:
+                       switch (BPF_SRC(code)) {
+                       case BPF_X:
+                               emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
+                               break;
+                       case BPF_K:
+                               emit_ia32_mul_i64(dst, imm32, dstk, &prog);
+                               break;
+                       }
+                       break;
+               /* dst = htole(dst) */
+               case BPF_ALU | BPF_END | BPF_FROM_LE:
+                       emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
+                       break;
+               /* dst = htobe(dst) */
+               case BPF_ALU | BPF_END | BPF_FROM_BE:
+                       emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
+                       break;
+               /* dst = imm64 */
+               case BPF_LD | BPF_IMM | BPF_DW: {
+                       s32 hi, lo = imm32;
+
+                       hi = insn[1].imm;
+                       emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
+                       emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
+                       insn++;
+                       i++;
+                       break;
+               }
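+
+               /*
+                * A BPF_LD | BPF_IMM | BPF_DW instruction spans two
+                * 8-byte insn slots; conceptually (a sketch):
+                *
+                *        dst = (u64)(u32)insn[0].imm |
+                *              ((u64)(u32)insn[1].imm << 32);
+                */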
+               /* ST: *(size *)(dst_reg + off) = imm */
+               case BPF_ST | BPF_MEM | BPF_H:
+               case BPF_ST | BPF_MEM | BPF_B:
+               case BPF_ST | BPF_MEM | BPF_W:
+               case BPF_ST | BPF_MEM | BPF_DW:
+                       if (dstk)
+                               /* mov eax,dword ptr [ebp+off] */
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(dst_lo));
+                       else
+                               /* mov eax,dst_lo */
+                               EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+                       switch (BPF_SIZE(code)) {
+                       case BPF_B:
+                               EMIT(0xC6, 1); break;
+                       case BPF_H:
+                               EMIT2(0x66, 0xC7); break;
+                       case BPF_W:
+                       case BPF_DW:
+                               EMIT(0xC7, 1); break;
+                       }
+
+                       if (is_imm8(insn->off))
+                               EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
+                       else
+                               EMIT1_off32(add_1reg(0x80, IA32_EAX),
+                                           insn->off);
+                       EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
+
+                       if (BPF_SIZE(code) == BPF_DW) {
+                               u32 hi;
+
+                               hi = imm32 & (1<<31) ? (u32)~0 : 0;
+                               EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
+                                           insn->off + 4);
+                               EMIT(hi, 4);
+                       }
+                       break;
+
+               /* STX: *(size *)(dst_reg + off) = src_reg */
+               case BPF_STX | BPF_MEM | BPF_B:
+               case BPF_STX | BPF_MEM | BPF_H:
+               case BPF_STX | BPF_MEM | BPF_W:
+               case BPF_STX | BPF_MEM | BPF_DW:
+                       if (dstk)
+                               /* mov eax,dword ptr [ebp+off] */
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(dst_lo));
+                       else
+                               /* mov eax,dst_lo */
+                               EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+                       if (sstk)
+                               /* mov edx,dword ptr [ebp+off] */
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                                     STACK_VAR(src_lo));
+                       else
+                               /* mov edx,src_lo */
+                               EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
+
+                       switch (BPF_SIZE(code)) {
+                       case BPF_B:
+                               EMIT(0x88, 1); break;
+                       case BPF_H:
+                               EMIT2(0x66, 0x89); break;
+                       case BPF_W:
+                       case BPF_DW:
+                               EMIT(0x89, 1); break;
+                       }
+
+                       if (is_imm8(insn->off))
+                               EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
+                                     insn->off);
+                       else
+                               EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
+                                           insn->off);
+
+                       if (BPF_SIZE(code) == BPF_DW) {
+                               if (sstk)
+                                       /* mov edx,dword ptr [ebp+off] */
+                                       EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
+                                                            IA32_EDX),
+                                             STACK_VAR(src_hi));
+                               else
+                                       /* mov edx,src_hi */
+                                       EMIT2(0x8B, add_2reg(0xC0, src_hi,
+                                                            IA32_EDX));
+                               EMIT1(0x89);
+                               if (is_imm8(insn->off + 4)) {
+                                       EMIT2(add_2reg(0x40, IA32_EAX,
+                                                      IA32_EDX),
+                                             insn->off + 4);
+                               } else {
+                                       EMIT1(add_2reg(0x80, IA32_EAX,
+                                                      IA32_EDX));
+                                       EMIT(insn->off + 4, 4);
+                               }
+                       }
+                       break;
+
+               /* LDX: dst_reg = *(size *)(src_reg + off) */
+               case BPF_LDX | BPF_MEM | BPF_B:
+               case BPF_LDX | BPF_MEM | BPF_H:
+               case BPF_LDX | BPF_MEM | BPF_W:
+               case BPF_LDX | BPF_MEM | BPF_DW:
+                       if (sstk)
+                               /* mov eax,dword ptr [ebp+off] */
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(src_lo));
+                       else
+                               /* mov eax,src_lo */
+                               EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
+
+                       switch (BPF_SIZE(code)) {
+                       case BPF_B:
+                               EMIT2(0x0F, 0xB6); break;
+                       case BPF_H:
+                               EMIT2(0x0F, 0xB7); break;
+                       case BPF_W:
+                       case BPF_DW:
+                               EMIT(0x8B, 1); break;
+                       }
+
+                       if (is_imm8(insn->off))
+                               EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
+                                     insn->off);
+                       else
+                               EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
+                                           insn->off);
+
+                       if (dstk)
+                               /* mov dword ptr [ebp+off],edx */
+                               EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                                     STACK_VAR(dst_lo));
+                       else
+                               /* mov dst_lo,edx */
+                               EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
+                       switch (BPF_SIZE(code)) {
+                       case BPF_B:
+                       case BPF_H:
+                       case BPF_W:
+                               if (dstk) {
+                                       /* mov dword ptr [ebp+off],0 */
+                                       EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
+                                             STACK_VAR(dst_hi));
+                                       EMIT(0x0, 4);
+                               } else {
+                                       /* mov dst_hi,0 */
+                                       EMIT2_off32(0xC7,
+                                                   add_1reg(0xC0, dst_hi),
+                                                   0);
+                               }
+                               break;
+                       case BPF_DW:
+                               EMIT2_off32(0x8B,
+                                           add_2reg(0x80, IA32_EAX, IA32_EDX),
+                                           insn->off + 4);
+                               if (dstk)
+                                       EMIT3(0x89,
+                                             add_2reg(0x40, IA32_EBP,
+                                                      IA32_EDX),
+                                             STACK_VAR(dst_hi));
+                               else
+                                       EMIT2(0x89,
+                                             add_2reg(0xC0, dst_hi, IA32_EDX));
+                               break;
+                       default:
+                               break;
+                       }
+                       break;
+               /* call */
+               case BPF_JMP | BPF_CALL:
+               {
+                       const u8 *r1 = bpf2ia32[BPF_REG_1];
+                       const u8 *r2 = bpf2ia32[BPF_REG_2];
+                       const u8 *r3 = bpf2ia32[BPF_REG_3];
+                       const u8 *r4 = bpf2ia32[BPF_REG_4];
+                       const u8 *r5 = bpf2ia32[BPF_REG_5];
+
+                       if (insn->src_reg == BPF_PSEUDO_CALL)
+                               goto notyet;
+
+                       func = (u8 *) __bpf_call_base + imm32;
+                       jmp_offset = func - (image + addrs[i]);
+
+                       if (!imm32 || !is_simm32(jmp_offset)) {
+                               pr_err("unsupported BPF func %d addr %p image %p\n",
+                                      imm32, func, image);
+                               return -EINVAL;
+                       }
+
+                       /* mov eax,dword ptr [ebp+off] */
+                       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                             STACK_VAR(r1[0]));
+                       /* mov edx,dword ptr [ebp+off] */
+                       EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                             STACK_VAR(r1[1]));
+
+                       emit_push_r64(r5, &prog);
+                       emit_push_r64(r4, &prog);
+                       emit_push_r64(r3, &prog);
+                       emit_push_r64(r2, &prog);
+
+                       EMIT1_off32(0xE8, jmp_offset + 9);
+
+                       /* mov dword ptr [ebp+off],eax */
+                       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                             STACK_VAR(r0[0]));
+                       /* mov dword ptr [ebp+off],edx */
+                       EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                             STACK_VAR(r0[1]));
+
+                       /* add esp,32 */
+                       EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
+                       break;
+               }
+               case BPF_JMP | BPF_TAIL_CALL:
+                       emit_bpf_tail_call(&prog);
+                       break;
+
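+               /*
+                * For the two-register conditional jumps below, a
+                * 64-bit compare is decomposed into 32-bit halves:
+                * when the high words differ they alone decide the
+                * result (under the comparison's own signedness);
+                * only when they are equal do the low words matter,
+                * and then as an unsigned comparison. A sketch
+                * (compare() and compare_unsigned() are placeholders):
+                *
+                *        if (dst_hi != src_hi)
+                *                take = compare(dst_hi, src_hi);
+                *        else
+                *                take = compare_unsigned(dst_lo, src_lo);
+                */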
+               /* cond jump */
+               case BPF_JMP | BPF_JEQ | BPF_X:
+               case BPF_JMP | BPF_JNE | BPF_X:
+               case BPF_JMP | BPF_JGT | BPF_X:
+               case BPF_JMP | BPF_JLT | BPF_X:
+               case BPF_JMP | BPF_JGE | BPF_X:
+               case BPF_JMP | BPF_JLE | BPF_X:
+               case BPF_JMP | BPF_JSGT | BPF_X:
+               case BPF_JMP | BPF_JSLE | BPF_X:
+               case BPF_JMP | BPF_JSLT | BPF_X:
+               case BPF_JMP | BPF_JSGE | BPF_X: {
+                       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+                       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+                       u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+                       u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+                       if (dstk) {
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(dst_lo));
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                                     STACK_VAR(dst_hi));
+                       }
+
+                       if (sstk) {
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                                     STACK_VAR(src_lo));
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
+                                     STACK_VAR(src_hi));
+                       }
+
+                       /* cmp dreg_hi,sreg_hi */
+                       EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+                       EMIT2(IA32_JNE, 2);
+                       /* cmp dreg_lo,sreg_lo */
+                       EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+                       goto emit_cond_jmp;
+               }
+               case BPF_JMP | BPF_JSET | BPF_X: {
+                       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+                       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+                       u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+                       u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+                       if (dstk) {
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(dst_lo));
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                                     STACK_VAR(dst_hi));
+                       }
+
+                       if (sstk) {
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+                                     STACK_VAR(src_lo));
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
+                                     STACK_VAR(src_hi));
+                       }
+                       /* and dreg_lo,sreg_lo */
+                       EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
+                       /* and dreg_hi,sreg_hi */
+                       EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+                       /* or dreg_lo,dreg_hi */
+                       EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+                       goto emit_cond_jmp;
+               }
+               case BPF_JMP | BPF_JSET | BPF_K: {
+                       u32 hi;
+                       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+                       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+                       u8 sreg_lo = IA32_ECX;
+                       u8 sreg_hi = IA32_EBX;
+
+                       if (dstk) {
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(dst_lo));
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                                     STACK_VAR(dst_hi));
+                       }
+                       hi = imm32 & (1<<31) ? (u32)~0 : 0;
+
+                       /* mov ecx,imm32 */
+                       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+                       /* mov ebx,imm32 */
+                       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+
+                       /* and dreg_lo,sreg_lo */
+                       EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
+                       /* and dreg_hi,sreg_hi */
+                       EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+                       /* or dreg_lo,dreg_hi */
+                       EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+                       goto emit_cond_jmp;
+               }
+               case BPF_JMP | BPF_JEQ | BPF_K:
+               case BPF_JMP | BPF_JNE | BPF_K:
+               case BPF_JMP | BPF_JGT | BPF_K:
+               case BPF_JMP | BPF_JLT | BPF_K:
+               case BPF_JMP | BPF_JGE | BPF_K:
+               case BPF_JMP | BPF_JLE | BPF_K:
+               case BPF_JMP | BPF_JSGT | BPF_K:
+               case BPF_JMP | BPF_JSLE | BPF_K:
+               case BPF_JMP | BPF_JSLT | BPF_K:
+               case BPF_JMP | BPF_JSGE | BPF_K: {
+                       u32 hi;
+                       u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+                       u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+                       u8 sreg_lo = IA32_ECX;
+                       u8 sreg_hi = IA32_EBX;
+
+                       if (dstk) {
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+                                     STACK_VAR(dst_lo));
+                               EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+                                     STACK_VAR(dst_hi));
+                       }
+
+                       hi = imm32 & (1<<31) ? (u32)~0 : 0;
+                       /* mov ecx,imm32 */
+                       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+                       /* mov ebx,hi */
+                       EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+
+                       /* cmp dreg_hi,sreg_hi */
+                       EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+                       EMIT2(IA32_JNE, 2);
+                       /* cmp dreg_lo,sreg_lo */
+                       EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+
+emit_cond_jmp:         /* Convert BPF opcode to x86 */
+                       switch (BPF_OP(code)) {
+                       case BPF_JEQ:
+                               jmp_cond = IA32_JE;
+                               break;
+                       case BPF_JSET:
+                       case BPF_JNE:
+                               jmp_cond = IA32_JNE;
+                               break;
+                       case BPF_JGT:
+                               /* GT is unsigned '>', JA in x86 */
+                               jmp_cond = IA32_JA;
+                               break;
+                       case BPF_JLT:
+                               /* LT is unsigned '<', JB in x86 */
+                               jmp_cond = IA32_JB;
+                               break;
+                       case BPF_JGE:
+                               /* GE is unsigned '>=', JAE in x86 */
+                               jmp_cond = IA32_JAE;
+                               break;
+                       case BPF_JLE:
+                               /* LE is unsigned '<=', JBE in x86 */
+                               jmp_cond = IA32_JBE;
+                               break;
+                       case BPF_JSGT:
+                               /* Signed '>', GT in x86 */
+                               jmp_cond = IA32_JG;
+                               break;
+                       case BPF_JSLT:
+                               /* Signed '<', LT in x86 */
+                               jmp_cond = IA32_JL;
+                               break;
+                       case BPF_JSGE:
+                               /* Signed '>=', GE in x86 */
+                               jmp_cond = IA32_JGE;
+                               break;
+                       case BPF_JSLE:
+                               /* Signed '<=', LE in x86 */
+                               jmp_cond = IA32_JLE;
+                               break;
+                       default: /* to silence GCC warning */
+                               return -EFAULT;
+                       }
+                       jmp_offset = addrs[i + insn->off] - addrs[i];
+                       if (is_imm8(jmp_offset)) {
+                               EMIT2(jmp_cond, jmp_offset);
+                       } else if (is_simm32(jmp_offset)) {
+                               EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+                       } else {
+                               pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+                               return -EFAULT;
+                       }
+
+                       break;
+               }
+               case BPF_JMP | BPF_JA:
+                       if (insn->off == -1)
+                               /* -1 jmp instructions will always jump
+                                * backwards two bytes. Explicitly handling
+                                * this case avoids wasting too many passes
+                                * when there are long sequences of replaced
+                                * dead code.
+                                */
+                               jmp_offset = -2;
+                       else
+                               jmp_offset = addrs[i + insn->off] - addrs[i];
+
+                       if (!jmp_offset)
+                               /* Optimize out nop jumps */
+                               break;
+emit_jmp:
+                       if (is_imm8(jmp_offset)) {
+                               EMIT2(0xEB, jmp_offset);
+                       } else if (is_simm32(jmp_offset)) {
+                               EMIT1_off32(0xE9, jmp_offset);
+                       } else {
+                               pr_err("jmp gen bug %llx\n", jmp_offset);
+                               return -EFAULT;
+                       }
+                       break;
+               /* STX XADD: lock *(u32 *)(dst + off) += src */
+               case BPF_STX | BPF_XADD | BPF_W:
+               /* STX XADD: lock *(u64 *)(dst + off) += src */
+               case BPF_STX | BPF_XADD | BPF_DW:
+                       goto notyet;
+               case BPF_JMP | BPF_EXIT:
+                       if (seen_exit) {
+                               jmp_offset = ctx->cleanup_addr - addrs[i];
+                               goto emit_jmp;
+                       }
+                       seen_exit = true;
+                       /* Update cleanup_addr */
+                       ctx->cleanup_addr = proglen;
+                       emit_epilogue(&prog, bpf_prog->aux->stack_depth);
+                       break;
+notyet:
+                       pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+                       return -EFAULT;
+               default:
+                       /*
+                        * This error will be seen if a new instruction was
+                        * added to the interpreter but not to the JIT, or if
+                        * there is junk in bpf_prog.
+                        */
+                       pr_err("bpf_jit: unknown opcode %02x\n", code);
+                       return -EINVAL;
+               }
+
+               ilen = prog - temp;
+               if (ilen > BPF_MAX_INSN_SIZE) {
+                       pr_err("bpf_jit: fatal insn size error\n");
+                       return -EFAULT;
+               }
+
+               if (image) {
+                       if (unlikely(proglen + ilen > oldproglen)) {
+                               pr_err("bpf_jit: fatal error\n");
+                               return -EFAULT;
+                       }
+                       memcpy(image + proglen, temp, ilen);
+               }
+               proglen += ilen;
+               addrs[i] = proglen;
+               prog = temp;
+       }
+       return proglen;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+       struct bpf_binary_header *header = NULL;
+       struct bpf_prog *tmp, *orig_prog = prog;
+       int proglen, oldproglen = 0;
+       struct jit_context ctx = {};
+       bool tmp_blinded = false;
+       u8 *image = NULL;
+       int *addrs;
+       int pass;
+       int i;
+
+       if (!prog->jit_requested)
+               return orig_prog;
+
+       tmp = bpf_jit_blind_constants(prog);
+       /*
+        * If blinding was requested and we failed during blinding,
+        * we must fall back to the interpreter.
+        */
+       if (IS_ERR(tmp))
+               return orig_prog;
+       if (tmp != prog) {
+               tmp_blinded = true;
+               prog = tmp;
+       }
+
+       addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+       if (!addrs) {
+               prog = orig_prog;
+               goto out;
+       }
+
+       /*
+        * Before the first pass, make a rough estimate of addrs[]:
+        * each BPF instruction is translated to less than 64 bytes.
+        */
+       for (proglen = 0, i = 0; i < prog->len; i++) {
+               proglen += 64;
+               addrs[i] = proglen;
+       }
+       ctx.cleanup_addr = proglen;
+
+       /*
+        * The JITed image shrinks with every pass and the loop iterates
+        * until the image stops shrinking. Very large BPF programs may
+        * converge only on the last pass; in that case, do one more
+        * pass to emit the final image.
+        */
+       for (pass = 0; pass < 20 || image; pass++) {
+               proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
+               if (proglen <= 0) {
+out_image:
+                       image = NULL;
+                       if (header)
+                               bpf_jit_binary_free(header);
+                       prog = orig_prog;
+                       goto out_addrs;
+               }
+               if (image) {
+                       if (proglen != oldproglen) {
+                               pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
+                                      proglen, oldproglen);
+                               goto out_image;
+                       }
+                       break;
+               }
+               if (proglen == oldproglen) {
+                       header = bpf_jit_binary_alloc(proglen, &image,
+                                                     1, jit_fill_hole);
+                       if (!header) {
+                               prog = orig_prog;
+                               goto out_addrs;
+                       }
+               }
+               oldproglen = proglen;
+               cond_resched();
+       }
+
+       if (bpf_jit_enable > 1)
+               bpf_jit_dump(prog->len, proglen, pass + 1, image);
+
+       if (image) {
+               bpf_jit_binary_lock_ro(header);
+               prog->bpf_func = (void *)image;
+               prog->jited = 1;
+               prog->jited_len = proglen;
+       } else {
+               prog = orig_prog;
+       }
+
+out_addrs:
+       kfree(addrs);
+out:
+       if (tmp_blinded)
+               bpf_jit_prog_release_other(prog, prog == orig_prog ?
+                                          tmp : orig_prog);
+       return prog;
+}
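
The 64-bit conditionals above are decided with two 32-bit compares: the high words are compared first, and only when they are equal do the low words matter; the BPF_K cases additionally sign-extend imm32 into a (lo, hi) register pair. A minimal userspace sketch of those semantics for the unsigned BPF_JGT case (bpf_jgt64 is an illustrative name, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* What the emitted "cmp hi; jne +2; cmp lo; ja" sequence computes. */
static int bpf_jgt64(uint32_t dst_lo, uint32_t dst_hi,
                     uint32_t src_lo, uint32_t src_hi)
{
        if (dst_hi != src_hi)           /* flags come from the high-word cmp */
                return dst_hi > src_hi;
        return dst_lo > src_lo;         /* flags come from the low-word cmp */
}

int main(void)
{
        /* Sign-extend imm32 into (lo, hi), as the BPF_K cases above do. */
        int32_t imm32 = -5;
        uint32_t lo = (uint32_t)imm32;                  /* 0xfffffffb */
        uint32_t hi = imm32 & (1U << 31) ? ~0U : 0;     /* 0xffffffff */

        /* dst = 1 << 32 is not above (u64)-5, so this prints 0. */
        printf("%d\n", bpf_jgt64(0, 1, lo, hi));
        return 0;
}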
index 48b14b534897e21a29e5cc4b6829e00389b90091..ccf4a49bb065e1cc3f78f57f9780c1e8ee8d67d9 100644 (file)
@@ -98,7 +98,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
                set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
        } else {
                /* No p4d for 4-level paging: point the pgd to the pud page table */
-               pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
+               pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));
                set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
        }
 
index 8268987010458489b386da80c09ed5164fff8b77..19c1ff54238758b34ad1ce828667528cb7ea7418 100644 (file)
@@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void)
 {
        early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
        HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
+
+       /*
+        * The virtual address of the shared_info page has changed, so
+        * the vcpu_info pointer for VCPU 0 is now stale.
+        *
+        * The prepare_boot_cpu callback will re-initialize it via
+        * xen_vcpu_setup, but we can't rely on that to be called for
+        * old Xen versions (xen_have_vector_callback == 0).
+        *
+        * It is, in any case, bad to have a stale vcpu_info pointer,
+        * so reset it now.
+        */
+       xen_vcpu_info_reset(0);
 }
 
 static void __init init_hvm_pv_info(void)
index c36d23aa6c3502a004d1357027dca057fca7e3a6..357969a3697cc7af6e08c12144ec06f43a8841ad 100644 (file)
@@ -421,45 +421,33 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 {
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
-       unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
-       unsigned long frames[pages];
-       int f;
-
-       /*
-        * A GDT can be up to 64k in size, which corresponds to 8192
-        * 8-byte entries, or 16 4k pages..
-        */
+       unsigned long pfn, mfn;
+       int level;
+       pte_t *ptep;
+       void *virt;
 
-       BUG_ON(size > 65536);
+       /* @size should be at most GDT_SIZE, which is smaller than PAGE_SIZE. */
+       BUG_ON(size > PAGE_SIZE);
        BUG_ON(va & ~PAGE_MASK);
 
-       for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-               int level;
-               pte_t *ptep;
-               unsigned long pfn, mfn;
-               void *virt;
-
-               /*
-                * The GDT is per-cpu and is in the percpu data area.
-                * That can be virtually mapped, so we need to do a
-                * page-walk to get the underlying MFN for the
-                * hypercall.  The page can also be in the kernel's
-                * linear range, so we need to RO that mapping too.
-                */
-               ptep = lookup_address(va, &level);
-               BUG_ON(ptep == NULL);
-
-               pfn = pte_pfn(*ptep);
-               mfn = pfn_to_mfn(pfn);
-               virt = __va(PFN_PHYS(pfn));
+       /*
+        * The GDT is per-cpu and is in the percpu data area.
+        * That can be virtually mapped, so we need to do a
+        * page-walk to get the underlying MFN for the
+        * hypercall.  The page can also be in the kernel's
+        * linear range, so we need to RO that mapping too.
+        */
+       ptep = lookup_address(va, &level);
+       BUG_ON(ptep == NULL);
 
-               frames[f] = mfn;
+       pfn = pte_pfn(*ptep);
+       mfn = pfn_to_mfn(pfn);
+       virt = __va(PFN_PHYS(pfn));
 
-               make_lowmem_page_readonly((void *)va);
-               make_lowmem_page_readonly(virt);
-       }
+       make_lowmem_page_readonly((void *)va);
+       make_lowmem_page_readonly(virt);
 
-       if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+       if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
                BUG();
 }
 
@@ -470,34 +458,22 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 {
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
-       unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
-       unsigned long frames[pages];
-       int f;
-
-       /*
-        * A GDT can be up to 64k in size, which corresponds to 8192
-        * 8-byte entries, or 16 4k pages..
-        */
+       unsigned long pfn, mfn;
+       pte_t pte;
 
-       BUG_ON(size > 65536);
+       /* @size should be at most GDT_SIZE, which is smaller than PAGE_SIZE. */
+       BUG_ON(size > PAGE_SIZE);
        BUG_ON(va & ~PAGE_MASK);
 
-       for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-               pte_t pte;
-               unsigned long pfn, mfn;
+       pfn = virt_to_pfn(va);
+       mfn = pfn_to_mfn(pfn);
 
-               pfn = virt_to_pfn(va);
-               mfn = pfn_to_mfn(pfn);
+       pte = pfn_pte(pfn, PAGE_KERNEL_RO);
 
-               pte = pfn_pte(pfn, PAGE_KERNEL_RO);
-
-               if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
-                       BUG();
-
-               frames[f] = mfn;
-       }
+       if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+               BUG();
 
-       if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+       if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
                BUG();
 }
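
Both loaders above collapse to a single frame because the whole GDT now fits in one page. As a rough check, assuming GDT_ENTRIES is 32 as on current x86 (an assumption, not stated by the patch), the table is 32 * 8 = 256 bytes, far below PAGE_SIZE; in kernel context the invariant could be asserted at build time:

BUILD_BUG_ON(GDT_ENTRIES * sizeof(struct desc_struct) > PAGE_SIZE);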
 
index d33e7dbe3129fc327fe7f24fb0fb84c802b11edf..2d76106788a31c5638459bc35306445e59f2c02d 100644 (file)
@@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 }
 EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
 
-static void xen_flush_tlb_all(void)
+static noinline void xen_flush_tlb_all(void)
 {
        struct mmuext_op *op;
        struct multicall_space mcs;
 
-       trace_xen_mmu_flush_tlb_all(0);
-
        preempt_disable();
 
        mcs = xen_mc_entry(sizeof(*op));
index 486c0a34d00b2b75a27467aa46910142c108e373..2c30cabfda90fb94b36c88f5f35aef91d8f119d0 100644 (file)
@@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void)
        return this_cpu_read(xen_vcpu_info.arch.cr2);
 }
 
-static void xen_flush_tlb(void)
+static noinline void xen_flush_tlb(void)
 {
        struct mmuext_op *op;
        struct multicall_space mcs;
 
-       trace_xen_mmu_flush_tlb(0);
-
        preempt_disable();
 
        mcs = xen_mc_entry(sizeof(*op));
index f0ecd98509d849fd1011b619d6917d9a3da6dd4d..771ae9730ac6869430f8f8d196eb804d9f356053 100644 (file)
@@ -4934,8 +4934,16 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
        bool new_queue = false;
        bool bfqq_already_existing = false, split = false;
 
-       if (!rq->elv.icq)
+       /*
+        * Even if we don't have an icq attached, we should still clear
+        * the scheduler pointers, as they might point to previously
+        * allocated bic/bfqq structs.
+        */
+       if (!rq->elv.icq) {
+               rq->elv.priv[0] = rq->elv.priv[1] = NULL;
                return;
+       }
+
        bic = icq_to_bic(rq->elv.icq);
 
        spin_lock_irq(&bfqd->lock);
index 1c16694ae14523c144f7be2af43c3c697cc13f7a..eb85cb87c40f46f54b96cd06ae078600e8e8cac4 100644 (file)
@@ -1177,26 +1177,20 @@ int blkcg_init_queue(struct request_queue *q)
 
        preloaded = !radix_tree_preload(GFP_KERNEL);
 
-       /*
-        * Make sure the root blkg exists and count the existing blkgs.  As
-        * @q is bypassing at this point, blkg_lookup_create() can't be
-        * used.  Open code insertion.
-        */
+       /* Make sure the root blkg exists. */
        rcu_read_lock();
        spin_lock_irq(q->queue_lock);
        blkg = blkg_create(&blkcg_root, q, new_blkg);
+       if (IS_ERR(blkg))
+               goto err_unlock;
+       q->root_blkg = blkg;
+       q->root_rl.blkg = blkg;
        spin_unlock_irq(q->queue_lock);
        rcu_read_unlock();
 
        if (preloaded)
                radix_tree_preload_end();
 
-       if (IS_ERR(blkg))
-               return PTR_ERR(blkg);
-
-       q->root_blkg = blkg;
-       q->root_rl.blkg = blkg;
-
        ret = blk_throtl_init(q);
        if (ret) {
                spin_lock_irq(q->queue_lock);
@@ -1204,6 +1198,13 @@ int blkcg_init_queue(struct request_queue *q)
                spin_unlock_irq(q->queue_lock);
        }
        return ret;
+
+err_unlock:
+       spin_unlock_irq(q->queue_lock);
+       rcu_read_unlock();
+       if (preloaded)
+               radix_tree_preload_end();
+       return PTR_ERR(blkg);
 }
 
 /**
@@ -1410,9 +1411,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
        __clear_bit(pol->plid, q->blkcg_pols);
 
        list_for_each_entry(blkg, &q->blkg_list, q_node) {
-               /* grab blkcg lock too while removing @pd from @blkg */
-               spin_lock(&blkg->blkcg->lock);
-
                if (blkg->pd[pol->plid]) {
                        if (!blkg->pd[pol->plid]->offline &&
                            pol->pd_offline_fn) {
@@ -1422,8 +1420,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
                        pol->pd_free_fn(blkg->pd[pol->plid]);
                        blkg->pd[pol->plid] = NULL;
                }
-
-               spin_unlock(&blkg->blkcg->lock);
        }
 
        spin_unlock_irq(q->queue_lock);
index 806ce2442819c751f8a36ed83efc659c416d510f..85909b431eb0d52d5f29633fcb2ccfff0b14d9e0 100644 (file)
@@ -201,6 +201,10 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        rq->part = NULL;
        seqcount_init(&rq->gstate_seq);
        u64_stats_init(&rq->aborted_gstate_sync);
+       /*
+        * See the comment in blk_mq_init_request().
+        */
+       WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -915,7 +919,6 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 
        while (true) {
                bool success = false;
-               int ret;
 
                rcu_read_lock();
                if (percpu_ref_tryget_live(&q->q_usage_counter)) {
@@ -947,14 +950,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
                 */
                smp_rmb();
 
-               ret = wait_event_interruptible(q->mq_freeze_wq,
-                               (atomic_read(&q->mq_freeze_depth) == 0 &&
-                                (preempt || !blk_queue_preempt_only(q))) ||
-                               blk_queue_dying(q));
+               wait_event(q->mq_freeze_wq,
+                          (atomic_read(&q->mq_freeze_depth) == 0 &&
+                           (preempt || !blk_queue_preempt_only(q))) ||
+                          blk_queue_dying(q));
                if (blk_queue_dying(q))
                        return -ENODEV;
-               if (ret)
-                       return ret;
        }
 }
 
index 0dc9e341c2a72c65f6de9dbb5b019903ce95d723..9ce9cac16c3f5a7376f00b98e641e49d3e838592 100644 (file)
@@ -95,18 +95,15 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
 {
        struct mq_inflight *mi = priv;
 
-       if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) {
-               /*
-                * index[0] counts the specific partition that was asked
-                * for. index[1] counts the ones that are active on the
-                * whole device, so increment that if mi->part is indeed
-                * a partition, and not a whole device.
-                */
-               if (rq->part == mi->part)
-                       mi->inflight[0]++;
-               if (mi->part->partno)
-                       mi->inflight[1]++;
-       }
+       /*
+        * index[0] counts the specific partition that was asked for. index[1]
+        * counts the ones that are active on the whole device, so increment
+        * that if mi->part is indeed a partition, and not a whole device.
+        */
+       if (rq->part == mi->part)
+               mi->inflight[0]++;
+       if (mi->part->partno)
+               mi->inflight[1]++;
 }
 
 void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
@@ -118,6 +115,25 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
        blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
 }
 
+static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
+                                    struct request *rq, void *priv,
+                                    bool reserved)
+{
+       struct mq_inflight *mi = priv;
+
+       if (rq->part == mi->part)
+               mi->inflight[rq_data_dir(rq)]++;
+}
+
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+                        unsigned int inflight[2])
+{
+       struct mq_inflight mi = { .part = part, .inflight = inflight, };
+
+       inflight[0] = inflight[1] = 0;
+       blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+}
+
 void blk_freeze_queue_start(struct request_queue *q)
 {
        int freeze_depth;
@@ -2042,6 +2058,13 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 
        seqcount_init(&rq->gstate_seq);
        u64_stats_init(&rq->aborted_gstate_sync);
+       /*
+        * Start gstate with gen 1 instead of 0; otherwise it will be equal
+        * to aborted_gstate and the request will be identified as timed
+        * out by blk_mq_terminate_expired.
+        */
+       WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
+
        return 0;
 }
 
@@ -2329,7 +2352,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
-       unsigned int i;
+       unsigned int i, hctx_idx;
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        struct blk_mq_tag_set *set = q->tag_set;
@@ -2346,8 +2369,23 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 
        /*
         * Map software to hardware queues.
+        *
+        * If the cpu isn't present, it is mapped to the first hctx.
         */
        for_each_possible_cpu(i) {
+               hctx_idx = q->mq_map[i];
+               /* an unmapped hw queue can be remapped after the CPU topology changes */
+               if (!set->tags[hctx_idx] &&
+                   !__blk_mq_alloc_rq_map(set, hctx_idx)) {
+                       /*
+                        * If tags initialization fails for some hctx,
+                        * that hctx won't be brought online.  In this
+                        * case, remap the current ctx to hctx[0], which
+                        * is guaranteed to always have tags allocated.
+                        */
+                       q->mq_map[i] = 0;
+               }
+
                ctx = per_cpu_ptr(q->queue_ctx, i);
                hctx = blk_mq_map_queue(q, i);
 
@@ -2359,8 +2397,21 @@ static void blk_mq_map_swqueue(struct request_queue *q)
        mutex_unlock(&q->sysfs_lock);
 
        queue_for_each_hw_ctx(q, hctx, i) {
-               /* every hctx should get mapped by at least one CPU */
-               WARN_ON(!hctx->nr_ctx);
+               /*
+                * If no software queues are mapped to this hardware queue,
+                * disable it and free the request entries.
+                */
+               if (!hctx->nr_ctx) {
+                       /*
+                        * Never unmap queue 0.  We need it as a
+                        * fallback in case allocation for a new
+                        * remap fails.
+                        */
+                       if (i && set->tags[i])
+                               blk_mq_free_map_and_requests(set, i);
+
+                       hctx->tags = NULL;
+                       continue;
+               }
 
                hctx->tags = set->tags[i];
                WARN_ON(!hctx->tags);
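
Seeding gstate with MQ_RQ_GEN_INC relies on the gstate encoding: the low MQ_RQ_STATE_BITS carry the request state and the bits above them a generation counter, so a fresh request starts at generation 1 while aborted_gstate still reads generation 0, and the two can no longer match spuriously. A sketch of the assumed layout (constants paraphrased from blk-mq.h of this series):

#define MQ_RQ_STATE_BITS        2
#define MQ_RQ_STATE_MASK        ((1U << MQ_RQ_STATE_BITS) - 1)
#define MQ_RQ_GEN_INC           (1U << MQ_RQ_STATE_BITS)

/* gstate = (generation << MQ_RQ_STATE_BITS) | state */
unsigned long gstate = MQ_RQ_GEN_INC;   /* generation 1, state idle */
unsigned long aborted_gstate = 0;       /* generation 0 at init */
/* blk_mq_terminate_expired only fires on a generation match, so the
 * freshly initialized request above cannot be mistaken for aborted. */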
index 88c558f718190f88e123bba0444b0c50b296e572..e1bb420dc5d6cdcea146d381e7d9f823b3d0b542 100644 (file)
@@ -7,6 +7,9 @@
 
 struct blk_mq_tag_set;
 
+/**
+ * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
+ */
 struct blk_mq_ctx {
        struct {
                spinlock_t              lock;
@@ -185,7 +188,9 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
 }
 
 void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
-                       unsigned int inflight[2]);
+                     unsigned int inflight[2]);
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+                        unsigned int inflight[2]);
 
 static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
 {
index dc7e089373b9444a0b83e176b196e2539e6ff59b..c4513fe1adda0761ac16db45d4f8bdfc30824504 100644 (file)
@@ -82,6 +82,18 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part,
        }
 }
 
+void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+                      unsigned int inflight[2])
+{
+       if (q->mq_ops) {
+               blk_mq_in_flight_rw(q, part, inflight);
+               return;
+       }
+
+       inflight[0] = atomic_read(&part->in_flight[0]);
+       inflight[1] = atomic_read(&part->in_flight[1]);
+}
+
 struct hd_struct *__disk_get_part(struct gendisk *disk, int partno)
 {
        struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
index 08dabcd8b6aefc6844bbb9d9e9c001e6ff71fb33..db57cced9b987371e6c8a3c72ff6721b9d540bda 100644 (file)
@@ -145,13 +145,15 @@ ssize_t part_stat_show(struct device *dev,
                jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
-ssize_t part_inflight_show(struct device *dev,
-                       struct device_attribute *attr, char *buf)
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
 {
        struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q = part_to_disk(p)->queue;
+       unsigned int inflight[2];
 
-       return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
-               atomic_read(&p->in_flight[1]));
+       part_in_flight_rw(q, p, inflight);
+       return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
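
The two columns written by part_inflight_show() now come from the mq-aware helper, and the array is indexed like rq_data_dir(): 0 for reads, 1 for writes. A usage sketch, with q and p standing for any live queue/partition pair:

unsigned int inflight[2];

part_in_flight_rw(q, p, inflight);
/* inflight[0] = in-flight reads, inflight[1] = in-flight writes */
sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);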
index 1d5290c67108316f6495f33674094b5a476e2e14..0ee632bba06461021d7e4eb43127e7781a5114ee 100644 (file)
@@ -204,9 +204,14 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 
        down_read(&crypto_alg_sem);
        alg = __crypto_alg_lookup(name, type | test, mask | test);
-       if (!alg && test)
-               alg = __crypto_alg_lookup(name, type, mask) ?
-                     ERR_PTR(-ELIBBAD) : NULL;
+       if (!alg && test) {
+               alg = __crypto_alg_lookup(name, type, mask);
+               if (alg && !crypto_is_larval(alg)) {
+                       /* Test failed */
+                       crypto_mod_put(alg);
+                       alg = ERR_PTR(-ELIBBAD);
+               }
+       }
        up_read(&crypto_alg_sem);
 
        return alg;
index 4faa2781c964e290f8b28e752993b9a408d927b4..466a112a4446820ff655c3b72076c86d3c7e4f2d 100644 (file)
@@ -1134,8 +1134,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
        if (!drbg)
                return;
        kzfree(drbg->Vbuf);
+       drbg->Vbuf = NULL;
        drbg->V = NULL;
        kzfree(drbg->Cbuf);
+       drbg->Cbuf = NULL;
        drbg->C = NULL;
        kzfree(drbg->scratchpadbuf);
        drbg->scratchpadbuf = NULL;
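
Clearing Vbuf and Cbuf after kzfree() makes drbg_dealloc_state() idempotent: kzfree(), like kfree(), is a no-op on NULL, so a repeated teardown no longer frees stale memory. A sketch of the hazard being closed (hypothetical double call):

drbg_dealloc_state(drbg);
drbg_dealloc_state(drbg);       /* safe now: Vbuf/Cbuf were reset to
                                 * NULL, so kzfree() simply returns */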
index 76fb96966f7b111a8b084cbb9ef7c6084e750695..2f2e737be0f84a966020102ba21ce906c21b9584 100644 (file)
@@ -2123,6 +2123,25 @@ static int __init intel_opregion_present(void)
        return opregion;
 }
 
+static bool dmi_is_desktop(void)
+{
+       const char *chassis_type;
+
+       chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
+       if (!chassis_type)
+               return false;
+
+       if (!strcmp(chassis_type, "3") || /*  3: Desktop */
+           !strcmp(chassis_type, "4") || /*  4: Low Profile Desktop */
+           !strcmp(chassis_type, "5") || /*  5: Pizza Box */
+           !strcmp(chassis_type, "6") || /*  6: Mini Tower */
+           !strcmp(chassis_type, "7") || /*  7: Tower */
+           !strcmp(chassis_type, "11"))  /* 11: Main Server Chassis */
+               return true;
+
+       return false;
+}
+
 int acpi_video_register(void)
 {
        int ret = 0;
@@ -2143,8 +2162,12 @@ int acpi_video_register(void)
         * win8 ready (where we also prefer the native backlight driver, so
         * normally the acpi_video code should not register there anyways).
         */
-       if (only_lcd == -1)
-               only_lcd = acpi_osi_is_win8();
+       if (only_lcd == -1) {
+               if (dmi_is_desktop() && acpi_osi_is_win8())
+                       only_lcd = true;
+               else
+                       only_lcd = false;
+       }
 
        dmi_check_system(video_dmi_table);
 
index ebb626ffb5fa2d38c853ddcbbe7227aa09c7f7dd..4bde16fb97d8818f59e893adf9bb642fed5f9d5c 100644 (file)
 #define pr_fmt(fmt) "ACPI: watchdog: " fmt
 
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 
 #include "internal.h"
 
+static const struct dmi_system_id acpi_watchdog_skip[] = {
+       {
+               /*
+                * On the Lenovo Z50-70 there are two issues with the WDAT
+                * table. First, some of the instructions use RTC SRAM to
+                * store persistent information, which does not work well
+                * with the Linux RTC driver. Second, and more importantly,
+                * the instructions do not actually reset the system.
+                *
+                * On this particular system iTCO_wdt seems to work just
+                * fine, so we prefer that over WDAT for now.
+                *
+                * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033.
+                */
+               .ident = "Lenovo Z50-70",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20354"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Z50-70"),
+               },
+       },
+       {}
+};
+
+static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void)
+{
+       const struct acpi_table_wdat *wdat = NULL;
+       acpi_status status;
+
+       if (acpi_disabled)
+               return NULL;
+
+       if (dmi_check_system(acpi_watchdog_skip))
+               return NULL;
+
+       status = acpi_get_table(ACPI_SIG_WDAT, 0,
+                               (struct acpi_table_header **)&wdat);
+       if (ACPI_FAILURE(status)) {
+               /* It is fine if there is no WDAT */
+               return NULL;
+       }
+
+       return wdat;
+}
+
 /**
  * Returns true if this system should prefer ACPI based watchdog instead of
  * the native one (which are typically the same hardware).
  */
 bool acpi_has_watchdog(void)
 {
-       struct acpi_table_header hdr;
-
-       if (acpi_disabled)
-               return false;
-
-       return ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_WDAT, 0, &hdr));
+       return !!acpi_watchdog_get_wdat();
 }
 EXPORT_SYMBOL_GPL(acpi_has_watchdog);
 
@@ -41,12 +82,10 @@ void __init acpi_watchdog_init(void)
        struct platform_device *pdev;
        struct resource *resources;
        size_t nresources = 0;
-       acpi_status status;
        int i;
 
-       status = acpi_get_table(ACPI_SIG_WDAT, 0,
-                               (struct acpi_table_header **)&wdat);
-       if (ACPI_FAILURE(status)) {
+       wdat = acpi_watchdog_get_wdat();
+       if (!wdat) {
                /* It is fine if there is no WDAT */
                return;
        }
index 514aaf948ea900a584b0bb9293ac45b9f5759d04..3825df9234803a84cd5bc9ddd6892452eb87160f 100644 (file)
@@ -56,6 +56,10 @@ acpi_status acpi_ns_initialize_objects(void);
 
 acpi_status acpi_ns_initialize_devices(u32 flags);
 
+acpi_status
+acpi_ns_init_one_package(acpi_handle obj_handle,
+                        u32 level, void *context, void **return_value);
+
 /*
  * nsload -  Namespace loading
  */
index 99d92cb3280372bae6bb92d2077a90071a8cd9cb..f85c6f3271f64fb060fbf5029a4b41ee151389d5 100644 (file)
@@ -174,6 +174,13 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
                return_ACPI_STATUS(status);
        }
 
+       /* Complete the initialization/resolution of package objects */
+
+       status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT,
+                                       ACPI_UINT32_MAX, 0,
+                                       acpi_ns_init_one_package, NULL, NULL,
+                                       NULL);
+
        /* Parameter Data (optional) */
 
        if (parameter_node) {
@@ -430,6 +437,13 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
                return_ACPI_STATUS(status);
        }
 
+       /* Complete the initialization/resolution of package objects */
+
+       status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT,
+                                       ACPI_UINT32_MAX, 0,
+                                       acpi_ns_init_one_package, NULL, NULL,
+                                       NULL);
+
        /* Store the ddb_handle into the Target operand */
 
        status = acpi_ex_store(ddb_handle, target, walk_state);
index 77f2b5f4948ad3aa9d2567b010291549f1a81789..d77257d1c827b39f2efc0576ac8a08b433881d83 100644 (file)
@@ -240,6 +240,58 @@ acpi_status acpi_ns_initialize_devices(u32 flags)
        return_ACPI_STATUS(status);
 }
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_init_one_package
+ *
+ * PARAMETERS:  obj_handle      - Node
+ *              level           - Current nesting level
+ *              context         - Not used
+ *              return_value    - Not used
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Callback from acpi_walk_namespace. Invoked for every package
+ *              within the namespace. Used during dynamic load of an SSDT.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ns_init_one_package(acpi_handle obj_handle,
+                        u32 level, void *context, void **return_value)
+{
+       acpi_status status;
+       union acpi_operand_object *obj_desc;
+       struct acpi_namespace_node *node =
+           (struct acpi_namespace_node *)obj_handle;
+
+       obj_desc = acpi_ns_get_attached_object(node);
+       if (!obj_desc) {
+               return (AE_OK);
+       }
+
+       /* Exit if package is already initialized */
+
+       if (obj_desc->package.flags & AOPOBJ_DATA_VALID) {
+               return (AE_OK);
+       }
+
+       status = acpi_ds_get_package_arguments(obj_desc);
+       if (ACPI_FAILURE(status)) {
+               return (AE_OK);
+       }
+
+       status =
+           acpi_ut_walk_package_tree(obj_desc, NULL,
+                                     acpi_ds_init_package_element, NULL);
+       if (ACPI_FAILURE(status)) {
+               return (AE_OK);
+       }
+
+       obj_desc->package.flags |= AOPOBJ_DATA_VALID;
+       return (AE_OK);
+}
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_init_one_object
@@ -360,27 +412,11 @@ acpi_ns_init_one_object(acpi_handle obj_handle,
 
        case ACPI_TYPE_PACKAGE:
 
-               info->package_init++;
-               status = acpi_ds_get_package_arguments(obj_desc);
-               if (ACPI_FAILURE(status)) {
-                       break;
-               }
-
-               ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE,
-                                     "%s: Completing resolution of Package elements\n",
-                                     ACPI_GET_FUNCTION_NAME));
+               /* Complete the initialization/resolution of the package object */
 
-               /*
-                * Resolve all named references in package objects (and all
-                * sub-packages). This action has been deferred until the entire
-                * namespace has been loaded, in order to support external and
-                * forward references from individual package elements (05/2017).
-                */
-               status = acpi_ut_walk_package_tree(obj_desc, NULL,
-                                                  acpi_ds_init_package_element,
-                                                  NULL);
-
-               obj_desc->package.flags |= AOPOBJ_DATA_VALID;
+               info->package_init++;
+               status =
+                   acpi_ns_init_one_package(obj_handle, level, NULL, NULL);
                break;
 
        default:
index e1eee7a60fadda50f666ab93d2969cb66576e0be..f1cc4f9d31cd92a39ec603f1ac7db5031f156ef4 100644 (file)
@@ -635,4 +635,26 @@ module_param_call(lid_init_state,
                  NULL, 0644);
 MODULE_PARM_DESC(lid_init_state, "Behavior for reporting LID initial state");
 
-module_acpi_driver(acpi_button_driver);
+static int acpi_button_register_driver(struct acpi_driver *driver)
+{
+       /*
+        * Modules such as nouveau.ko and i915.ko have a link-time dependency
+        * on acpi_lid_open(). If this driver were built as a module and the
+        * return value of acpi_bus_register_driver() were returned from here
+        * with ACPI disabled, they would not be loadable on ACPI-capable
+        * kernels booted in non-ACPI mode.
+        */
+       if (acpi_disabled)
+               return 0;
+
+       return acpi_bus_register_driver(driver);
+}
+
+static void acpi_button_unregister_driver(struct acpi_driver *driver)
+{
+       if (!acpi_disabled)
+               acpi_bus_unregister_driver(driver);
+}
+
+module_driver(acpi_button_driver, acpi_button_register_driver,
+              acpi_button_unregister_driver);
index cc234e6a6297586a5c64b8252fb827474ae1e348..970dd87d347c786fbc25922464e6bc20c1dbb4ee 100644 (file)
@@ -2166,10 +2166,10 @@ int __init acpi_scan_init(void)
        acpi_cmos_rtc_init();
        acpi_container_init();
        acpi_memory_hotplug_init();
+       acpi_watchdog_init();
        acpi_pnp_init();
        acpi_int340x_thermal_init();
        acpi_amba_init();
-       acpi_watchdog_init();
        acpi_init_lpit();
 
        acpi_scan_add_handler(&generic_device_handler);
index 99a1a650326d016e171fee4e56c6dc43f192d8a6..974e58457697fce086248a4af23aed15fc608c4e 100644 (file)
@@ -364,6 +364,19 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
                DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
                },
        },
+       /*
+        * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
+        * the Low Power S0 Idle firmware interface (see
+        * https://bugzilla.kernel.org/show_bug.cgi?id=199057).
+        */
+       {
+       .callback = init_no_lps0,
+       .ident = "ThinkPad X1 Tablet(2016)",
+       .matches = {
+               DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+               DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"),
+               },
+       },
        {},
 };
 
index 594c228d2f02112345ebc2ad345cd02a7e563a52..4a3ac31c07d0ee49615a00af001e49ff8e30f005 100644 (file)
@@ -69,11 +69,12 @@ static ssize_t driver_override_show(struct device *_dev,
                                    struct device_attribute *attr, char *buf)
 {
        struct amba_device *dev = to_amba_device(_dev);
+       ssize_t len;
 
-       if (!dev->driver_override)
-               return 0;
-
-       return sprintf(buf, "%s\n", dev->driver_override);
+       device_lock(_dev);
+       len = sprintf(buf, "%s\n", dev->driver_override);
+       device_unlock(_dev);
+       return len;
 }
 
 static ssize_t driver_override_store(struct device *_dev,
@@ -81,9 +82,10 @@ static ssize_t driver_override_store(struct device *_dev,
                                     const char *buf, size_t count)
 {
        struct amba_device *dev = to_amba_device(_dev);
-       char *driver_override, *old = dev->driver_override, *cp;
+       char *driver_override, *old, *cp;
 
-       if (count > PATH_MAX)
+       /* We need to keep extra room for a newline */
+       if (count >= (PAGE_SIZE - 1))
                return -EINVAL;
 
        driver_override = kstrndup(buf, count, GFP_KERNEL);
@@ -94,12 +96,15 @@ static ssize_t driver_override_store(struct device *_dev,
        if (cp)
                *cp = '\0';
 
+       device_lock(_dev);
+       old = dev->driver_override;
        if (strlen(driver_override)) {
                dev->driver_override = driver_override;
        } else {
               kfree(driver_override);
               dev->driver_override = NULL;
        }
+       device_unlock(_dev);
 
        kfree(old);
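
Taking device_lock() in both the show and store paths closes a use-after-free window; a sketch of the interleaving that was previously possible:

/*
 * CPU0 (driver_override_show)     CPU1 (driver_override_store)
 * ------------------------------  ------------------------------
 * if (!dev->driver_override) ...
 *                                 old = dev->driver_override;
 *                                 dev->driver_override = new;
 *                                 kfree(old);
 * sprintf(buf, "%s\n",
 *         dev->driver_override);  <- reads freed memory
 */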
 
index 764b63a5aadefe5c73ef849ee5b377b134e3841e..e578eee315895d8c2783b933f5c252f5ec53824b 100644 (file)
@@ -2839,6 +2839,14 @@ static void binder_transaction(struct binder_proc *proc,
                        else
                                return_error = BR_DEAD_REPLY;
                        mutex_unlock(&context->context_mgr_node_lock);
+                       if (target_node && target_proc == proc) {
+                               binder_user_error("%d:%d got transaction to context manager from process owning it\n",
+                                                 proc->pid, thread->pid);
+                               return_error = BR_FAILED_REPLY;
+                               return_error_param = -EINVAL;
+                               return_error_line = __LINE__;
+                               goto err_invalid_target_handle;
+                       }
                }
                if (!target_node) {
                        /*
index 1ff17799769d0b2372d2b9d385af57dab15acf10..6389c88b3500a0bb220b92efa85531cf564a974c 100644 (file)
@@ -698,7 +698,7 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
 
        DPRINTK("ENTER\n");
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context),
                                 deadline, &online, NULL);
@@ -724,7 +724,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
        bool online;
        int rc;
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        /* clear D2H reception area to properly wait for D2H FIS */
        ata_tf_init(link->device, &tf);
@@ -788,7 +788,7 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
 
        DPRINTK("ENTER\n");
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        for (i = 0; i < 2; i++) {
                u16 val;
index 4356ef1d28a88f66be04f5e431e9c55d768bb866..824bd399f02ea4b350a85d004d2c3fefdf39d0ff 100644 (file)
@@ -350,7 +350,6 @@ struct ahci_host_priv {
        u32                     em_msg_type;    /* EM message type */
        bool                    got_runtime_pm; /* Did we do pm_runtime_get? */
        struct clk              *clks[AHCI_MAX_CLKS]; /* Optional */
-       struct reset_control    *rsts;          /* Optional */
        struct regulator        **target_pwrs;  /* Optional */
        /*
         * If platform uses PHYs. There is a 1:1 relation between the port number and
@@ -366,6 +365,13 @@ struct ahci_host_priv {
         * be overridden anytime before the host is activated.
         */
        void                    (*start_engine)(struct ata_port *ap);
+       /*
+        * Optional ahci_stop_engine override. If not set, this gets set to
+        * the default ahci_stop_engine during ahci_save_initial_config; it
+        * can be overridden anytime before the host is activated.
+        */
+       int                     (*stop_engine)(struct ata_port *ap);
+
        irqreturn_t             (*irq_handler)(int irq, void *dev_instance);
 
        /* only required for per-port MSI(-X) support */
index de7128d81e9ccbc168627a02bb2a39d3e4f11c5c..0045dacd814b44ec21f87e4acceb07e69056f214 100644 (file)
@@ -62,6 +62,60 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
        writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
 }
 
+/**
+ * ahci_mvebu_stop_engine
+ *
+ * @ap:        Target ata port
+ *
+ * Errata Ref#226 - SATA Disk HOT swap issue when connected through
+ * Port Multiplier in FIS-based Switching mode.
+ *
+ * To avoid the issue, by design, bits[11:8, 0] of the PxFBS
+ * register are cleared when Port Command and Status (0x18) bit[0]
+ * changes its value from 1 to 0, i.e. the falling edge of Port
+ * Command and Status bit[0] sends a PULSE that resets PxFBS
+ * bits[11:8, 0].
+ *
+ * This function overrides "ahci_stop_engine" from libahci.c,
+ * adding the mvebu workaround (WA): save the PxFBS value before
+ * the PxCMD ST write of 0, then restore the PxFBS value.
+ *
+ * Return: 0 on success; Error code otherwise.
+ */
+int ahci_mvebu_stop_engine(struct ata_port *ap)
+{
+       void __iomem *port_mmio = ahci_port_base(ap);
+       u32 tmp, port_fbs;
+
+       tmp = readl(port_mmio + PORT_CMD);
+
+       /* check if the HBA is idle */
+       if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0)
+               return 0;
+
+       /* save the port PxFBS register for later restore */
+       port_fbs = readl(port_mmio + PORT_FBS);
+
+       /* setting HBA to idle */
+       tmp &= ~PORT_CMD_START;
+       writel(tmp, port_mmio + PORT_CMD);
+
+       /*
+        * The PxCMD bit #15 signal doesn't clear PxFBS, so restore the
+        * PxFBS register right after clearing PxCMD ST; there is no
+        * need to wait for PxCMD bit #15.
+        */
+       writel(port_fbs, port_mmio + PORT_FBS);
+
+       /* wait for engine to stop. This could be as long as 500 msec */
+       tmp = ata_wait_register(ap, port_mmio + PORT_CMD,
+                               PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500);
+       if (tmp & PORT_CMD_LIST_ON)
+               return -EIO;
+
+       return 0;
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int ahci_mvebu_suspend(struct platform_device *pdev, pm_message_t state)
 {
@@ -112,6 +166,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev)
        if (rc)
                return rc;
 
+       hpriv->stop_engine = ahci_mvebu_stop_engine;
+
        if (of_device_is_compatible(pdev->dev.of_node,
                                    "marvell,armada-380-ahci")) {
                dram = mv_mbus_dram_info();
index 2685f28160f70764ee4013930239566031c9b058..cfdef4d44ae92cdf5a335c99a7d049df8a8cb5ae 100644 (file)
@@ -96,7 +96,7 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
 
        DPRINTK("ENTER\n");
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        /*
         * There is a errata on ls1021a Rev1.0 and Rev2.0 which is:
index c2b5941d9184db2637604864baf4597d398c4911..ad58da7c9affd8e4ec381d8bb0fd7f23d6fa0310 100644 (file)
@@ -165,7 +165,7 @@ static int xgene_ahci_restart_engine(struct ata_port *ap)
                                    PORT_CMD_ISSUE, 0x0, 1, 100))
                  return -EBUSY;
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
        ahci_start_fis_rx(ap);
 
        /*
@@ -421,7 +421,7 @@ static int xgene_ahci_hardreset(struct ata_link *link, unsigned int *class,
        portrxfis_saved = readl(port_mmio + PORT_FIS_ADDR);
        portrxfishi_saved = readl(port_mmio + PORT_FIS_ADDR_HI);
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        rc = xgene_ahci_do_hardreset(link, deadline, &online);
 
index 7adcf3caabd00abbb08ef76c0e4e604f0ba8c97c..e5d90977caec267084cf5eab7b18d8008f9047e2 100644 (file)
@@ -560,6 +560,9 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv)
        if (!hpriv->start_engine)
                hpriv->start_engine = ahci_start_engine;
 
+       if (!hpriv->stop_engine)
+               hpriv->stop_engine = ahci_stop_engine;
+
        if (!hpriv->irq_handler)
                hpriv->irq_handler = ahci_single_level_irq_intr;
 }
@@ -897,9 +900,10 @@ static void ahci_start_port(struct ata_port *ap)
 static int ahci_deinit_port(struct ata_port *ap, const char **emsg)
 {
        int rc;
+       struct ahci_host_priv *hpriv = ap->host->private_data;
 
        /* disable DMA */
-       rc = ahci_stop_engine(ap);
+       rc = hpriv->stop_engine(ap);
        if (rc) {
                *emsg = "failed to stop engine";
                return rc;
@@ -1310,7 +1314,7 @@ int ahci_kick_engine(struct ata_port *ap)
        int busy, rc;
 
        /* stop engine */
-       rc = ahci_stop_engine(ap);
+       rc = hpriv->stop_engine(ap);
        if (rc)
                goto out_restart;
 
@@ -1549,7 +1553,7 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
 
        DPRINTK("ENTER\n");
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        /* clear D2H reception area to properly wait for D2H FIS */
        ata_tf_init(link->device, &tf);
@@ -2075,14 +2079,14 @@ void ahci_error_handler(struct ata_port *ap)
 
        if (!(ap->pflags & ATA_PFLAG_FROZEN)) {
                /* restart engine */
-               ahci_stop_engine(ap);
+               hpriv->stop_engine(ap);
                hpriv->start_engine(ap);
        }
 
        sata_pmp_error_handler(ap);
 
        if (!ata_dev_enabled(ap->link.device))
-               ahci_stop_engine(ap);
+               hpriv->stop_engine(ap);
 }
 EXPORT_SYMBOL_GPL(ahci_error_handler);
 
@@ -2129,7 +2133,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep)
                return;
 
        /* set DITO, MDAT, DETO and enable DevSlp, need to stop engine first */
-       rc = ahci_stop_engine(ap);
+       rc = hpriv->stop_engine(ap);
        if (rc)
                return;
 
@@ -2189,7 +2193,7 @@ static void ahci_enable_fbs(struct ata_port *ap)
                return;
        }
 
-       rc = ahci_stop_engine(ap);
+       rc = hpriv->stop_engine(ap);
        if (rc)
                return;
 
@@ -2222,7 +2226,7 @@ static void ahci_disable_fbs(struct ata_port *ap)
                return;
        }
 
-       rc = ahci_stop_engine(ap);
+       rc = hpriv->stop_engine(ap);
        if (rc)
                return;
 
index 46a762442dc512f0bd24392952414bf3582badde..30cc8f1a31e1299f3cc68659a5be959492249382 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/phy/phy.h>
 #include <linux/pm_runtime.h>
 #include <linux/of_platform.h>
-#include <linux/reset.h>
 #include "ahci.h"
 
 static void ahci_host_stop(struct ata_host *host);
@@ -196,8 +195,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators);
  * following order:
  * 1) Regulator
  * 2) Clocks (through ahci_platform_enable_clks)
- * 3) Resets
- * 4) Phys
+ * 3) Phys
  *
  * If resource enabling fails at any point the previous enabled resources
  * are disabled in reverse order.
@@ -217,19 +215,12 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv)
        if (rc)
                goto disable_regulator;
 
-       rc = reset_control_deassert(hpriv->rsts);
-       if (rc)
-               goto disable_clks;
-
        rc = ahci_platform_enable_phys(hpriv);
        if (rc)
-               goto disable_resets;
+               goto disable_clks;
 
        return 0;
 
-disable_resets:
-       reset_control_assert(hpriv->rsts);
-
 disable_clks:
        ahci_platform_disable_clks(hpriv);
 
@@ -248,15 +239,12 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources);
  * following order:
  * 1) Phys
  * 2) Clocks (through ahci_platform_disable_clks)
- * 3) Resets
- * 4) Regulator
+ * 3) Regulator
  */
 void ahci_platform_disable_resources(struct ahci_host_priv *hpriv)
 {
        ahci_platform_disable_phys(hpriv);
 
-       reset_control_assert(hpriv->rsts);
-
        ahci_platform_disable_clks(hpriv);
 
        ahci_platform_disable_regulators(hpriv);
@@ -405,12 +393,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
                hpriv->clks[i] = clk;
        }
 
-       hpriv->rsts = devm_reset_control_array_get_optional_shared(dev);
-       if (IS_ERR(hpriv->rsts)) {
-               rc = PTR_ERR(hpriv->rsts);
-               goto err_out;
-       }
-
        hpriv->nports = child_nodes = of_get_child_count(dev->of_node);
 
        /*
index 8bc71ca61e7f836d44f6874660738974dd3c947b..68596bd4cf06c2362692f7ded557b669efc7f983 100644 (file)
@@ -4549,6 +4549,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
                                                ATA_HORKAGE_NOLPM, },
 
+       /* This specific Samsung model/firmware-rev does not handle LPM well */
+       { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
+
+       /* Sandisk devices which are known to not handle LPM well */
+       { "SanDisk SD7UB3Q*G1001",      NULL,   ATA_HORKAGE_NOLPM, },
+
        /* devices that don't properly handle queued TRIM commands */
        { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
index c016829a38fd21798e263cdc678517b3dbf728f7..513b260bcff1ef7cc74119b066ef8a7f8dd6c97f 100644 (file)
@@ -175,8 +175,8 @@ static void ata_eh_handle_port_resume(struct ata_port *ap)
 { }
 #endif /* CONFIG_PM */
 
-static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
-                                va_list args)
+static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
+                                const char *fmt, va_list args)
 {
        ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
                                     ATA_EH_DESC_LEN - ehi->desc_len,
index aafb8cc03523212dee4d8d3fe66434af2406ae76..e67815b896fcc40772ac45fdf8e8be7253cecafa 100644 (file)
@@ -410,7 +410,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class,
        int rc;
        int retry = 100;
 
-       ahci_stop_engine(ap);
+       hpriv->stop_engine(ap);
 
        /* clear D2H reception area to properly wait for D2H FIS */
        ata_tf_init(link->device, &tf);
index 4b1995e2d044ba7e4da5f6e36ec0c2d2dedafd50..010ca101d412380f7492ebb2c87eaca0ffce3e3b 100644 (file)
@@ -285,13 +285,13 @@ static const struct sil24_cerr_info {
        [PORT_CERR_INCONSISTENT] = { AC_ERR_HSM, ATA_EH_RESET,
                                     "protocol mismatch" },
        [PORT_CERR_DIRECTION]   = { AC_ERR_HSM, ATA_EH_RESET,
-                                   "data directon mismatch" },
+                                   "data direction mismatch" },
        [PORT_CERR_UNDERRUN]    = { AC_ERR_HSM, ATA_EH_RESET,
                                    "ran out of SGEs while writing" },
        [PORT_CERR_OVERRUN]     = { AC_ERR_HSM, ATA_EH_RESET,
                                    "ran out of SGEs while reading" },
        [PORT_CERR_PKT_PROT]    = { AC_ERR_HSM, ATA_EH_RESET,
-                                   "invalid data directon for ATAPI CDB" },
+                                   "invalid data direction for ATAPI CDB" },
        [PORT_CERR_SGT_BOUNDARY] = { AC_ERR_SYSTEM, ATA_EH_RESET,
                                     "SGT not on qword boundary" },
        [PORT_CERR_SGT_TGTABRT] = { AC_ERR_HOST_BUS, ATA_EH_RESET,
index d97c05690faa99363ac709fb237335a0678af4c0..4e46dc9e41ad01142ffdcfe1c38921df9db0ebda 100644 (file)
@@ -191,7 +191,7 @@ static char *res_strings[] = {
        "reserved 37",
        "reserved 38",
        "reserved 39",
-       "reseverd 40",
+       "reserved 40",
        "reserved 41", 
        "reserved 42", 
        "reserved 43", 
index 1ef67db03c8e618d7d2425a33e5d359e89f85607..9c9a229587176d560419c0c9c4df9562c1279a75 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/io.h>
 #include <linux/atomic.h>
 #include <linux/uaccess.h>
+#include <linux/nospec.h>
 
 #include "uPD98401.h"
 #include "uPD98402.h"
@@ -1458,6 +1459,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
                                        return -EFAULT;
                                if (pool < 0 || pool > ZATM_LAST_POOL)
                                        return -EINVAL;
+                               pool = array_index_nospec(pool,
+                                                         ZATM_LAST_POOL + 1);
                                spin_lock_irqsave(&zatm_dev->lock, flags);
                                info = zatm_dev->pool_info[pool];
                                if (cmd == ZATM_GETPOOLZ) {
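
This is the standard Spectre-v1 mitigation: even after the range check, a mispredicted branch could let the CPU speculatively index pool_info[] out of bounds, so array_index_nospec() clamps the untrusted value before it is used. The generic shape, as a sketch (table and TABLE_SIZE are hypothetical):

#include <linux/nospec.h>

static int safe_lookup(int idx)
{
	if (idx < 0 || idx >= TABLE_SIZE)
		return -EINVAL;
	/* forces idx into 0..TABLE_SIZE-1 even under misspeculation */
	idx = array_index_nospec(idx, TABLE_SIZE);
	return table[idx];
}
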
index 1e6396bb807b1255ab61e0a168e1b7eac40f892c..597d40893862696ed76457c7071c8d5fd074f612 100644 (file)
@@ -312,8 +312,9 @@ static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
  * This checks whether the memory was allocated from the per-device
  * coherent memory pool and if so, maps that memory to the provided vma.
  *
- * Returns 1 if we correctly mapped the memory, or 0 if the caller should
- * proceed with mapping memory from generic pools.
+ * Returns 1 if @vaddr belongs to the device coherent pool and the caller
+ * should return @ret, or 0 if they should proceed with mapping memory from
+ * generic areas.
  */
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
                           void *vaddr, size_t size, int *ret)
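
The clarified contract is exactly how dma_common_mmap() below consumes the helper: a return of 1 means @ret already holds the value to hand back, 0 means fall through to the generic mapping path. Caller sketch (generic_mmap_path() is a hypothetical stand-in):

	int ret = -ENXIO;

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;	/* the coherent pool handled it */

	return generic_mmap_path(dev, vma, cpu_addr, size);
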
index 3b118353ea176af270d72a95ca1f79772a3f5602..d82566d6e2378f4f4ba074260223b398fa7860c1 100644 (file)
@@ -226,7 +226,6 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 #ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
        unsigned long user_count = vma_pages(vma);
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-       unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
        unsigned long off = vma->vm_pgoff;
 
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -234,12 +233,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
        if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;
 
-       if (off < count && user_count <= (count - off)) {
+       if (off < count && user_count <= (count - off))
                ret = remap_pfn_range(vma, vma->vm_start,
-                                     pfn + off,
+                                     page_to_pfn(virt_to_page(cpu_addr)) + off,
                                      user_count << PAGE_SHIFT,
                                      vma->vm_page_prot);
-       }
 #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
 
        return ret;
index 31b5015b59fecb456167e57ce697a4228bc77680..358354148decc4b4e49ff6df805426a2d72a63d4 100644 (file)
@@ -537,8 +537,8 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
 }
 
 /**
- * fw_load_sysfs_fallback - load a firmware via the syfs fallback mechanism
- * @fw_sysfs: firmware syfs information for the firmware to load
+ * fw_load_sysfs_fallback - load a firmware via the sysfs fallback mechanism
+ * @fw_sysfs: firmware sysfs information for the firmware to load
  * @opt_flags: flags of options, FW_OPT_*
  * @timeout: timeout to wait for the load
  *
index dfebc644ed35a1a410db821455ef54dcb8e025b6..f8255670a66352023225377e41d30b22bb35d1ea 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/device.h>
 
 /**
- * struct firmware_fallback_config - firmware fallback configuratioon settings
+ * struct firmware_fallback_config - firmware fallback configuration settings
  *
  * Helps describe and fine tune the fallback mechanism.
  *
index c9d04497a415c436b8486630d9b3c5a0812d19d2..5d4e31655d9629732c42b02a602ebdc20bec2cda 100644 (file)
@@ -451,25 +451,47 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
 static void lo_complete_rq(struct request *rq)
 {
        struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+       blk_status_t ret = BLK_STS_OK;
 
-       if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio &&
-                    cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) {
-               struct bio *bio = cmd->rq->bio;
-
-               bio_advance(bio, cmd->ret);
-               zero_fill_bio(bio);
+       if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
+           req_op(rq) != REQ_OP_READ) {
+               if (cmd->ret < 0)
+                       ret = BLK_STS_IOERR;
+               goto end_io;
        }
 
-       blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
+       /*
+        * Short READ - if we got some data, advance our request and
+        * retry it. If we got no data, end the rest with EIO.
+        */
+       if (cmd->ret) {
+               blk_update_request(rq, BLK_STS_OK, cmd->ret);
+               cmd->ret = 0;
+               blk_mq_requeue_request(rq, true);
+       } else {
+               if (cmd->use_aio) {
+                       struct bio *bio = rq->bio;
+
+                       while (bio) {
+                               zero_fill_bio(bio);
+                               bio = bio->bi_next;
+                       }
+               }
+               ret = BLK_STS_IOERR;
+end_io:
+               blk_mq_end_request(rq, ret);
+       }
 }
 
 static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
 {
+       struct request *rq = blk_mq_rq_from_pdu(cmd);
+
        if (!atomic_dec_and_test(&cmd->ref))
                return;
        kfree(cmd->bvec);
        cmd->bvec = NULL;
-       blk_mq_complete_request(cmd->rq);
+       blk_mq_complete_request(rq);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -487,7 +509,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 {
        struct iov_iter iter;
        struct bio_vec *bvec;
-       struct request *rq = cmd->rq;
+       struct request *rq = blk_mq_rq_from_pdu(cmd);
        struct bio *bio = rq->bio;
        struct file *file = lo->lo_backing_file;
        unsigned int offset;
@@ -1702,15 +1724,16 @@ EXPORT_SYMBOL(loop_unregister_transfer);
 static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
-       struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
-       struct loop_device *lo = cmd->rq->q->queuedata;
+       struct request *rq = bd->rq;
+       struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+       struct loop_device *lo = rq->q->queuedata;
 
-       blk_mq_start_request(bd->rq);
+       blk_mq_start_request(rq);
 
        if (lo->lo_state != Lo_bound)
                return BLK_STS_IOERR;
 
-       switch (req_op(cmd->rq)) {
+       switch (req_op(rq)) {
        case REQ_OP_FLUSH:
        case REQ_OP_DISCARD:
        case REQ_OP_WRITE_ZEROES:
@@ -1723,8 +1746,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        /* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-       if (cmd->use_aio && cmd->rq->bio && cmd->rq->bio->bi_css) {
-               cmd->css = cmd->rq->bio->bi_css;
+       if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
+               cmd->css = rq->bio->bi_css;
                css_get(cmd->css);
        } else
 #endif
@@ -1736,8 +1759,9 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
 {
-       const bool write = op_is_write(req_op(cmd->rq));
-       struct loop_device *lo = cmd->rq->q->queuedata;
+       struct request *rq = blk_mq_rq_from_pdu(cmd);
+       const bool write = op_is_write(req_op(rq));
+       struct loop_device *lo = rq->q->queuedata;
        int ret = 0;
 
        if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
@@ -1745,12 +1769,12 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
                goto failed;
        }
 
-       ret = do_req_filebacked(lo, cmd->rq);
+       ret = do_req_filebacked(lo, rq);
  failed:
        /* complete non-aio request */
        if (!cmd->use_aio || ret) {
                cmd->ret = ret ? -EIO : 0;
-               blk_mq_complete_request(cmd->rq);
+               blk_mq_complete_request(rq);
        }
 }
 
@@ -1767,9 +1791,7 @@ static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq,
 {
        struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-       cmd->rq = rq;
        kthread_init_work(&cmd->work, loop_queue_work);
-
        return 0;
 }
 
index 0f45416e4fcfe4cbe0ea9d1baa179f89cbfadeb7..b78de9879f4f2f95409425d14d2197bc38d0c6c1 100644 (file)
@@ -66,7 +66,6 @@ struct loop_device {
 
 struct loop_cmd {
        struct kthread_work work;
-       struct request *rq;
        bool use_aio; /* use AIO interface to handle I/O */
        atomic_t ref; /* only for aio */
        long ret;
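
Dropping the cached rq pointer works because blk-mq allocates the per-request driver pdu immediately behind each struct request, so the two convert in O(1) in both directions and storing one inside the other was redundant state. A sketch of the round trip used throughout the loop changes above:

	struct request *rq = blk_mq_rq_from_pdu(cmd);	/* pdu -> request */
	struct loop_cmd *c = blk_mq_rq_to_pdu(rq);	/* request -> pdu */
	/* c == cmd: both are views of the same blk-mq allocation */
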
index 8e8b04cc569a44222c1e3f4ee4a3a188455ddf04..33b36fea1d73f0e957dcd7ef4b8107526e764893 100644 (file)
@@ -2366,7 +2366,9 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
        osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
                            "copyup");
        osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
-                                         obj_req->copyup_bvecs, bytes);
+                                         obj_req->copyup_bvecs,
+                                         obj_req->copyup_bvec_count,
+                                         bytes);
 
        switch (obj_req->img_request->op_type) {
        case OBJ_OP_WRITE:
index 64e066eba72e03abec38d49036d0f3677aafb2a3..0e31884a9519614398c1f1f6b048934a09ac5906 100644 (file)
@@ -110,7 +110,7 @@ struct iwm {
 /* Select values for swim_select and swim_readbit */
 
 #define READ_DATA_0    0x074
-#define TWOMEG_DRIVE   0x075
+#define ONEMEG_DRIVE   0x075
 #define SINGLE_SIDED   0x076
 #define DRIVE_PRESENT  0x077
 #define DISK_IN                0x170
@@ -118,9 +118,9 @@ struct iwm {
 #define TRACK_ZERO     0x172
 #define TACHO          0x173
 #define READ_DATA_1    0x174
-#define MFM_MODE       0x175
+#define GCR_MODE       0x175
 #define SEEK_COMPLETE  0x176
-#define ONEMEG_MEDIA   0x177
+#define TWOMEG_MEDIA   0x177
 
 /* Bits in handshake register */
 
@@ -612,7 +612,6 @@ static void setup_medium(struct floppy_state *fs)
                struct floppy_struct *g;
                fs->disk_in = 1;
                fs->write_protected = swim_readbit(base, WRITE_PROT);
-               fs->type = swim_readbit(base, ONEMEG_MEDIA);
 
                if (swim_track00(base))
                        printk(KERN_ERR
@@ -620,6 +619,9 @@ static void setup_medium(struct floppy_state *fs)
 
                swim_track00(base);
 
+               fs->type = swim_readbit(base, TWOMEG_MEDIA) ?
+                       HD_MEDIA : DD_MEDIA;
+               fs->head_number = swim_readbit(base, SINGLE_SIDED) ? 1 : 2;
                get_floppy_geometry(fs, 0, &g);
                fs->total_secs = g->size;
                fs->secpercyl = g->head * g->sect;
@@ -646,7 +648,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
 
        swim_write(base, setup, S_IBM_DRIVE  | S_FCLK_DIV2);
        udelay(10);
-       swim_drive(base, INTERNAL_DRIVE);
+       swim_drive(base, fs->location);
        swim_motor(base, ON);
        swim_action(base, SETMFM);
        if (fs->ejected)
@@ -656,6 +658,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
                goto out;
        }
 
+       set_capacity(fs->disk, fs->total_secs);
+
        if (mode & FMODE_NDELAY)
                return 0;
 
@@ -727,14 +731,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
                if (copy_to_user((void __user *) param, (void *) &floppy_type,
                                 sizeof(struct floppy_struct)))
                        return -EFAULT;
-               break;
-
-       default:
-               printk(KERN_DEBUG "SWIM floppy_ioctl: unknown cmd %d\n",
-                      cmd);
-               return -ENOSYS;
+               return 0;
        }
-       return 0;
+       return -ENOTTY;
 }
 
 static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
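
Returning -ENOTTY ("inappropriate ioctl for device") for unrecognized commands is the ioctl convention that the rework above adopts; -ENOSYS is reserved for missing system calls. The resulting shape, sketched with a hypothetical command:

static int my_ioctl(struct block_device *bdev, fmode_t mode,
		    unsigned int cmd, unsigned long param)
{
	switch (cmd) {
	case MY_GETGEOM:	/* hypothetical */
		return my_get_geometry(bdev, param);
	}
	return -ENOTTY;		/* unknown command */
}
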
@@ -795,7 +794,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
        struct swim_priv *swd = data;
        int drive = (*part & 3);
 
-       if (drive > swd->floppy_count)
+       if (drive >= swd->floppy_count)
                return NULL;
 
        *part = 0;
@@ -813,10 +812,9 @@ static int swim_add_floppy(struct swim_priv *swd, enum drive_location location)
 
        swim_motor(base, OFF);
 
-       if (swim_readbit(base, SINGLE_SIDED))
-               fs->head_number = 1;
-       else
-               fs->head_number = 2;
+       fs->type = HD_MEDIA;
+       fs->head_number = 2;
+
        fs->ref_count = 0;
        fs->ejected = 1;
 
@@ -834,10 +832,12 @@ static int swim_floppy_init(struct swim_priv *swd)
        /* scan floppy drives */
 
        swim_drive(base, INTERNAL_DRIVE);
-       if (swim_readbit(base, DRIVE_PRESENT))
+       if (swim_readbit(base, DRIVE_PRESENT) &&
+           !swim_readbit(base, ONEMEG_DRIVE))
                swim_add_floppy(swd, INTERNAL_DRIVE);
        swim_drive(base, EXTERNAL_DRIVE);
-       if (swim_readbit(base, DRIVE_PRESENT))
+       if (swim_readbit(base, DRIVE_PRESENT) &&
+           !swim_readbit(base, ONEMEG_DRIVE))
                swim_add_floppy(swd, EXTERNAL_DRIVE);
 
        /* register floppy drives */
@@ -861,7 +861,6 @@ static int swim_floppy_init(struct swim_priv *swd)
                                                              &swd->lock);
                if (!swd->unit[drive].disk->queue) {
                        err = -ENOMEM;
-                       put_disk(swd->unit[drive].disk);
                        goto exit_put_disks;
                }
                blk_queue_bounce_limit(swd->unit[drive].disk->queue,
@@ -911,7 +910,7 @@ static int swim_probe(struct platform_device *dev)
                goto out;
        }
 
-       swim_base = ioremap(res->start, resource_size(res));
+       swim_base = (struct swim __iomem *)res->start;
        if (!swim_base) {
                ret = -ENOMEM;
                goto out_release_io;
@@ -923,7 +922,7 @@ static int swim_probe(struct platform_device *dev)
        if (!get_swim_mode(swim_base)) {
                printk(KERN_INFO "SWIM device not found !\n");
                ret = -ENODEV;
-               goto out_iounmap;
+               goto out_release_io;
        }
 
        /* set platform driver data */
@@ -931,7 +930,7 @@ static int swim_probe(struct platform_device *dev)
        swd = kzalloc(sizeof(struct swim_priv), GFP_KERNEL);
        if (!swd) {
                ret = -ENOMEM;
-               goto out_iounmap;
+               goto out_release_io;
        }
        platform_set_drvdata(dev, swd);
 
@@ -945,8 +944,6 @@ static int swim_probe(struct platform_device *dev)
 
 out_kfree:
        kfree(swd);
-out_iounmap:
-       iounmap(swim_base);
 out_release_io:
        release_mem_region(res->start, resource_size(res));
 out:
@@ -974,8 +971,6 @@ static int swim_remove(struct platform_device *dev)
        for (drive = 0; drive < swd->floppy_count; drive++)
                floppy_eject(&swd->unit[drive]);
 
-       iounmap(swd->base);
-
        res = platform_get_resource(dev, IORESOURCE_MEM, 0);
        if (res)
                release_mem_region(res->start, resource_size(res));
index af51015d056eff1a6b1a2dfac264e30259c669d9..469541c1e51eed13b589f47b295f0e916ade4170 100644 (file)
@@ -148,7 +148,7 @@ struct swim3 {
 #define MOTOR_ON       2
 #define RELAX          3       /* also eject in progress */
 #define READ_DATA_0    4
-#define TWOMEG_DRIVE   5
+#define ONEMEG_DRIVE   5
 #define SINGLE_SIDED   6       /* drive or diskette is 4MB type? */
 #define DRIVE_PRESENT  7
 #define DISK_IN                8
@@ -156,9 +156,9 @@ struct swim3 {
 #define TRACK_ZERO     10
 #define TACHO          11
 #define READ_DATA_1    12
-#define MFM_MODE       13
+#define GCR_MODE       13
 #define SEEK_COMPLETE  14
-#define ONEMEG_MEDIA   15
+#define TWOMEG_MEDIA   15
 
 /* Definitions of values used in writing and formatting */
 #define DATA_ESCAPE    0x99
index 010f5f579e68956b74f6adb52fb11c6b52bfbbc7..f3c643a0473c69cb387c828306cf810840550efd 100644 (file)
@@ -197,6 +197,7 @@ config BT_HCIUART_BCM
 config BT_HCIUART_QCA
        bool "Qualcomm Atheros protocol support"
        depends on BT_HCIUART
+       depends on BT_HCIUART_SERDEV
        select BT_HCIUART_H4
        select BT_QCA
        help
index 6659f113042ca69e2cfe25b98dd974c6498b6af5..99cde1f9467d4edae71cc9820b35c00a75016650 100644 (file)
@@ -315,10 +315,12 @@ static int btbcm_read_info(struct hci_dev *hdev)
        return 0;
 }
 
-static const struct {
+struct bcm_subver_table {
        u16 subver;
        const char *name;
-} bcm_uart_subver_table[] = {
+};
+
+static const struct bcm_subver_table bcm_uart_subver_table[] = {
        { 0x4103, "BCM4330B1"   },      /* 002.001.003 */
        { 0x410e, "BCM43341B0"  },      /* 002.001.014 */
        { 0x4406, "BCM4324B3"   },      /* 002.004.006 */
@@ -330,12 +332,28 @@ static const struct {
        { }
 };
 
-int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len)
+static const struct bcm_subver_table bcm_usb_subver_table[] = {
+       { 0x210b, "BCM43142A0"  },      /* 001.001.011 */
+       { 0x2112, "BCM4314A0"   },      /* 001.001.018 */
+       { 0x2118, "BCM20702A0"  },      /* 001.001.024 */
+       { 0x2126, "BCM4335A0"   },      /* 001.001.038 */
+       { 0x220e, "BCM20702A1"  },      /* 001.002.014 */
+       { 0x230f, "BCM4354A2"   },      /* 001.003.015 */
+       { 0x4106, "BCM4335B0"   },      /* 002.001.006 */
+       { 0x410e, "BCM20702B0"  },      /* 002.001.014 */
+       { 0x6109, "BCM4335C0"   },      /* 003.001.009 */
+       { 0x610c, "BCM4354"     },      /* 003.001.012 */
+       { }
+};
+
+int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
+                    bool reinit)
 {
-       u16 subver, rev;
-       const char *hw_name = NULL;
+       u16 subver, rev, pid, vid;
+       const char *hw_name = "BCM";
        struct sk_buff *skb;
        struct hci_rp_read_local_version *ver;
+       const struct bcm_subver_table *bcm_subver_table;
        int i, err;
 
        /* Reset */
@@ -354,30 +372,44 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len)
        kfree_skb(skb);
 
        /* Read controller information */
-       err = btbcm_read_info(hdev);
-       if (err)
-               return err;
+       if (!reinit) {
+               err = btbcm_read_info(hdev);
+               if (err)
+                       return err;
+       }
 
-       switch ((rev & 0xf000) >> 12) {
-       case 0:
-       case 1:
-       case 2:
-       case 3:
-               for (i = 0; bcm_uart_subver_table[i].name; i++) {
-                       if (subver == bcm_uart_subver_table[i].subver) {
-                               hw_name = bcm_uart_subver_table[i].name;
-                               break;
-                       }
+       /* Upper nibble of rev should be between 0 and 3? */
+       if (((rev & 0xf000) >> 12) > 3)
+               return 0;
+
+       bcm_subver_table = (hdev->bus == HCI_USB) ? bcm_usb_subver_table :
+                                                   bcm_uart_subver_table;
+
+       for (i = 0; bcm_subver_table[i].name; i++) {
+               if (subver == bcm_subver_table[i].subver) {
+                       hw_name = bcm_subver_table[i].name;
+                       break;
                }
+       }
 
-               snprintf(fw_name, len, "brcm/%s.hcd", hw_name ? : "BCM");
-               break;
-       default:
-               return 0;
+       if (hdev->bus == HCI_USB) {
+               /* Read USB Product Info */
+               skb = btbcm_read_usb_product(hdev);
+               if (IS_ERR(skb))
+                       return PTR_ERR(skb);
+
+               vid = get_unaligned_le16(skb->data + 1);
+               pid = get_unaligned_le16(skb->data + 3);
+               kfree_skb(skb);
+
+               snprintf(fw_name, len, "brcm/%s-%4.4x-%4.4x.hcd",
+                        hw_name, vid, pid);
+       } else {
+               snprintf(fw_name, len, "brcm/%s.hcd", hw_name);
        }
 
        bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
-                   hw_name ? : "BCM", (subver & 0xe000) >> 13,
+                   hw_name, (subver & 0xe000) >> 13,
                    (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
 
        return 0;
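
Both subver tables are terminated by the empty "{ }" sentinel, whose NULL .name ends the scan above; when no entry matches, hw_name keeps its "BCM" default. The lookup, reduced to a sketch:

static const char *subver_to_name(const struct bcm_subver_table *tbl,
				  u16 subver)
{
	for (; tbl->name; tbl++)	/* stop at the { } sentinel */
		if (tbl->subver == subver)
			return tbl->name;
	return "BCM";			/* default used above */
}
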
@@ -386,30 +418,14 @@ EXPORT_SYMBOL_GPL(btbcm_initialize);
 
 int btbcm_finalize(struct hci_dev *hdev)
 {
-       struct sk_buff *skb;
-       struct hci_rp_read_local_version *ver;
-       u16 subver, rev;
+       char fw_name[64];
        int err;
 
-       /* Reset */
-       err = btbcm_reset(hdev);
+       /* Re-initialize */
+       err = btbcm_initialize(hdev, fw_name, sizeof(fw_name), true);
        if (err)
                return err;
 
-       /* Read Local Version Info */
-       skb = btbcm_read_local_version(hdev);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
-
-       ver = (struct hci_rp_read_local_version *)skb->data;
-       rev = le16_to_cpu(ver->hci_rev);
-       subver = le16_to_cpu(ver->lmp_subver);
-       kfree_skb(skb);
-
-       bt_dev_info(hdev, "BCM (%3.3u.%3.3u.%3.3u) build %4.4u",
-                   (subver & 0xe000) >> 13, (subver & 0x1f00) >> 8,
-                   (subver & 0x00ff), rev & 0x0fff);
-
        btbcm_check_bdaddr(hdev);
 
        set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
@@ -418,95 +434,18 @@ int btbcm_finalize(struct hci_dev *hdev)
 }
 EXPORT_SYMBOL_GPL(btbcm_finalize);
 
-static const struct {
-       u16 subver;
-       const char *name;
-} bcm_usb_subver_table[] = {
-       { 0x210b, "BCM43142A0"  },      /* 001.001.011 */
-       { 0x2112, "BCM4314A0"   },      /* 001.001.018 */
-       { 0x2118, "BCM20702A0"  },      /* 001.001.024 */
-       { 0x2126, "BCM4335A0"   },      /* 001.001.038 */
-       { 0x220e, "BCM20702A1"  },      /* 001.002.014 */
-       { 0x230f, "BCM4354A2"   },      /* 001.003.015 */
-       { 0x4106, "BCM4335B0"   },      /* 002.001.006 */
-       { 0x410e, "BCM20702B0"  },      /* 002.001.014 */
-       { 0x6109, "BCM4335C0"   },      /* 003.001.009 */
-       { 0x610c, "BCM4354"     },      /* 003.001.012 */
-       { }
-};
-
 int btbcm_setup_patchram(struct hci_dev *hdev)
 {
        char fw_name[64];
        const struct firmware *fw;
-       u16 subver, rev, pid, vid;
-       const char *hw_name = NULL;
        struct sk_buff *skb;
-       struct hci_rp_read_local_version *ver;
-       int i, err;
-
-       /* Reset */
-       err = btbcm_reset(hdev);
-       if (err)
-               return err;
-
-       /* Read Local Version Info */
-       skb = btbcm_read_local_version(hdev);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
-
-       ver = (struct hci_rp_read_local_version *)skb->data;
-       rev = le16_to_cpu(ver->hci_rev);
-       subver = le16_to_cpu(ver->lmp_subver);
-       kfree_skb(skb);
+       int err;
 
-       /* Read controller information */
-       err = btbcm_read_info(hdev);
+       /* Initialize */
+       err = btbcm_initialize(hdev, fw_name, sizeof(fw_name), false);
        if (err)
                return err;
 
-       switch ((rev & 0xf000) >> 12) {
-       case 0:
-       case 3:
-               for (i = 0; bcm_uart_subver_table[i].name; i++) {
-                       if (subver == bcm_uart_subver_table[i].subver) {
-                               hw_name = bcm_uart_subver_table[i].name;
-                               break;
-                       }
-               }
-
-               snprintf(fw_name, sizeof(fw_name), "brcm/%s.hcd",
-                        hw_name ? : "BCM");
-               break;
-       case 1:
-       case 2:
-               /* Read USB Product Info */
-               skb = btbcm_read_usb_product(hdev);
-               if (IS_ERR(skb))
-                       return PTR_ERR(skb);
-
-               vid = get_unaligned_le16(skb->data + 1);
-               pid = get_unaligned_le16(skb->data + 3);
-               kfree_skb(skb);
-
-               for (i = 0; bcm_usb_subver_table[i].name; i++) {
-                       if (subver == bcm_usb_subver_table[i].subver) {
-                               hw_name = bcm_usb_subver_table[i].name;
-                               break;
-                       }
-               }
-
-               snprintf(fw_name, sizeof(fw_name), "brcm/%s-%4.4x-%4.4x.hcd",
-                        hw_name ? : "BCM", vid, pid);
-               break;
-       default:
-               return 0;
-       }
-
-       bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
-                   hw_name ? : "BCM", (subver & 0xe000) >> 13,
-                   (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
-
        err = request_firmware(&fw, fw_name, &hdev->dev);
        if (err < 0) {
                bt_dev_info(hdev, "BCM: Patch %s not found", fw_name);
@@ -517,25 +456,11 @@ int btbcm_setup_patchram(struct hci_dev *hdev)
 
        release_firmware(fw);
 
-       /* Reset */
-       err = btbcm_reset(hdev);
+       /* Re-initialize */
+       err = btbcm_initialize(hdev, fw_name, sizeof(fw_name), true);
        if (err)
                return err;
 
-       /* Read Local Version Info */
-       skb = btbcm_read_local_version(hdev);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
-
-       ver = (struct hci_rp_read_local_version *)skb->data;
-       rev = le16_to_cpu(ver->hci_rev);
-       subver = le16_to_cpu(ver->lmp_subver);
-       kfree_skb(skb);
-
-       bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
-                   hw_name ? : "BCM", (subver & 0xe000) >> 13,
-                   (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
-
        /* Read Local Name */
        skb = btbcm_read_local_name(hdev);
        if (IS_ERR(skb))
index cfe6ad4cc62153d59e0b7b2b88f6132e7604b31e..5346515c880c6582131a770e74af9f2de49825b2 100644 (file)
@@ -73,7 +73,8 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw);
 int btbcm_setup_patchram(struct hci_dev *hdev);
 int btbcm_setup_apple(struct hci_dev *hdev);
 
-int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len);
+int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
+                    bool reinit);
 int btbcm_finalize(struct hci_dev *hdev);
 
 #else
@@ -104,7 +105,7 @@ static inline int btbcm_setup_apple(struct hci_dev *hdev)
 }
 
 static inline int btbcm_initialize(struct hci_dev *hdev, char *fw_name,
-                                  size_t len)
+                                  size_t len, bool reinit)
 {
        return 0;
 }
index 2793d4180d2f988e76d7cfaf2dd8b50b7280d710..8219816c54a09b73a17dcef442748783f9d616f0 100644 (file)
@@ -127,28 +127,41 @@ static void rome_tlv_check_data(struct rome_config *config,
        BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff);
        BT_DBG("Length\t\t : %d bytes", length);
 
+       config->dnld_mode = ROME_SKIP_EVT_NONE;
+
        switch (config->type) {
        case TLV_TYPE_PATCH:
                tlv_patch = (struct tlv_type_patch *)tlv->data;
-               BT_DBG("Total Length\t\t : %d bytes",
+
+               /* For Rome versions 1.1 to 3.1, all segment commands
+                * are acked by a vendor specific event (VSE).
+                * For Rome >= 3.2, the download mode field indicates
+                * if VSE is skipped by the controller.
+                * In case VSE is skipped, only the last segment is acked.
+                */
+               config->dnld_mode = tlv_patch->download_mode;
+
+               BT_DBG("Total Length           : %d bytes",
                       le32_to_cpu(tlv_patch->total_size));
-               BT_DBG("Patch Data Length\t : %d bytes",
+               BT_DBG("Patch Data Length      : %d bytes",
                       le32_to_cpu(tlv_patch->data_length));
                BT_DBG("Signing Format Version : 0x%x",
                       tlv_patch->format_version);
-               BT_DBG("Signature Algorithm\t : 0x%x",
+               BT_DBG("Signature Algorithm    : 0x%x",
                       tlv_patch->signature);
-               BT_DBG("Reserved\t\t : 0x%x",
-                      le16_to_cpu(tlv_patch->reserved1));
-               BT_DBG("Product ID\t\t : 0x%04x",
+               BT_DBG("Download mode          : 0x%x",
+                      tlv_patch->download_mode);
+               BT_DBG("Reserved               : 0x%x",
+                      tlv_patch->reserved1);
+               BT_DBG("Product ID             : 0x%04x",
                       le16_to_cpu(tlv_patch->product_id));
-               BT_DBG("Rom Build Version\t : 0x%04x",
+               BT_DBG("Rom Build Version      : 0x%04x",
                       le16_to_cpu(tlv_patch->rom_build));
-               BT_DBG("Patch Version\t\t : 0x%04x",
+               BT_DBG("Patch Version          : 0x%04x",
                       le16_to_cpu(tlv_patch->patch_version));
-               BT_DBG("Reserved\t\t : 0x%x",
+               BT_DBG("Reserved               : 0x%x",
                       le16_to_cpu(tlv_patch->reserved2));
-               BT_DBG("Patch Entry Address\t : 0x%x",
+               BT_DBG("Patch Entry Address    : 0x%x",
                       le32_to_cpu(tlv_patch->entry));
                break;
 
@@ -194,8 +207,8 @@ static void rome_tlv_check_data(struct rome_config *config,
        }
 }
 
-static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size,
-                                const u8 *data)
+static int rome_tlv_send_segment(struct hci_dev *hdev, int seg_size,
+                                const u8 *data, enum rome_tlv_dnld_mode mode)
 {
        struct sk_buff *skb;
        struct edl_event_hdr *edl;
@@ -203,12 +216,14 @@ static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size,
        u8 cmd[MAX_SIZE_PER_TLV_SEGMENT + 2];
        int err = 0;
 
-       BT_DBG("%s: Download segment #%d size %d", hdev->name, idx, seg_size);
-
        cmd[0] = EDL_PATCH_TLV_REQ_CMD;
        cmd[1] = seg_size;
        memcpy(cmd + 2, data, seg_size);
 
+       if (mode == ROME_SKIP_EVT_VSE_CC || mode == ROME_SKIP_EVT_VSE)
+               return __hci_cmd_send(hdev, EDL_PATCH_CMD_OPCODE, seg_size + 2,
+                                     cmd);
+
        skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, seg_size + 2, cmd,
                                HCI_VENDOR_PKT, HCI_INIT_TIMEOUT);
        if (IS_ERR(skb)) {
@@ -245,47 +260,12 @@ static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size,
        return err;
 }
 
-static int rome_tlv_download_request(struct hci_dev *hdev,
-                                    const struct firmware *fw)
-{
-       const u8 *buffer, *data;
-       int total_segment, remain_size;
-       int ret, i;
-
-       if (!fw || !fw->data)
-               return -EINVAL;
-
-       total_segment = fw->size / MAX_SIZE_PER_TLV_SEGMENT;
-       remain_size = fw->size % MAX_SIZE_PER_TLV_SEGMENT;
-
-       BT_DBG("%s: Total segment num %d remain size %d total size %zu",
-              hdev->name, total_segment, remain_size, fw->size);
-
-       data = fw->data;
-       for (i = 0; i < total_segment; i++) {
-               buffer = data + i * MAX_SIZE_PER_TLV_SEGMENT;
-               ret = rome_tlv_send_segment(hdev, i, MAX_SIZE_PER_TLV_SEGMENT,
-                                           buffer);
-               if (ret < 0)
-                       return -EIO;
-       }
-
-       if (remain_size) {
-               buffer = data + total_segment * MAX_SIZE_PER_TLV_SEGMENT;
-               ret = rome_tlv_send_segment(hdev, total_segment, remain_size,
-                                           buffer);
-               if (ret < 0)
-                       return -EIO;
-       }
-
-       return 0;
-}
-
 static int rome_download_firmware(struct hci_dev *hdev,
                                  struct rome_config *config)
 {
        const struct firmware *fw;
-       int ret;
+       const u8 *segment;
+       int ret, remain, i = 0;
 
        bt_dev_info(hdev, "ROME Downloading %s", config->fwname);
 
@@ -298,10 +278,24 @@ static int rome_download_firmware(struct hci_dev *hdev,
 
        rome_tlv_check_data(config, fw);
 
-       ret = rome_tlv_download_request(hdev, fw);
-       if (ret) {
-               BT_ERR("%s: Failed to download file: %s (%d)", hdev->name,
-                      config->fwname, ret);
+       segment = fw->data;
+       remain = fw->size;
+       while (remain > 0) {
+               int segsize = min(MAX_SIZE_PER_TLV_SEGMENT, remain);
+
+               bt_dev_dbg(hdev, "Send segment %d, size %d", i++, segsize);
+
+               remain -= segsize;
+               /* The last segment is always acked, regardless of download mode */
+               if (!remain || segsize < MAX_SIZE_PER_TLV_SEGMENT)
+                       config->dnld_mode = ROME_SKIP_EVT_NONE;
+
+               ret = rome_tlv_send_segment(hdev, segsize, segment,
+                                           config->dnld_mode);
+               if (ret)
+                       break;
+
+               segment += segsize;
        }
 
        release_firmware(fw);
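
The rewritten loop folds the old full-segments-plus-remainder arithmetic into a single pass that sends min(MAX_SIZE_PER_TLV_SEGMENT, remain) bytes per iteration. A standalone sketch of the equivalent chunking in plain C (SEG_MAX is a hypothetical stand-in for the real constant):

#include <stdio.h>

#define SEG_MAX 239	/* hypothetical segment limit */

static void send_chunks(const unsigned char *data, int size)
{
	int remain = size, i = 0;

	while (remain > 0) {
		int segsize = remain < SEG_MAX ? remain : SEG_MAX;

		printf("segment %d: %d bytes\n", i++, segsize);
		data += segsize;
		remain -= segsize;
	}
}
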
index 65e994b96c471986da3a8d5396561e23f56662ea..13d77fd873b6edf9b43c7c4da800d3a3a23798bf 100644 (file)
@@ -61,6 +61,13 @@ enum qca_bardrate {
        QCA_BAUDRATE_RESERVED
 };
 
+enum rome_tlv_dnld_mode {
+       ROME_SKIP_EVT_NONE,
+       ROME_SKIP_EVT_VSE,
+       ROME_SKIP_EVT_CC,
+       ROME_SKIP_EVT_VSE_CC
+};
+
 enum rome_tlv_type {
        TLV_TYPE_PATCH = 1,
        TLV_TYPE_NVM
@@ -70,6 +77,7 @@ struct rome_config {
        u8 type;
        char fwname[64];
        uint8_t user_baud_rate;
+       enum rome_tlv_dnld_mode dnld_mode;
 };
 
 struct edl_event_hdr {
@@ -94,7 +102,8 @@ struct tlv_type_patch {
        __le32 data_length;
        __u8   format_version;
        __u8   signature;
-       __le16 reserved1;
+       __u8   download_mode;
+       __u8   reserved1;
        __le16 product_id;
        __le16 rom_build;
        __le16 patch_version;
index 2c9a5fc9137da0d3d48b376c9e3b11ca0c99f265..7df3eed1ef5e9a44a9b4f5ab3f7008b454dc2da5 100644 (file)
@@ -65,6 +65,7 @@ static int btqcomsmd_cmd_callback(struct rpmsg_device *rpdev, void *data,
 {
        struct btqcomsmd *btq = priv;
 
+       btq->hdev->stat.byte_rx += count;
        return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count);
 }
 
@@ -76,12 +77,21 @@ static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb)
        switch (hci_skb_pkt_type(skb)) {
        case HCI_ACLDATA_PKT:
                ret = rpmsg_send(btq->acl_channel, skb->data, skb->len);
+               if (ret) {
+                       hdev->stat.err_tx++;
+                       break;
+               }
                hdev->stat.acl_tx++;
                hdev->stat.byte_tx += skb->len;
                break;
        case HCI_COMMAND_PKT:
                ret = rpmsg_send(btq->cmd_channel, skb->data, skb->len);
+               if (ret) {
+                       hdev->stat.err_tx++;
+                       break;
+               }
                hdev->stat.cmd_tx++;
+               hdev->stat.byte_tx += skb->len;
                break;
        default:
                ret = -EILSEQ;
index c8c8b0b8d333434fbe08dfc94f1a87e6f002b2e2..91882f54c7bd180c1f6033eaa3044a1ce201fef3 100644 (file)
@@ -231,6 +231,7 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 },
@@ -263,7 +264,6 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
 
        /* QCA ROME chipset */
-       { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME },
@@ -276,6 +276,8 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x04ca, 0x3015), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME },
+       { USB_DEVICE(0x04ca, 0x301a), .driver_info = BTUSB_QCA_ROME },
+       { USB_DEVICE(0x13d3, 0x3496), .driver_info = BTUSB_QCA_ROME },
 
        /* Broadcom BCM2035 */
        { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -399,6 +401,13 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"),
                },
        },
+       {
+               /* Dell XPS 9360 (QCA ROME device 0cf3:e300) */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
+               },
+       },
        {}
 };
 
@@ -2852,6 +2861,12 @@ static int btusb_config_oob_wake(struct hci_dev *hdev)
 }
 #endif
 
+static void btusb_check_needs_reset_resume(struct usb_interface *intf)
+{
+       if (dmi_check_system(btusb_needs_reset_resume_table))
+               interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
+}
+
 static int btusb_probe(struct usb_interface *intf,
                       const struct usb_device_id *id)
 {
@@ -2974,9 +2989,6 @@ static int btusb_probe(struct usb_interface *intf,
        hdev->send   = btusb_send_frame;
        hdev->notify = btusb_notify;
 
-       if (dmi_check_system(btusb_needs_reset_resume_table))
-               interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
-
 #ifdef CONFIG_PM
        err = btusb_config_oob_wake(hdev);
        if (err)
@@ -3064,6 +3076,7 @@ static int btusb_probe(struct usb_interface *intf,
                data->setup_on_usb = btusb_setup_qca;
                hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
                set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+               btusb_check_needs_reset_resume(intf);
        }
 
 #ifdef CONFIG_BT_HCIBTUSB_RTL
index 441f5e1deb11523f4c72e1df3e33dd347f042f7f..f06f0f1132fb4f804767e037c5393ce1e7930723 100644 (file)
@@ -501,7 +501,7 @@ static int bcm_setup(struct hci_uart *hu)
        hu->hdev->set_diag = bcm_set_diag;
        hu->hdev->set_bdaddr = btbcm_set_bdaddr;
 
-       err = btbcm_initialize(hu->hdev, fw_name, sizeof(fw_name));
+       err = btbcm_initialize(hu->hdev, fw_name, sizeof(fw_name), false);
        if (err)
                return err;
 
@@ -794,19 +794,21 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = {
        { },
 };
 
-#ifdef CONFIG_ACPI
-/* IRQ polarity of some chipsets are not defined correctly in ACPI table. */
-static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = {
-       {       /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
-               .ident = "Lenovo ThinkPad 8",
+/* Some firmware reports an IRQ which does not work (wrong pin in fw table?) */
+static const struct dmi_system_id bcm_broken_irq_dmi_table[] = {
+       {
+               .ident = "Meegopad T08",
                .matches = {
-                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-                       DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
+                       DMI_EXACT_MATCH(DMI_BOARD_VENDOR,
+                                       "To be filled by OEM."),
+                       DMI_EXACT_MATCH(DMI_BOARD_NAME, "T3 MRD"),
+                       DMI_EXACT_MATCH(DMI_BOARD_VERSION, "V1.1"),
                },
        },
        { }
 };
 
+#ifdef CONFIG_ACPI
 static int bcm_resource(struct acpi_resource *ares, void *data)
 {
        struct bcm_device *dev = data;
@@ -904,6 +906,8 @@ static int bcm_gpio_set_shutdown(struct bcm_device *dev, bool powered)
 
 static int bcm_get_resources(struct bcm_device *dev)
 {
+       const struct dmi_system_id *dmi_id;
+
        dev->name = dev_name(dev->dev);
 
        if (x86_apple_machine && !bcm_apple_get_resources(dev))
@@ -936,6 +940,13 @@ static int bcm_get_resources(struct bcm_device *dev)
                dev->irq = gpiod_to_irq(gpio);
        }
 
+       dmi_id = dmi_first_match(bcm_broken_irq_dmi_table);
+       if (dmi_id) {
+               dev_info(dev->dev, "%s: Has a broken IRQ config, disabling IRQ support / runtime-pm\n",
+                        dmi_id->ident);
+               dev->irq = 0;
+       }
+
        dev_dbg(dev->dev, "BCM irq: %d\n", dev->irq);
        return 0;
 }
@@ -944,7 +955,6 @@ static int bcm_get_resources(struct bcm_device *dev)
 static int bcm_acpi_probe(struct bcm_device *dev)
 {
        LIST_HEAD(resources);
-       const struct dmi_system_id *dmi_id;
        const struct acpi_gpio_mapping *gpio_mapping = acpi_bcm_int_last_gpios;
        struct resource_entry *entry;
        int ret;
@@ -991,13 +1001,6 @@ static int bcm_acpi_probe(struct bcm_device *dev)
                dev->irq_active_low = irq_polarity;
                dev_warn(dev->dev, "Overwriting IRQ polarity to active %s by module-param\n",
                         dev->irq_active_low ? "low" : "high");
-       } else {
-               dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table);
-               if (dmi_id) {
-                       dev_warn(dev->dev, "%s: Overwriting IRQ polarity to active low",
-                                dmi_id->ident);
-                       dev->irq_active_low = true;
-               }
        }
 
        return 0;
index b6a71705b7d6a6f2c1c87452a61c80500115c680..954213e5daa518fdb792aa2a08dd0ebeed2dea51 100644 (file)
@@ -447,6 +447,8 @@ static int hci_uart_setup(struct hci_dev *hdev)
                btbcm_check_bdaddr(hdev);
                break;
 #endif
+       default:
+               break;
        }
 
 done:
index 05ec530b8a3a70b218918c5f39b94f1666b8b7a8..f05382b5a65da37358b1284196cbc0011b0bf90f 100644 (file)
  */
 
 #include <linux/kernel.h>
+#include <linux/clk.h>
 #include <linux/debugfs.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/serdev.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -50,6 +55,9 @@
 #define IBS_TX_IDLE_TIMEOUT_MS         2000
 #define BAUDRATE_SETTLE_TIMEOUT_MS     300
 
+/* susclk rate */
+#define SUSCLK_RATE_32KHZ      32768
+
 /* HCI_IBS transmit side sleep protocol states */
 enum tx_ibs_states {
        HCI_IBS_TX_ASLEEP,
@@ -111,6 +119,12 @@ struct qca_data {
        u64 votes_off;
 };
 
+struct qca_serdev {
+       struct hci_uart  serdev_hu;
+       struct gpio_desc *bt_en;
+       struct clk       *susclk;
+};
+
 static void __serial_clock_on(struct tty_struct *tty)
 {
        /* TODO: Some chipset requires to enable UART clock on client
@@ -386,6 +400,7 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
 /* Initialize protocol */
 static int qca_open(struct hci_uart *hu)
 {
+       struct qca_serdev *qcadev;
        struct qca_data *qca;
 
        BT_DBG("hu %p qca_open", hu);
@@ -444,6 +459,13 @@ static int qca_open(struct hci_uart *hu)
        timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
        qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS;
 
+       if (hu->serdev) {
+               serdev_device_open(hu->serdev);
+
+               qcadev = serdev_device_get_drvdata(hu->serdev);
+               gpiod_set_value_cansleep(qcadev->bt_en, 1);
+       }
+
        BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
               qca->tx_idle_delay, qca->wake_retrans);
 
@@ -512,6 +534,7 @@ static int qca_flush(struct hci_uart *hu)
 /* Close protocol */
 static int qca_close(struct hci_uart *hu)
 {
+       struct qca_serdev *qcadev;
        struct qca_data *qca = hu->priv;
 
        BT_DBG("hu %p qca close", hu);
@@ -525,6 +548,13 @@ static int qca_close(struct hci_uart *hu)
        destroy_workqueue(qca->workqueue);
        qca->hu = NULL;
 
+       if (hu->serdev) {
+               serdev_device_close(hu->serdev);
+
+               qcadev = serdev_device_get_drvdata(hu->serdev);
+               gpiod_set_value_cansleep(qcadev->bt_en, 0);
+       }
+
        kfree_skb(qca->rx_skb);
 
        hu->priv = NULL;
@@ -885,6 +915,14 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
        return 0;
 }
 
+static inline void host_set_baudrate(struct hci_uart *hu, unsigned int speed)
+{
+       if (hu->serdev)
+               serdev_device_set_baudrate(hu->serdev, speed);
+       else
+               hci_uart_set_baudrate(hu, speed);
+}
+
 static int qca_setup(struct hci_uart *hu)
 {
        struct hci_dev *hdev = hu->hdev;
@@ -905,7 +943,7 @@ static int qca_setup(struct hci_uart *hu)
                speed = hu->proto->init_speed;
 
        if (speed)
-               hci_uart_set_baudrate(hu, speed);
+               host_set_baudrate(hu, speed);
 
        /* Setup user speed if needed */
        speed = 0;
@@ -924,7 +962,7 @@ static int qca_setup(struct hci_uart *hu)
                                   ret);
                        return ret;
                }
-               hci_uart_set_baudrate(hu, speed);
+               host_set_baudrate(hu, speed);
        }
 
        /* Setup patch / NVM configurations */
@@ -935,6 +973,12 @@ static int qca_setup(struct hci_uart *hu)
        } else if (ret == -ENOENT) {
                /* No patch/nvm-config found, run with original fw/config */
                ret = 0;
+       } else if (ret == -EAGAIN) {
+               /*
+                * Userspace firmware loader will return -EAGAIN in case no
+                * patch/nvm-config is found, so run with original fw/config.
+                */
+               ret = 0;
        }
 
        /* Setup bdaddr */
@@ -958,12 +1002,80 @@ static struct hci_uart_proto qca_proto = {
        .dequeue        = qca_dequeue,
 };
 
+static int qca_serdev_probe(struct serdev_device *serdev)
+{
+       struct qca_serdev *qcadev;
+       int err;
+
+       qcadev = devm_kzalloc(&serdev->dev, sizeof(*qcadev), GFP_KERNEL);
+       if (!qcadev)
+               return -ENOMEM;
+
+       qcadev->serdev_hu.serdev = serdev;
+       serdev_device_set_drvdata(serdev, qcadev);
+
+       qcadev->bt_en = devm_gpiod_get(&serdev->dev, "enable",
+                                      GPIOD_OUT_LOW);
+       if (IS_ERR(qcadev->bt_en)) {
+               dev_err(&serdev->dev, "failed to acquire enable gpio\n");
+               return PTR_ERR(qcadev->bt_en);
+       }
+
+       qcadev->susclk = devm_clk_get(&serdev->dev, NULL);
+       if (IS_ERR(qcadev->susclk)) {
+               dev_err(&serdev->dev, "failed to acquire clk\n");
+               return PTR_ERR(qcadev->susclk);
+       }
+
+       err = clk_set_rate(qcadev->susclk, SUSCLK_RATE_32KHZ);
+       if (err)
+               return err;
+
+       err = clk_prepare_enable(qcadev->susclk);
+       if (err)
+               return err;
+
+       err = hci_uart_register_device(&qcadev->serdev_hu, &qca_proto);
+       if (err)
+               clk_disable_unprepare(qcadev->susclk);
+
+       return err;
+}
+
+static void qca_serdev_remove(struct serdev_device *serdev)
+{
+       struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
+
+       hci_uart_unregister_device(&qcadev->serdev_hu);
+
+       clk_disable_unprepare(qcadev->susclk);
+}
+
+static const struct of_device_id qca_bluetooth_of_match[] = {
+       { .compatible = "qcom,qca6174-bt" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match);
+
+static struct serdev_device_driver qca_serdev_driver = {
+       .probe = qca_serdev_probe,
+       .remove = qca_serdev_remove,
+       .driver = {
+               .name = "hci_uart_qca",
+               .of_match_table = qca_bluetooth_of_match,
+       },
+};
+
 int __init qca_init(void)
 {
+       serdev_device_driver_register(&qca_serdev_driver);
+
        return hci_uart_register_proto(&qca_proto);
 }
 
 int __exit qca_deinit(void)
 {
+       serdev_device_driver_unregister(&qca_serdev_driver);
+
        return hci_uart_unregister_proto(&qca_proto);
 }
index d1c0b60e9326f5f5a0b0db56ac7d328283f27506..6dc177bf4c42f6fce5b9787072aba2b7aa94e98c 100644 (file)
@@ -33,6 +33,7 @@ config HISILICON_LPC
        bool "Support for ISA I/O space on HiSilicon Hip06/7"
        depends on ARM64 && (ARCH_HISI || COMPILE_TEST)
        select INDIRECT_PIO
+       select MFD_CORE if ACPI
        help
          Driver to enable I/O access to devices attached to the Low Pin
          Count bus on the HiSilicon Hip06/7 SoC.
index 8327478effd0b1c7da0b892a3b857cd6423e5a88..bfc566d3f31a40cf5b89d9284b2538644af68dee 100644 (file)
@@ -2371,7 +2371,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
        if (!CDROM_CAN(CDC_SELECT_DISC) || arg == CDSL_CURRENT)
                return media_changed(cdi, 1);
 
-       if ((unsigned int)arg >= cdi->capacity)
+       if (arg >= cdi->capacity)
                return -EINVAL;
 
        info = kmalloc(sizeof(*info), GFP_KERNEL);
index c381c8e396fcc0e43b5a1b2a5ebd0c9e80ad732f..79d8c84693a185264990d40185006f5eaec0f145 100644 (file)
@@ -195,7 +195,7 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int ty
        return 0;
 }
 
-int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
+static int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
 {
        size_t i;
        u32 *gp;
@@ -470,7 +470,7 @@ static int uninorth_free_gatt_table(struct agp_bridge_data *bridge)
        return 0;
 }
 
-void null_cache_flush(void)
+static void null_cache_flush(void)
 {
        mb();
 }
index e027e7fa1472b6612a50b597a9c3b13bf94939a8..cd888d4ee605e0b9cf5f4970d34989d4b72f3b72 100644 (file)
 #include <linux/ptrace.h>
 #include <linux/workqueue.h>
 #include <linux/irq.h>
+#include <linux/ratelimit.h>
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
@@ -427,8 +428,9 @@ struct crng_state primary_crng = {
  * its value (from 0->1->2).
  */
 static int crng_init = 0;
-#define crng_ready() (likely(crng_init > 0))
+#define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
+static unsigned long crng_global_init_time = 0;
 #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
 static void _extract_crng(struct crng_state *crng,
                          __u32 out[CHACHA20_BLOCK_WORDS]);
@@ -437,6 +439,16 @@ static void _crng_backtrack_protect(struct crng_state *crng,
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
+static struct ratelimit_state unseeded_warning =
+       RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
+static struct ratelimit_state urandom_warning =
+       RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
+
+static int ratelimit_disable __read_mostly;
+
+module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
+MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
+
 /**********************************************************************
  *
  * OS independent entropy store.   Here are the functions which handle
@@ -787,6 +799,43 @@ static void crng_initialize(struct crng_state *crng)
        crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
 }
 
+#ifdef CONFIG_NUMA
+static void do_numa_crng_init(struct work_struct *work)
+{
+       int i;
+       struct crng_state *crng;
+       struct crng_state **pool;
+
+       pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL);
+       for_each_online_node(i) {
+               crng = kmalloc_node(sizeof(struct crng_state),
+                                   GFP_KERNEL | __GFP_NOFAIL, i);
+               spin_lock_init(&crng->lock);
+               crng_initialize(crng);
+               pool[i] = crng;
+       }
+       mb();
+       if (cmpxchg(&crng_node_pool, NULL, pool)) {
+               for_each_node(i)
+                       kfree(pool[i]);
+               kfree(pool);
+       }
+}
+
+static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init);
+
+static void numa_crng_init(void)
+{
+       schedule_work(&numa_crng_init_work);
+}
+#else
+static void numa_crng_init(void) {}
+#endif
+
+/*
+ * crng_fast_load() can be called by code in the interrupt service
+ * path.  So we can't afford to dilly-dally.
+ */
 static int crng_fast_load(const char *cp, size_t len)
 {
        unsigned long flags;
@@ -794,7 +843,7 @@ static int crng_fast_load(const char *cp, size_t len)
 
        if (!spin_trylock_irqsave(&primary_crng.lock, flags))
                return 0;
-       if (crng_ready()) {
+       if (crng_init != 0) {
                spin_unlock_irqrestore(&primary_crng.lock, flags);
                return 0;
        }
@@ -813,6 +862,51 @@ static int crng_fast_load(const char *cp, size_t len)
        return 1;
 }
 
+/*
+ * crng_slow_load() is called by add_device_randomness, which has two
+ * attributes.  (1) We can't trust that the buffer passed to it is
+ * guaranteed to be unpredictable (so it might not have any entropy at
+ * all), and (2) it doesn't have the performance constraints of
+ * crng_fast_load().
+ *
+ * So we do something more comprehensive which is guaranteed to touch
+ * all of the primary_crng's state, and which uses an LFSR with a
+ * period of 255 as part of the mixing algorithm.  Finally, we do
+ * *not* advance crng_init_cnt, since the buffer we get may be something
+ * like a fixed DMI table (for example), which might very well be
+ * unique to the machine, but is otherwise unvarying.
+ */
+static int crng_slow_load(const char *cp, size_t len)
+{
+       unsigned long           flags;
+       static unsigned char    lfsr = 1;
+       unsigned char           tmp;
+       unsigned                i, max = CHACHA20_KEY_SIZE;
+       const char *            src_buf = cp;
+       char *                  dest_buf = (char *) &primary_crng.state[4];
+
+       if (!spin_trylock_irqsave(&primary_crng.lock, flags))
+               return 0;
+       if (crng_init != 0) {
+               spin_unlock_irqrestore(&primary_crng.lock, flags);
+               return 0;
+       }
+       if (len > max)
+               max = len;
+
+       for (i = 0; i < max ; i++) {
+               tmp = lfsr;
+               lfsr >>= 1;
+               if (tmp & 1)
+                       lfsr ^= 0xE1;
+               tmp = dest_buf[i % CHACHA20_KEY_SIZE];
+               dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
+               lfsr += (tmp << 3) | (tmp >> 5);
+       }
+       spin_unlock_irqrestore(&primary_crng.lock, flags);
+       return 1;
+}
+
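The mixing loop above steps an 8-bit Galois LFSR (right shift, taps 0xE1). Its claimed period of 255 can be checked with a standalone loop; a userspace sketch, not kernel code:

#include <stdio.h>

int main(void)
{
	unsigned char lfsr = 1, tmp;
	int steps = 0;

	do {		/* the same step as in crng_slow_load() */
		tmp = lfsr;
		lfsr >>= 1;
		if (tmp & 1)
			lfsr ^= 0xE1;
		steps++;
	} while (lfsr != 1);

	printf("period = %d\n", steps);	/* 255, per the comment above */
	return 0;
}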
 static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 {
        unsigned long   flags;
@@ -831,7 +925,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
                _crng_backtrack_protect(&primary_crng, buf.block,
                                        CHACHA20_KEY_SIZE);
        }
-       spin_lock_irqsave(&primary_crng.lock, flags);
+       spin_lock_irqsave(&crng->lock, flags);
        for (i = 0; i < 8; i++) {
                unsigned long   rv;
                if (!arch_get_random_seed_long(&rv) &&
@@ -841,13 +935,26 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
        }
        memzero_explicit(&buf, sizeof(buf));
        crng->init_time = jiffies;
-       spin_unlock_irqrestore(&primary_crng.lock, flags);
+       spin_unlock_irqrestore(&crng->lock, flags);
        if (crng == &primary_crng && crng_init < 2) {
                invalidate_batched_entropy();
+               numa_crng_init();
                crng_init = 2;
                process_random_ready_list();
                wake_up_interruptible(&crng_init_wait);
                pr_notice("random: crng init done\n");
+               if (unseeded_warning.missed) {
+                       pr_notice("random: %d get_random_xx warning(s) missed "
+                                 "due to ratelimiting\n",
+                                 unseeded_warning.missed);
+                       unseeded_warning.missed = 0;
+               }
+               if (urandom_warning.missed) {
+                       pr_notice("random: %d urandom warning(s) missed "
+                                 "due to ratelimiting\n",
+                                 urandom_warning.missed);
+                       urandom_warning.missed = 0;
+               }
        }
 }
 
@@ -856,8 +963,9 @@ static void _extract_crng(struct crng_state *crng,
 {
        unsigned long v, flags;
 
-       if (crng_init > 1 &&
-           time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
+       if (crng_ready() &&
+           (time_after(crng_global_init_time, crng->init_time) ||
+            time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)))
                crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL);
        spin_lock_irqsave(&crng->lock, flags);
        if (arch_get_random_long(&v))
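Restated as a helper (a sketch, not code from this patch), the new reseed condition reads: the crng must be fully ready, and either the global epoch has been bumped past this instance's init_time (the RNDRESEEDCRNG path added below) or the periodic reseed interval has lapsed:

/* Illustrative restatement of the condition in _extract_crng(). */
static bool crng_due_for_reseed(const struct crng_state *crng)
{
	return crng_ready() &&
	       (time_after(crng_global_init_time, crng->init_time) ||
		time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL));
}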
@@ -981,10 +1089,8 @@ void add_device_randomness(const void *buf, unsigned int size)
        unsigned long time = random_get_entropy() ^ jiffies;
        unsigned long flags;
 
-       if (!crng_ready()) {
-               crng_fast_load(buf, size);
-               return;
-       }
+       if (!crng_ready() && size)
+               crng_slow_load(buf, size);
 
        trace_add_device_randomness(size, _RET_IP_);
        spin_lock_irqsave(&input_pool.lock, flags);
@@ -1139,7 +1245,7 @@ void add_interrupt_randomness(int irq, int irq_flags)
        fast_mix(fast_pool);
        add_interrupt_bench(cycles);
 
-       if (!crng_ready()) {
+       if (unlikely(crng_init == 0)) {
                if ((fast_pool->count >= 64) &&
                    crng_fast_load((char *) fast_pool->pool,
                                   sizeof(fast_pool->pool))) {
@@ -1489,8 +1595,9 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
 #ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
        print_once = true;
 #endif
-       pr_notice("random: %s called from %pS with crng_init=%d\n",
-                 func_name, caller, crng_init);
+       if (__ratelimit(&unseeded_warning))
+               pr_notice("random: %s called from %pS with crng_init=%d\n",
+                         func_name, caller, crng_init);
 }
 
 /*
@@ -1680,28 +1787,14 @@ static void init_std_data(struct entropy_store *r)
  */
 static int rand_initialize(void)
 {
-#ifdef CONFIG_NUMA
-       int i;
-       struct crng_state *crng;
-       struct crng_state **pool;
-#endif
-
        init_std_data(&input_pool);
        init_std_data(&blocking_pool);
        crng_initialize(&primary_crng);
-
-#ifdef CONFIG_NUMA
-       pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL);
-       for_each_online_node(i) {
-               crng = kmalloc_node(sizeof(struct crng_state),
-                                   GFP_KERNEL | __GFP_NOFAIL, i);
-               spin_lock_init(&crng->lock);
-               crng_initialize(crng);
-               pool[i] = crng;
+       crng_global_init_time = jiffies;
+       if (ratelimit_disable) {
+               urandom_warning.interval = 0;
+               unseeded_warning.interval = 0;
        }
-       mb();
-       crng_node_pool = pool;
-#endif
        return 0;
 }
 early_initcall(rand_initialize);
@@ -1769,9 +1862,10 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 
        if (!crng_ready() && maxwarn > 0) {
                maxwarn--;
-               printk(KERN_NOTICE "random: %s: uninitialized urandom read "
-                      "(%zd bytes read)\n",
-                      current->comm, nbytes);
+               if (__ratelimit(&urandom_warning))
+                       printk(KERN_NOTICE "random: %s: uninitialized "
+                              "urandom read (%zd bytes read)\n",
+                              current->comm, nbytes);
                spin_lock_irqsave(&primary_crng.lock, flags);
                crng_init_cnt = 0;
                spin_unlock_irqrestore(&primary_crng.lock, flags);
@@ -1875,6 +1969,14 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
                input_pool.entropy_count = 0;
                blocking_pool.entropy_count = 0;
                return 0;
+       case RNDRESEEDCRNG:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+               if (crng_init < 2)
+                       return -ENODATA;
+               crng_reseed(&primary_crng, NULL);
+               crng_global_init_time = jiffies - 1;
+               return 0;
        default:
                return -EINVAL;
        }
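A userspace caller could exercise the new ioctl roughly as follows; a sketch assuming the matching RNDRESEEDCRNG definition lands in the uapi <linux/random.h>, and subject to the CAP_SYS_ADMIN and crng_init checks above:

#include <fcntl.h>
#include <linux/random.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/urandom", O_RDWR);

	if (fd < 0 || ioctl(fd, RNDRESEEDCRNG) < 0) {
		perror("RNDRESEEDCRNG");	/* expect EPERM or ENODATA on failure */
		return 1;
	}
	close(fd);
	return 0;
}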
@@ -2212,7 +2314,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
 {
        struct entropy_store *poolp = &input_pool;
 
-       if (!crng_ready()) {
+       if (unlikely(crng_init == 0)) {
                crng_fast_load(buffer, count);
                return;
        }
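The crng_init == 0 tests that replace crng_ready() here and in add_interrupt_randomness() rely on crng_init being a tri-state; a sketch of the distinction as the surrounding code uses it (the real crng_ready() definition sits outside these hunks):

/*
 * crng_init == 0: nothing mixed in yet; crng_fast_load()/crng_slow_load()
 *                 may still seed primary_crng directly.
 * crng_init == 1: fast-loaded, but not yet fully seeded.
 * crng_init == 2: fully seeded.
 */
static inline bool crng_ready_sketch(void)
{
	return crng_init > 1;	/* assumption: matches the real crng_ready() */
}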
index 468f0613401268205762709e2382b5444a06ed91..21085515814f23f09e6fbea4cb49d4d3d99d86c7 100644 (file)
@@ -422,7 +422,7 @@ static void reclaim_dma_bufs(void)
        }
 }
 
-static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size,
+static struct port_buffer *alloc_buf(struct virtio_device *vdev, size_t buf_size,
                                     int pages)
 {
        struct port_buffer *buf;
@@ -445,16 +445,16 @@ static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size,
                return buf;
        }
 
-       if (is_rproc_serial(vq->vdev)) {
+       if (is_rproc_serial(vdev)) {
                /*
                 * Allocate DMA memory from ancestor. When a virtio
                 * device is created by remoteproc, the DMA memory is
                 * associated with the grandparent device:
                 * vdev => rproc => platform-dev.
                 */
-               if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent)
+               if (!vdev->dev.parent || !vdev->dev.parent->parent)
                        goto free_buf;
-               buf->dev = vq->vdev->dev.parent->parent;
+               buf->dev = vdev->dev.parent->parent;
 
                /* Increase device refcnt to avoid freeing it */
                get_device(buf->dev);
@@ -838,7 +838,7 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
 
        count = min((size_t)(32 * 1024), count);
 
-       buf = alloc_buf(port->out_vq, count, 0);
+       buf = alloc_buf(port->portdev->vdev, count, 0);
        if (!buf)
                return -ENOMEM;
 
@@ -957,7 +957,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
        if (ret < 0)
                goto error_out;
 
-       buf = alloc_buf(port->out_vq, 0, pipe->nrbufs);
+       buf = alloc_buf(port->portdev->vdev, 0, pipe->nrbufs);
        if (!buf) {
                ret = -ENOMEM;
                goto error_out;
@@ -1374,7 +1374,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock)
 
        nr_added_bufs = 0;
        do {
-               buf = alloc_buf(vq, PAGE_SIZE, 0);
+               buf = alloc_buf(vq->vdev, PAGE_SIZE, 0);
                if (!buf)
                        break;
 
@@ -1402,7 +1402,6 @@ static int add_port(struct ports_device *portdev, u32 id)
 {
        char debugfs_name[16];
        struct port *port;
-       struct port_buffer *buf;
        dev_t devt;
        unsigned int nr_added_bufs;
        int err;
@@ -1513,8 +1512,6 @@ static int add_port(struct ports_device *portdev, u32 id)
        return 0;
 
 free_inbufs:
-       while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-               free_buf(buf, true);
 free_device:
        device_destroy(pdrvdata.class, port->dev->devt);
 free_cdev:
@@ -1539,34 +1536,14 @@ static void remove_port(struct kref *kref)
 
 static void remove_port_data(struct port *port)
 {
-       struct port_buffer *buf;
-
        spin_lock_irq(&port->inbuf_lock);
        /* Remove unused data this port might have received. */
        discard_port_data(port);
        spin_unlock_irq(&port->inbuf_lock);
 
-       /* Remove buffers we queued up for the Host to send us data in. */
-       do {
-               spin_lock_irq(&port->inbuf_lock);
-               buf = virtqueue_detach_unused_buf(port->in_vq);
-               spin_unlock_irq(&port->inbuf_lock);
-               if (buf)
-                       free_buf(buf, true);
-       } while (buf);
-
        spin_lock_irq(&port->outvq_lock);
        reclaim_consumed_buffers(port);
        spin_unlock_irq(&port->outvq_lock);
-
-       /* Free pending buffers from the out-queue. */
-       do {
-               spin_lock_irq(&port->outvq_lock);
-               buf = virtqueue_detach_unused_buf(port->out_vq);
-               spin_unlock_irq(&port->outvq_lock);
-               if (buf)
-                       free_buf(buf, true);
-       } while (buf);
 }
 
 /*
@@ -1791,13 +1768,24 @@ static void control_work_handler(struct work_struct *work)
        spin_unlock(&portdev->c_ivq_lock);
 }
 
+static void flush_bufs(struct virtqueue *vq, bool can_sleep)
+{
+       struct port_buffer *buf;
+       unsigned int len;
+
+       while ((buf = virtqueue_get_buf(vq, &len)))
+               free_buf(buf, can_sleep);
+}
+
 static void out_intr(struct virtqueue *vq)
 {
        struct port *port;
 
        port = find_port_by_vq(vq->vdev->priv, vq);
-       if (!port)
+       if (!port) {
+               flush_bufs(vq, false);
                return;
+       }
 
        wake_up_interruptible(&port->waitqueue);
 }
@@ -1808,8 +1796,10 @@ static void in_intr(struct virtqueue *vq)
        unsigned long flags;
 
        port = find_port_by_vq(vq->vdev->priv, vq);
-       if (!port)
+       if (!port) {
+               flush_bufs(vq, false);
                return;
+       }
 
        spin_lock_irqsave(&port->inbuf_lock, flags);
        port->inbuf = get_inbuf(port);
@@ -1984,24 +1974,54 @@ static const struct file_operations portdev_fops = {
 
 static void remove_vqs(struct ports_device *portdev)
 {
+       struct virtqueue *vq;
+
+       virtio_device_for_each_vq(portdev->vdev, vq) {
+               struct port_buffer *buf;
+
+               flush_bufs(vq, true);
+               while ((buf = virtqueue_detach_unused_buf(vq)))
+                       free_buf(buf, true);
+       }
        portdev->vdev->config->del_vqs(portdev->vdev);
        kfree(portdev->in_vqs);
        kfree(portdev->out_vqs);
 }
 
-static void remove_controlq_data(struct ports_device *portdev)
+static void virtcons_remove(struct virtio_device *vdev)
 {
-       struct port_buffer *buf;
-       unsigned int len;
+       struct ports_device *portdev;
+       struct port *port, *port2;
 
-       if (!use_multiport(portdev))
-               return;
+       portdev = vdev->priv;
 
-       while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
-               free_buf(buf, true);
+       spin_lock_irq(&pdrvdata_lock);
+       list_del(&portdev->list);
+       spin_unlock_irq(&pdrvdata_lock);
 
-       while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
-               free_buf(buf, true);
+       /* Disable interrupts for vqs */
+       vdev->config->reset(vdev);
+       /* Finish up work that's lined up */
+       if (use_multiport(portdev))
+               cancel_work_sync(&portdev->control_work);
+       else
+               cancel_work_sync(&portdev->config_work);
+
+       list_for_each_entry_safe(port, port2, &portdev->ports, list)
+               unplug_port(port);
+
+       unregister_chrdev(portdev->chr_major, "virtio-portsdev");
+
+       /*
+        * When yanking out a device, we immediately lose the
+        * (device-side) queues.  So there's no point in keeping the
+        * guest side around till we drop our final reference.  This
+        * also means that any ports which are in an open state will
+        * have to just stop using the port, as the vqs are going
+        * away.
+        */
+       remove_vqs(portdev);
+       kfree(portdev);
 }
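remove_vqs() above leans on virtio_device_for_each_vq(); that helper is, to the best of my reading of <linux/virtio.h>, plain list iteration over the virtqueues a device owns, roughly:

/* Approximate expansion of the iteration helper (sketch). */
#define virtio_device_for_each_vq(vdev, vq) \
	list_for_each_entry(vq, &(vdev)->vqs, list)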
 
 /*
@@ -2070,6 +2090,7 @@ static int virtcons_probe(struct virtio_device *vdev)
 
        spin_lock_init(&portdev->ports_lock);
        INIT_LIST_HEAD(&portdev->ports);
+       INIT_LIST_HEAD(&portdev->list);
 
        virtio_device_ready(portdev->vdev);
 
@@ -2087,8 +2108,15 @@ static int virtcons_probe(struct virtio_device *vdev)
                if (!nr_added_bufs) {
                        dev_err(&vdev->dev,
                                "Error allocating buffers for control queue\n");
-                       err = -ENOMEM;
-                       goto free_vqs;
+                       /*
+                        * The host might want to notify mgmt sw about device
+                        * add failure.
+                        */
+                       __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
+                                          VIRTIO_CONSOLE_DEVICE_READY, 0);
+                       /* Device was functional: we need full cleanup. */
+                       virtcons_remove(vdev);
+                       return -ENOMEM;
                }
        } else {
                /*
@@ -2119,11 +2147,6 @@ static int virtcons_probe(struct virtio_device *vdev)
 
        return 0;
 
-free_vqs:
-       /* The host might want to notify mgmt sw about device add failure */
-       __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
-                          VIRTIO_CONSOLE_DEVICE_READY, 0);
-       remove_vqs(portdev);
 free_chrdev:
        unregister_chrdev(portdev->chr_major, "virtio-portsdev");
 free:
@@ -2132,43 +2155,6 @@ static int virtcons_probe(struct virtio_device *vdev)
        return err;
 }
 
-static void virtcons_remove(struct virtio_device *vdev)
-{
-       struct ports_device *portdev;
-       struct port *port, *port2;
-
-       portdev = vdev->priv;
-
-       spin_lock_irq(&pdrvdata_lock);
-       list_del(&portdev->list);
-       spin_unlock_irq(&pdrvdata_lock);
-
-       /* Disable interrupts for vqs */
-       vdev->config->reset(vdev);
-       /* Finish up work that's lined up */
-       if (use_multiport(portdev))
-               cancel_work_sync(&portdev->control_work);
-       else
-               cancel_work_sync(&portdev->config_work);
-
-       list_for_each_entry_safe(port, port2, &portdev->ports, list)
-               unplug_port(port);
-
-       unregister_chrdev(portdev->chr_major, "virtio-portsdev");
-
-       /*
-        * When yanking out a device, we immediately lose the
-        * (device-side) queues.  So there's no point in keeping the
-        * guest side around till we drop our final reference.  This
-        * also means that any ports which are in an open state will
-        * have to just stop using the port, as the vqs are going
-        * away.
-        */
-       remove_controlq_data(portdev);
-       remove_vqs(portdev);
-       kfree(portdev);
-}
-
 static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_CONSOLE, VIRTIO_DEV_ANY_ID },
        { 0 },
@@ -2209,7 +2195,6 @@ static int virtcons_freeze(struct virtio_device *vdev)
         */
        if (use_multiport(portdev))
                virtqueue_disable_cb(portdev->c_ivq);
-       remove_controlq_data(portdev);
 
        list_for_each_entry(port, &portdev->ports, list) {
                virtqueue_disable_cb(port->in_vq);
index 41492e980ef4c0deb4fd6f62020d76137c912b42..34968a381d0fdc878d98e7fab60762f2f6588ec5 100644 (file)
@@ -266,15 +266,13 @@ config COMMON_CLK_STM32MP157
          Support for stm32mp157 SoC family clocks
 
 config COMMON_CLK_STM32F
-       bool "Clock driver for stm32f4 and stm32f7 SoC families"
-       depends on MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746
+       def_bool COMMON_CLK && (MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746)
        help
          Support for stm32f4 and stm32f7 SoC families clocks
 
 config COMMON_CLK_STM32H7
-       bool "Clock driver for stm32h7 SoC family"
-       depends on MACH_STM32H743
+       def_bool COMMON_CLK && MACH_STM32H743
        help
          Support for stm32h7 SoC family clocks
index c58019750b7e186da9d503b486d2fd6510de749f..a2f8c42e527a2622d18953b8be7f432d8f28754a 100644 (file)
@@ -541,7 +541,7 @@ static int cs2000_probe(struct i2c_client *client,
        return ret;
 }
 
-static int cs2000_resume(struct device *dev)
+static int __maybe_unused cs2000_resume(struct device *dev)
 {
        struct cs2000_priv *priv = dev_get_drvdata(dev);
 
index ac4a042f8658258dbcbfdbc058e8b87c8412a51b..1628b93655edf809ec40d1e733a0b25ee7a8b26c 100644 (file)
@@ -112,10 +112,18 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index)
        return 0;
 }
 
+static int clk_mux_determine_rate(struct clk_hw *hw,
+                                 struct clk_rate_request *req)
+{
+       struct clk_mux *mux = to_clk_mux(hw);
+
+       return clk_mux_determine_rate_flags(hw, req, mux->flags);
+}
+
 const struct clk_ops clk_mux_ops = {
        .get_parent = clk_mux_get_parent,
        .set_parent = clk_mux_set_parent,
-       .determine_rate = __clk_mux_determine_rate,
+       .determine_rate = clk_mux_determine_rate,
 };
 EXPORT_SYMBOL_GPL(clk_mux_ops);
 
index f1d5967b4b39a8daeec89f47291ec9d25339832f..edd3cf4514010ce9ed66e66199c0b648ecadf8d1 100644 (file)
@@ -216,7 +216,7 @@ static const char * const usart1_src[] = {
        "pclk5", "pll3_q", "ck_hsi", "ck_csi", "pll4_q", "ck_hse"
 };
 
-const char * const usart234578_src[] = {
+static const char * const usart234578_src[] = {
        "pclk1", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
 };
 
@@ -224,10 +224,6 @@ static const char * const usart6_src[] = {
        "pclk2", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
 };
 
-static const char * const dfsdm_src[] = {
-       "pclk2", "ck_mcu"
-};
-
 static const char * const fdcan_src[] = {
        "ck_hse", "pll3_q", "pll4_q"
 };
@@ -316,10 +312,8 @@ struct stm32_clk_mgate {
 struct clock_config {
        u32 id;
        const char *name;
-       union {
-               const char *parent_name;
-               const char * const *parent_names;
-       };
+       const char *parent_name;
+       const char * const *parent_names;
        int num_parents;
        unsigned long flags;
        void *cfg;
@@ -469,7 +463,7 @@ static void mp1_gate_clk_disable(struct clk_hw *hw)
        }
 }
 
-const struct clk_ops mp1_gate_clk_ops = {
+static const struct clk_ops mp1_gate_clk_ops = {
        .enable         = mp1_gate_clk_enable,
        .disable        = mp1_gate_clk_disable,
        .is_enabled     = clk_gate_is_enabled,
@@ -698,7 +692,7 @@ static void mp1_mgate_clk_disable(struct clk_hw *hw)
                mp1_gate_clk_disable(hw);
 }
 
-const struct clk_ops mp1_mgate_clk_ops = {
+static const struct clk_ops mp1_mgate_clk_ops = {
        .enable         = mp1_mgate_clk_enable,
        .disable        = mp1_mgate_clk_disable,
        .is_enabled     = clk_gate_is_enabled,
@@ -732,7 +726,7 @@ static int clk_mmux_set_parent(struct clk_hw *hw, u8 index)
        return 0;
 }
 
-const struct clk_ops clk_mmux_ops = {
+static const struct clk_ops clk_mmux_ops = {
        .get_parent     = clk_mmux_get_parent,
        .set_parent     = clk_mmux_set_parent,
        .determine_rate = __clk_mux_determine_rate,
@@ -1048,10 +1042,10 @@ struct stm32_pll_cfg {
        u32 offset;
 };
 
-struct clk_hw *_clk_register_pll(struct device *dev,
-                                struct clk_hw_onecell_data *clk_data,
-                                void __iomem *base, spinlock_t *lock,
-                                const struct clock_config *cfg)
+static struct clk_hw *_clk_register_pll(struct device *dev,
+                                       struct clk_hw_onecell_data *clk_data,
+                                       void __iomem *base, spinlock_t *lock,
+                                       const struct clock_config *cfg)
 {
        struct stm32_pll_cfg *stm_pll_cfg = cfg->cfg;
 
@@ -1405,7 +1399,8 @@ enum {
        G_USBH,
        G_ETHSTP,
        G_RTCAPB,
-       G_TZC,
+       G_TZC1,
+       G_TZC2,
        G_TZPC,
        G_IWDG1,
        G_BSEC,
@@ -1417,7 +1412,7 @@ enum {
        G_LAST
 };
 
-struct stm32_mgate mp1_mgate[G_LAST];
+static struct stm32_mgate mp1_mgate[G_LAST];
 
 #define _K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags,\
               _mgate, _ops)\
@@ -1440,7 +1435,7 @@ struct stm32_mgate mp1_mgate[G_LAST];
               &mp1_mgate[_id], &mp1_mgate_clk_ops)
 
 /* Peripheral gates */
-struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
+static struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
        /* Multi gates */
        K_GATE(G_MDIO,          RCC_APB1ENSETR, 31, 0),
        K_MGATE(G_DAC12,        RCC_APB1ENSETR, 29, 0),
@@ -1506,7 +1501,8 @@ struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
        K_GATE(G_BSEC,          RCC_APB5ENSETR, 16, 0),
        K_GATE(G_IWDG1,         RCC_APB5ENSETR, 15, 0),
        K_GATE(G_TZPC,          RCC_APB5ENSETR, 13, 0),
-       K_GATE(G_TZC,           RCC_APB5ENSETR, 12, 0),
+       K_GATE(G_TZC2,          RCC_APB5ENSETR, 12, 0),
+       K_GATE(G_TZC1,          RCC_APB5ENSETR, 11, 0),
        K_GATE(G_RTCAPB,        RCC_APB5ENSETR, 8, 0),
        K_MGATE(G_USART1,       RCC_APB5ENSETR, 4, 0),
        K_MGATE(G_I2C6,         RCC_APB5ENSETR, 3, 0),
@@ -1600,7 +1596,7 @@ enum {
        M_LAST
 };
 
-struct stm32_mmux ker_mux[M_LAST];
+static struct stm32_mmux ker_mux[M_LAST];
 
 #define _K_MUX(_id, _offset, _shift, _width, _mux_flags, _mmux, _ops)\
        [_id] = {\
@@ -1623,7 +1619,7 @@ struct stm32_mmux ker_mux[M_LAST];
        _K_MUX(_id, _offset, _shift, _width, _mux_flags,\
                        &ker_mux[_id], &clk_mmux_ops)
 
-const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = {
+static const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = {
        /* Kernel multi mux */
        K_MMUX(M_SDMMC12, RCC_SDMMC12CKSELR, 0, 3, 0),
        K_MMUX(M_SPI23, RCC_SPI2S23CKSELR, 0, 3, 0),
@@ -1860,7 +1856,8 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
        PCLK(USART1, "usart1", "pclk5", 0, G_USART1),
        PCLK(RTCAPB, "rtcapb", "pclk5", CLK_IGNORE_UNUSED |
             CLK_IS_CRITICAL, G_RTCAPB),
-       PCLK(TZC, "tzc", "pclk5", CLK_IGNORE_UNUSED, G_TZC),
+       PCLK(TZC1, "tzc1", "ck_axi", CLK_IGNORE_UNUSED, G_TZC1),
+       PCLK(TZC2, "tzc2", "ck_axi", CLK_IGNORE_UNUSED, G_TZC2),
        PCLK(TZPC, "tzpc", "pclk5", CLK_IGNORE_UNUSED, G_TZPC),
        PCLK(IWDG1, "iwdg1", "pclk5", 0, G_IWDG1),
        PCLK(BSEC, "bsec", "pclk5", CLK_IGNORE_UNUSED, G_BSEC),
@@ -1916,8 +1913,7 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
        KCLK(RNG1_K, "rng1_k", rng_src, 0, G_RNG1, M_RNG1),
        KCLK(RNG2_K, "rng2_k", rng_src, 0, G_RNG2, M_RNG2),
        KCLK(USBPHY_K, "usbphy_k", usbphy_src, 0, G_USBPHY, M_USBPHY),
-       KCLK(STGEN_K, "stgen_k",  stgen_src, CLK_IGNORE_UNUSED,
-            G_STGEN, M_STGEN),
+       KCLK(STGEN_K, "stgen_k", stgen_src, CLK_IS_CRITICAL, G_STGEN, M_STGEN),
        KCLK(SPDIF_K, "spdif_k", spdif_src, 0, G_SPDIF, M_SPDIF),
        KCLK(SPI1_K, "spi1_k", spi123_src, 0, G_SPI1, M_SPI1),
        KCLK(SPI2_K, "spi2_k", spi123_src, 0, G_SPI2, M_SPI23),
@@ -1948,8 +1944,8 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
        KCLK(FDCAN_K, "fdcan_k", fdcan_src, 0, G_FDCAN, M_FDCAN),
        KCLK(SAI1_K, "sai1_k", sai_src, 0, G_SAI1, M_SAI1),
        KCLK(SAI2_K, "sai2_k", sai2_src, 0, G_SAI2, M_SAI2),
-       KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI2, M_SAI3),
-       KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI2, M_SAI4),
+       KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI3, M_SAI3),
+       KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI4, M_SAI4),
        KCLK(ADC12_K, "adc12_k", adc12_src, 0, G_ADC12, M_ADC12),
        KCLK(DSI_K, "dsi_k", dsi_src, 0, G_DSI, M_DSI),
        KCLK(ADFSDM_K, "adfsdm_k", sai_src, 0, G_ADFSDM, M_SAI1),
@@ -1992,10 +1988,6 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
                  _DIV(RCC_MCO2CFGR, 4, 4, 0, NULL)),
 
        /* Debug clocks */
-       FIXED_FACTOR(NO_ID, "ck_axi_div2", "ck_axi", 0, 1, 2),
-
-       GATE(DBG, "ck_apb_dbg", "ck_axi_div2", 0, RCC_DBGCFGR, 8, 0),
-
        GATE(CK_DBG, "ck_sys_dbg", "ck_axi", 0, RCC_DBGCFGR, 8, 0),
 
        COMPOSITE(CK_TRACE, "ck_trace", ck_trace_src, CLK_OPS_PARENT_ENABLE,
index ea67ac81c6f97db113cef4977936776d1673ae3f..7af555f0e60c1b7d4ac31020a0765a6674064037 100644 (file)
@@ -426,9 +426,9 @@ static bool mux_is_better_rate(unsigned long rate, unsigned long now,
        return now <= rate && now > best;
 }
 
-static int
-clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req,
-                            unsigned long flags)
+int clk_mux_determine_rate_flags(struct clk_hw *hw,
+                                struct clk_rate_request *req,
+                                unsigned long flags)
 {
        struct clk_core *core = hw->core, *parent, *best_parent = NULL;
        int i, num_parents, ret;
@@ -488,6 +488,7 @@ clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(clk_mux_determine_rate_flags);
 
 struct clk *__clk_lookup(const char *name)
 {
index 114ecbb94ec5ed8d5e364c981d740ecc305d553a..12320118f8deea7dabba7a4d7af975645fb1d48f 100644 (file)
@@ -464,7 +464,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
        clk_set_rate(clks[IMX6UL_CLK_AHB], 99000000);
 
        /* Change periph_pre clock to pll2_bus to adjust AXI rate to 264MHz */
-       clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_PLL3_USB_OTG]);
+       clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_OSC]);
        clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_CLK2]);
        clk_set_parent(clks[IMX6UL_CLK_PERIPH_PRE], clks[IMX6UL_CLK_PLL2_BUS]);
        clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_PRE]);
index 3645fdb62343904995fbeb7b45ef87d3f4ac0e2b..ab7a3556f5b2f53505731d3cc74d6b1b4a7c5ff8 100644 (file)
@@ -153,10 +153,19 @@ static int clk_regmap_mux_set_parent(struct clk_hw *hw, u8 index)
                                  val << mux->shift);
 }
 
+static int clk_regmap_mux_determine_rate(struct clk_hw *hw,
+                                        struct clk_rate_request *req)
+{
+       struct clk_regmap *clk = to_clk_regmap(hw);
+       struct clk_regmap_mux_data *mux = clk_get_regmap_mux_data(clk);
+
+       return clk_mux_determine_rate_flags(hw, req, mux->flags);
+}
+
 const struct clk_ops clk_regmap_mux_ops = {
        .get_parent = clk_regmap_mux_get_parent,
        .set_parent = clk_regmap_mux_set_parent,
-       .determine_rate = __clk_mux_determine_rate,
+       .determine_rate = clk_regmap_mux_determine_rate,
 };
 EXPORT_SYMBOL_GPL(clk_regmap_mux_ops);
 
index 0be78383f2574ce30458ceb95051eeb215cdbc72..badc4c22b4ee99e800869923ccd4b65abfced67c 100644 (file)
@@ -17,8 +17,6 @@
 #define AO_RTC_ALT_CLK_CNTL0   0x94
 #define AO_RTC_ALT_CLK_CNTL1   0x98
 
-extern const struct clk_ops meson_aoclk_gate_regmap_ops;
-
 struct aoclk_cec_32k {
        struct clk_hw hw;
        struct regmap *regmap;
index cc2992493e0b6cd1ba6ad53154f2476454eb2a08..d0524ec71aaddf8c83eb0c3746b245cdfa974403 100644 (file)
@@ -253,7 +253,7 @@ static struct clk_fixed_factor meson8b_fclk_div3_div = {
        .mult = 1,
        .div = 3,
        .hw.init = &(struct clk_init_data){
-               .name = "fclk_div_div3",
+               .name = "fclk_div3_div",
                .ops = &clk_fixed_factor_ops,
                .parent_names = (const char *[]){ "fixed_pll" },
                .num_parents = 1,
@@ -632,7 +632,8 @@ static struct clk_regmap meson8b_cpu_clk = {
        .hw.init = &(struct clk_init_data){
                .name = "cpu_clk",
                .ops = &clk_regmap_mux_ro_ops,
-               .parent_names = (const char *[]){ "xtal", "cpu_out_sel" },
+               .parent_names = (const char *[]){ "xtal",
+                                                 "cpu_scale_out_sel" },
                .num_parents = 2,
                .flags = (CLK_SET_RATE_PARENT |
                          CLK_SET_RATE_NO_REPARENT),
index 05d97a6871d8f80e5e7e61d7735c98981a33ef66..6c8318470b48774efefd09437dac7cb365c92d33 100644 (file)
@@ -114,7 +114,7 @@ static int tpm_set_next_event(unsigned long delta,
         * of writing CNT registers, which may cause the min_delta event to
         * be missed, so we need to add an ETIME check here in case that
         * happens.
         */
-       return (int)((next - now) <= 0) ? -ETIME : 0;
+       return (int)(next - now) <= 0 ? -ETIME : 0;
 }
 
 static int tpm_set_state_oneshot(struct clock_event_device *evt)
index a782ce87715c5ccc5270428d40ac0a4bd0832340..ed5e42461094476d76bcee450794ab81a7fe6d5a 100644 (file)
@@ -262,6 +262,8 @@ void proc_coredump_connector(struct task_struct *task)
        ev->what = PROC_EVENT_COREDUMP;
        ev->event_data.coredump.process_pid = task->pid;
        ev->event_data.coredump.process_tgid = task->tgid;
+       ev->event_data.coredump.parent_pid = task->real_parent->pid;
+       ev->event_data.coredump.parent_tgid = task->real_parent->tgid;
 
        memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
        msg->ack = 0; /* not used */
@@ -288,6 +290,8 @@ void proc_exit_connector(struct task_struct *task)
        ev->event_data.exit.process_tgid = task->tgid;
        ev->event_data.exit.exit_code = task->exit_code;
        ev->event_data.exit.exit_signal = task->exit_signal;
+       ev->event_data.exit.parent_pid = task->real_parent->pid;
+       ev->event_data.exit.parent_tgid = task->real_parent->tgid;
 
        memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
        msg->ack = 0; /* not used */
index 7f56fe5183f27817c0e818cf216890b520562243..96b35b8b36060f4af9eed2ac82c46aa5d2ea08be 100644 (file)
@@ -20,7 +20,7 @@ config ACPI_CPPC_CPUFREQ
 
 config ARM_ARMADA_37XX_CPUFREQ
        tristate "Armada 37xx CPUFreq support"
-       depends on ARCH_MVEBU
+       depends on ARCH_MVEBU && CPUFREQ_DT
        help
          This adds the CPUFreq driver support for Marvell Armada 37xx SoCs.
          The Armada 37xx PMU supports 4 frequency and VDD levels.
@@ -71,16 +71,6 @@ config ARM_BRCMSTB_AVS_CPUFREQ
 
          Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS.
 
-config ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
-       bool "Broadcom STB AVS CPUfreq driver sysfs debug capability"
-       depends on ARM_BRCMSTB_AVS_CPUFREQ
-       help
-         Enabling this option turns on debug support via sysfs under
-         /sys/kernel/debug/brcmstb-avs-cpufreq. It is possible to read all and
-         write some AVS mailbox registers through sysfs entries.
-
-         If in doubt, say N.
-
 config ARM_EXYNOS5440_CPUFREQ
        tristate "SAMSUNG EXYNOS5440"
        depends on SOC_EXYNOS5440
index 6cdac1aaf23c5de81869f15d5591bb23f2beacd2..b07559b9ed99a6e256ba5afce20b5e156304ade7 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/semaphore.h>
 
-#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#endif
-
 /* Max number of arguments AVS calls take */
 #define AVS_MAX_CMD_ARGS       4
 /*
@@ -182,88 +175,11 @@ struct private_data {
        void __iomem *base;
        void __iomem *avs_intr_base;
        struct device *dev;
-#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
-       struct dentry *debugfs;
-#endif
        struct completion done;
        struct semaphore sem;
        struct pmap pmap;
 };
 
-#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
-
-enum debugfs_format {
-       DEBUGFS_NORMAL,
-       DEBUGFS_FLOAT,
-       DEBUGFS_REV,
-};
-
-struct debugfs_data {
-       struct debugfs_entry *entry;
-       struct private_data *priv;
-};
-
-struct debugfs_entry {
-       char *name;
-       u32 offset;
-       fmode_t mode;
-       enum debugfs_format format;
-};
-
-#define DEBUGFS_ENTRY(name, mode, format)      { \
-       #name, AVS_MBOX_##name, mode, format \
-}
-
-/*
- * These are used for debugfs only. Otherwise we use AVS_MBOX_PARAM() directly.
- */
-#define AVS_MBOX_PARAM1                AVS_MBOX_PARAM(0)
-#define AVS_MBOX_PARAM2                AVS_MBOX_PARAM(1)
-#define AVS_MBOX_PARAM3                AVS_MBOX_PARAM(2)
-#define AVS_MBOX_PARAM4                AVS_MBOX_PARAM(3)
-
-/*
- * This table stores the name, access permissions and offset for each hardware
- * register and is used to generate debugfs entries.
- */
-static struct debugfs_entry debugfs_entries[] = {
-       DEBUGFS_ENTRY(COMMAND, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(STATUS, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(VOLTAGE0, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(TEMP0, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(PV0, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(MV0, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(PARAM1, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(PARAM2, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(PARAM3, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(PARAM4, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(REVISION, 0, DEBUGFS_REV),
-       DEBUGFS_ENTRY(PSTATE, 0, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(HEARTBEAT, 0, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(MAGIC, S_IWUSR, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(SIGMA_HVT, 0, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(SIGMA_SVT, 0, DEBUGFS_NORMAL),
-       DEBUGFS_ENTRY(VOLTAGE1, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(TEMP1, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(PV1, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(MV1, 0, DEBUGFS_FLOAT),
-       DEBUGFS_ENTRY(FREQUENCY, 0, DEBUGFS_NORMAL),
-};
-
-static int brcm_avs_target_index(struct cpufreq_policy *, unsigned int);
-
-static char *__strtolower(char *s)
-{
-       char *p;
-
-       for (p = s; *p; p++)
-               *p = tolower(*p);
-
-       return s;
-}
-
-#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */
-
 static void __iomem *__map_region(const char *name)
 {
        struct device_node *np;
@@ -516,238 +432,6 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv)
        return table;
 }
 
-#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
-
-#define MANT(x)        (unsigned int)(abs((x)) / 1000)
-#define FRAC(x)        (unsigned int)(abs((x)) - abs((x)) / 1000 * 1000)
-
-static int brcm_avs_debug_show(struct seq_file *s, void *data)
-{
-       struct debugfs_data *dbgfs = s->private;
-       void __iomem *base;
-       u32 val, offset;
-
-       if (!dbgfs) {
-               seq_puts(s, "No device pointer\n");
-               return 0;
-       }
-
-       base = dbgfs->priv->base;
-       offset = dbgfs->entry->offset;
-       val = readl(base + offset);
-       switch (dbgfs->entry->format) {
-       case DEBUGFS_NORMAL:
-               seq_printf(s, "%u\n", val);
-               break;
-       case DEBUGFS_FLOAT:
-               seq_printf(s, "%d.%03d\n", MANT(val), FRAC(val));
-               break;
-       case DEBUGFS_REV:
-               seq_printf(s, "%c.%c.%c.%c\n", (val >> 24 & 0xff),
-                          (val >> 16 & 0xff), (val >> 8 & 0xff),
-                          val & 0xff);
-               break;
-       }
-       seq_printf(s, "0x%08x\n", val);
-
-       return 0;
-}
-
-#undef MANT
-#undef FRAC
-
-static ssize_t brcm_avs_seq_write(struct file *file, const char __user *buf,
-                                 size_t size, loff_t *ppos)
-{
-       struct seq_file *s = file->private_data;
-       struct debugfs_data *dbgfs = s->private;
-       struct private_data *priv = dbgfs->priv;
-       void __iomem *base, *avs_intr_base;
-       bool use_issue_command = false;
-       unsigned long val, offset;
-       char str[128];
-       int ret;
-       char *str_ptr = str;
-
-       if (size >= sizeof(str))
-               return -E2BIG;
-
-       memset(str, 0, sizeof(str));
-       ret = copy_from_user(str, buf, size);
-       if (ret)
-               return ret;
-
-       base = priv->base;
-       avs_intr_base = priv->avs_intr_base;
-       offset = dbgfs->entry->offset;
-       /*
-        * Special case writing to "command" entry only: if the string starts
-        * with a 'c', we use the driver's __issue_avs_command() function.
-        * Otherwise, we perform a raw write. This should allow testing of raw
-        * access as well as using the higher level function. (Raw access
-        * doesn't clear the firmware return status after issuing the command.)
-        */
-       if (str_ptr[0] == 'c' && offset == AVS_MBOX_COMMAND) {
-               use_issue_command = true;
-               str_ptr++;
-       }
-       if (kstrtoul(str_ptr, 0, &val) != 0)
-               return -EINVAL;
-
-       /*
-        * Setting the P-state is a special case. We need to update the CPU
-        * frequency we report.
-        */
-       if (val == AVS_CMD_SET_PSTATE) {
-               struct cpufreq_policy *policy;
-               unsigned int pstate;
-
-               policy = cpufreq_cpu_get(smp_processor_id());
-               /* Read back the P-state we are about to set */
-               pstate = readl(base + AVS_MBOX_PARAM(0));
-               if (use_issue_command) {
-                       ret = brcm_avs_target_index(policy, pstate);
-                       return ret ? ret : size;
-               }
-               policy->cur = policy->freq_table[pstate].frequency;
-       }
-
-       if (use_issue_command) {
-               ret = __issue_avs_command(priv, val, false, NULL);
-       } else {
-               /* Locking here is not perfect, but is only for debug. */
-               ret = down_interruptible(&priv->sem);
-               if (ret)
-                       return ret;
-
-               writel(val, base + offset);
-               /* We have to wake up the firmware to process a command. */
-               if (offset == AVS_MBOX_COMMAND)
-                       writel(AVS_CPU_L2_INT_MASK,
-                              avs_intr_base + AVS_CPU_L2_SET0);
-               up(&priv->sem);
-       }
-
-       return ret ? ret : size;
-}
-
-static struct debugfs_entry *__find_debugfs_entry(const char *name)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++)
-               if (strcasecmp(debugfs_entries[i].name, name) == 0)
-                       return &debugfs_entries[i];
-
-       return NULL;
-}
-
-static int brcm_avs_debug_open(struct inode *inode, struct file *file)
-{
-       struct debugfs_data *data;
-       fmode_t fmode;
-       int ret;
-
-       /*
-        * seq_open(), which is called by single_open(), clears "write" access.
-        * We need write access to some files, so we preserve our access mode
-        * and restore it.
-        */
-       fmode = file->f_mode;
-       /*
-        * Check access permissions even for root. We don't want to be writing
-        * to read-only registers. Access for regular users has already been
-        * checked by the VFS layer.
-        */
-       if ((fmode & FMODE_WRITER) && !(inode->i_mode & S_IWUSR))
-               return -EACCES;
-
-       data = kmalloc(sizeof(*data), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-       /*
-        * We use the same file system operations for all our debug files. To
-        * produce specific output, we look up the file name upon opening a
-        * debugfs entry and map it to a memory offset. This offset is then used
-        * in the generic "show" function to read a specific register.
-        */
-       data->entry = __find_debugfs_entry(file->f_path.dentry->d_iname);
-       data->priv = inode->i_private;
-
-       ret = single_open(file, brcm_avs_debug_show, data);
-       if (ret)
-               kfree(data);
-       file->f_mode = fmode;
-
-       return ret;
-}
-
-static int brcm_avs_debug_release(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq_priv = file->private_data;
-       struct debugfs_data *data = seq_priv->private;
-
-       kfree(data);
-       return single_release(inode, file);
-}
-
-static const struct file_operations brcm_avs_debug_ops = {
-       .open           = brcm_avs_debug_open,
-       .read           = seq_read,
-       .write          = brcm_avs_seq_write,
-       .llseek         = seq_lseek,
-       .release        = brcm_avs_debug_release,
-};
-
-static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev)
-{
-       struct private_data *priv = platform_get_drvdata(pdev);
-       struct dentry *dir;
-       int i;
-
-       if (!priv)
-               return;
-
-       dir = debugfs_create_dir(BRCM_AVS_CPUFREQ_NAME, NULL);
-       if (IS_ERR_OR_NULL(dir))
-               return;
-       priv->debugfs = dir;
-
-       for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++) {
-               /*
-                * The DEBUGFS_ENTRY macro generates uppercase strings. We
-                * convert them to lowercase before creating the debugfs
-                * entries.
-                */
-               char *entry = __strtolower(debugfs_entries[i].name);
-               fmode_t mode = debugfs_entries[i].mode;
-
-               if (!debugfs_create_file(entry, S_IFREG | S_IRUGO | mode,
-                                        dir, priv, &brcm_avs_debug_ops)) {
-                       priv->debugfs = NULL;
-                       debugfs_remove_recursive(dir);
-                       break;
-               }
-       }
-}
-
-static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev)
-{
-       struct private_data *priv = platform_get_drvdata(pdev);
-
-       if (priv && priv->debugfs) {
-               debugfs_remove_recursive(priv->debugfs);
-               priv->debugfs = NULL;
-       }
-}
-
-#else
-
-static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev) {}
-static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev) {}
-
-#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */
-
 /*
  * To ensure the right firmware is running we need to
  *    - check the MAGIC matches what we expect
@@ -1016,11 +700,8 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev)
                return ret;
 
        brcm_avs_driver.driver_data = pdev;
-       ret = cpufreq_register_driver(&brcm_avs_driver);
-       if (!ret)
-               brcm_avs_cpufreq_debug_init(pdev);
 
-       return ret;
+       return cpufreq_register_driver(&brcm_avs_driver);
 }
 
 static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
@@ -1032,8 +713,6 @@ static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       brcm_avs_cpufreq_debug_exit(pdev);
-
        priv = platform_get_drvdata(pdev);
        iounmap(priv->base);
        iounmap(priv->avs_intr_base);
index bc5fc163087607998bd594db39985dadf7f49a01..b15115a48775cb9c6b18cc94b1aa5decd67667c9 100644 (file)
@@ -126,6 +126,49 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
                                cpu->perf_caps.lowest_perf, cpu_num, ret);
 }
 
+/*
+ * The PCC subspace describes the rate at which the platform can accept
+ * commands on the shared PCC channel (including READs, which do not count
+ * towards freq transition requests), so ideally we use the PCC values as
+ * a fallback if we don't have a platform-specific transition_delay_us.
+ */
+#ifdef CONFIG_ARM64
+#include <asm/cputype.h>
+
+static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu)
+{
+       unsigned long implementor = read_cpuid_implementor();
+       unsigned long part_num = read_cpuid_part_number();
+       unsigned int delay_us = 0;
+
+       switch (implementor) {
+       case ARM_CPU_IMP_QCOM:
+               switch (part_num) {
+               case QCOM_CPU_PART_FALKOR_V1:
+               case QCOM_CPU_PART_FALKOR:
+                       delay_us = 10000;
+                       break;
+               default:
+                       delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+                       break;
+               }
+               break;
+       default:
+               delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+               break;
+       }
+
+       return delay_us;
+}
+
+#else
+
+static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu)
+{
+       return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+}
+#endif
+
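As a worked example of the fallback arithmetic (standalone sketch with a made-up firmware latency): cppc_get_transition_latency() reports nanoseconds, so dividing by NSEC_PER_USEC yields the delay in microseconds, while the quirked Falkor parts are pinned to 10 ms regardless of what PCC reports:

#include <stdio.h>

#define NSEC_PER_USEC 1000L

int main(void)
{
	long pcc_latency_ns = 500000;	/* hypothetical firmware value */

	printf("fallback delay: %ld us\n", pcc_latency_ns / NSEC_PER_USEC);
	printf("falkor quirk:   %d us\n", 10000);
	return 0;
}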
 static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
        struct cppc_cpudata *cpu;
@@ -162,8 +205,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
                cpu->perf_caps.highest_perf;
        policy->cpuinfo.max_freq = cppc_dmi_max_khz;
 
-       policy->transition_delay_us = cppc_get_transition_latency(cpu_num) /
-               NSEC_PER_USEC;
+       policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num);
        policy->shared_type = cpu->shared_type;
 
        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
index 0591874856d3c8ba037537bc863c4bbbfe21e0a5..54edaec1e60837dfcc515430012979482652271a 100644 (file)
@@ -679,6 +679,16 @@ void gpstate_timer_handler(struct timer_list *t)
 
        if (!spin_trylock(&gpstates->gpstate_lock))
                return;
+       /*
+        * If the timer has migrated to a different cpu then bring
+        * it back to one of the policy->cpus
+        */
+       if (!cpumask_test_cpu(raw_smp_processor_id(), policy->cpus)) {
+               gpstates->timer.expires = jiffies + msecs_to_jiffies(1);
+               add_timer_on(&gpstates->timer, cpumask_first(policy->cpus));
+               spin_unlock(&gpstates->gpstate_lock);
+               return;
+       }
 
        /*
         * If PMCR was last updated using fast_switch then
@@ -718,10 +728,8 @@ void gpstate_timer_handler(struct timer_list *t)
        if (gpstate_idx != gpstates->last_lpstate_idx)
                queue_gpstate_timer(gpstates);
 
+       set_pstate(&freq_data);
        spin_unlock(&gpstates->gpstate_lock);
-
-       /* Timer may get migrated to a different cpu on cpu hot unplug */
-       smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
 }
 
 /*
index be8606457f27f02dc046ad1e8f02841742f8e4b4..aff2c1594220a9973e63894255d6873c2a9a53e7 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/mman.h>
 #include "dax-private.h"
 #include "dax.h"
 
@@ -540,6 +541,7 @@ static const struct file_operations dax_fops = {
        .release = dax_release,
        .get_unmapped_area = dax_get_unmapped_area,
        .mmap = dax_mmap,
+       .mmap_supported_flags = MAP_SYNC,
 };
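With mmap_supported_flags advertising MAP_SYNC, userspace can now request a synchronous DAX mapping on a device-dax node. A caller-side sketch; the device path is illustrative, and the fallback defines assume the values in uapi asm-generic/mman.h:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE 0x03
#endif
#ifndef MAP_SYNC
#define MAP_SYNC 0x080000
#endif

int main(void)
{
	int fd = open("/dev/dax0.0", O_RDWR);	/* illustrative device */
	void *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
		 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED)
		perror("mmap");
	close(fd);
	return 0;
}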
 
 static void dev_dax_release(struct device *dev)
index d29275b97e8453a1fd7731fe4564bf0a61b9460c..4a828c18099a6e411ff928256992c836f93ebdcb 100644 (file)
@@ -524,6 +524,14 @@ static int bam_alloc_chan(struct dma_chan *chan)
        return 0;
 }
 
+static int bam_pm_runtime_get_sync(struct device *dev)
+{
+       if (pm_runtime_enabled(dev))
+               return pm_runtime_get_sync(dev);
+
+       return 0;
+}
+
 /**
  * bam_free_chan - Frees dma resources associated with specific channel
  * @chan: specified channel
@@ -539,7 +547,7 @@ static void bam_free_chan(struct dma_chan *chan)
        unsigned long flags;
        int ret;
 
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
        if (ret < 0)
                return;
 
@@ -720,7 +728,7 @@ static int bam_pause(struct dma_chan *chan)
        unsigned long flag;
        int ret;
 
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
        if (ret < 0)
                return ret;
 
@@ -746,7 +754,7 @@ static int bam_resume(struct dma_chan *chan)
        unsigned long flag;
        int ret;
 
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
        if (ret < 0)
                return ret;
 
@@ -852,7 +860,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data)
        if (srcs & P_IRQ)
                tasklet_schedule(&bdev->task);
 
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
        if (ret < 0)
                return ret;
 
@@ -969,7 +977,7 @@ static void bam_start_dma(struct bam_chan *bchan)
        if (!vd)
                return;
 
-       ret = pm_runtime_get_sync(bdev->dev);
+       ret = bam_pm_runtime_get_sync(bdev->dev);
        if (ret < 0)
                return;
 
index e6f17825db7942594e657ce9126b2e8612d45c86..2b90606452a2faf1a845b82bfd5bd9ce83b8bd00 100644 (file)
@@ -284,7 +284,7 @@ scmi_clock_info_get(const struct scmi_handle *handle, u32 clk_id)
        struct clock_info *ci = handle->clk_priv;
        struct scmi_clock_info *clk = ci->clk + clk_id;
 
-       if (!clk->name || !clk->name[0])
+       if (!clk->name[0])
                return NULL;
 
        return clk;
index 14b147135a0cbf31be2d5506790bd55fd25f43c9..2455be8cbc4f75b1d8ebe12044d341c23bf85331 100644 (file)
@@ -778,6 +778,7 @@ scmi_create_protocol_device(struct device_node *np, struct scmi_info *info,
        if (scmi_mbox_chan_setup(info, &sdev->dev, prot_id)) {
                dev_err(&sdev->dev, "failed to setup transport\n");
                scmi_device_destroy(sdev);
+               return;
        }
 
        /* setup handle now as the transport is ready */
index b9bd827caa22ca29fe5303bfdf33395831d4128a..1b4d465cc5d9f9f998869f80d3895095ea86c087 100644 (file)
@@ -97,6 +97,16 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
                u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ?
                             (phys_seed >> 32) & mask : TEXT_OFFSET;
 
+               /*
+                * With CONFIG_RANDOMIZE_TEXT_OFFSET=y, TEXT_OFFSET may not
+                * be a multiple of EFI_KIMG_ALIGN, and we must ensure that
+                * we preserve the misalignment of 'offset' relative to
+                * EFI_KIMG_ALIGN so that statically allocated objects whose
+                * alignment exceeds PAGE_SIZE appear correctly aligned in
+                * memory.
+                */
+               offset |= TEXT_OFFSET % EFI_KIMG_ALIGN;
+
                /*
                 * If KASLR is enabled, and we have some randomness available,
                 * locate the kernel at a randomized offset in physical memory.
index 14f14efdf0d53ca23c24d7c514cdd4c4ad4b8174..06d212a3d49dd6ebdba471aa1814759ff828d1aa 100644 (file)
@@ -249,7 +249,7 @@ static int altera_ps_probe(struct spi_device *spi)
 
        conf->data = of_id->data;
        conf->spi = spi;
-       conf->config = devm_gpiod_get(&spi->dev, "nconfig", GPIOD_OUT_HIGH);
+       conf->config = devm_gpiod_get(&spi->dev, "nconfig", GPIOD_OUT_LOW);
        if (IS_ERR(conf->config)) {
                dev_err(&spi->dev, "Failed to get config gpio: %ld\n",
                        PTR_ERR(conf->config));
index 77e485557498329193fd291194cc037de55c8160..6f693b7d5220a02b110ae28b8fb6dd5a997d631f 100644 (file)
@@ -384,7 +384,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
        if (set)
                reg |= bit;
        else
-               reg &= bit;
+               reg &= ~bit;
        iowrite32(reg, addr);
 
        spin_unlock_irqrestore(&gpio->lock, flags);
index 1948724d8c369df7baab3e889138408c1e860db8..25d16b2af1c39b305830d837c5aff62ecaf95ae2 100644 (file)
@@ -116,9 +116,9 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
        unsigned long word_mask;
        const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
        unsigned long port_state;
-       u8 __iomem ports[] = {
-               idio16gpio->reg->out0_7, idio16gpio->reg->out8_15,
-               idio16gpio->reg->in0_7, idio16gpio->reg->in8_15,
+       void __iomem *ports[] = {
+               &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
+               &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
        };
 
        /* clear bits array to a clean slate */
@@ -143,7 +143,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
                }
 
                /* read bits from current gpio port */
-               port_state = ioread8(ports + i);
+               port_state = ioread8(ports[i]);
 
                /* store acquired bits at respective bits array offset */
                bits[word_index] |= port_state << word_offset;
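The type change above is the substance of this fix: the old u8 __iomem array copied register *values* onto the stack and then performed I/O reads at stack addresses ('ports + i'). Keeping the register *addresses* makes ioread8() hit the device. A sketch with illustrative names:

/* Wrong (old code shape): values on the stack, then MMIO reads of
 * stack memory at 'vals + i'. */
u8 __iomem vals[2] = { reg->in0_7, reg->in8_15 };

/* Right (this fix): keep the MMIO addresses, dereference via ioread8(). */
void __iomem *addrs[2] = { &reg->in0_7, &reg->in8_15 };
u8 state = ioread8(addrs[1]);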
index 835607ecf658efc95af727767f81caaaf9180d9c..f953541e78901b3971242338492af07a570b533d 100644 (file)
@@ -206,10 +206,10 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
        unsigned long word_mask;
        const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
        unsigned long port_state;
-       u8 __iomem ports[] = {
-               idio24gpio->reg->out0_7, idio24gpio->reg->out8_15,
-               idio24gpio->reg->out16_23, idio24gpio->reg->in0_7,
-               idio24gpio->reg->in8_15, idio24gpio->reg->in16_23,
+       void __iomem *ports[] = {
+               &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
+               &idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
+               &idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
        };
        const unsigned long out_mode_mask = BIT(1);
 
@@ -217,7 +217,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
        bitmap_zero(bits, chip->ngpio);
 
        /* get bits are evaluated a gpio port register at a time */
-       for (i = 0; i < ARRAY_SIZE(ports); i++) {
+       for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) {
                /* gpio offset in bits array */
                bits_offset = i * gpio_reg_size;
 
@@ -236,7 +236,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
 
                /* read bits from current gpio port (port 6 is TTL GPIO) */
                if (i < 6)
-                       port_state = ioread8(ports + i);
+                       port_state = ioread8(ports[i]);
                else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
                        port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
                else
@@ -301,9 +301,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
        const unsigned long port_mask = GENMASK(gpio_reg_size, 0);
        unsigned long flags;
        unsigned int out_state;
-       u8 __iomem ports[] = {
-               idio24gpio->reg->out0_7, idio24gpio->reg->out8_15,
-               idio24gpio->reg->out16_23
+       void __iomem *ports[] = {
+               &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
+               &idio24gpio->reg->out16_23
        };
        const unsigned long out_mode_mask = BIT(1);
        const unsigned int ttl_offset = 48;
@@ -327,9 +327,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
                raw_spin_lock_irqsave(&idio24gpio->lock, flags);
 
                /* process output lines */
-               out_state = ioread8(ports + i) & ~gpio_mask;
+               out_state = ioread8(ports[i]) & ~gpio_mask;
                out_state |= (*bits >> bits_offset) & gpio_mask;
-               iowrite8(out_state, ports + i);
+               iowrite8(out_state, ports[i]);
 
                raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
        }
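
Why the two GPIO hunks above work (apparently drivers/gpio/gpio-pci-idio-16.c and gpio-pcie-idio-24.c): declaring the array as `u8 __iomem ports[]` copied the registers' *values* onto the stack at function entry, so every later ioread8(ports + i) / iowrite8(..., ports + i) targeted the stack copy rather than the device. Storing `void __iomem *` pointers keeps the register *addresses*, and ioread8(ports[i]) reaches the hardware. A minimal userspace sketch of the value-vs-address distinction (hypothetical names, not the driver):

/*
 * Plain-C illustration: an array of u8 captures register values at
 * initialization; an array of pointers captures register addresses.
 */
#include <stdio.h>

struct regs { unsigned char out0_7, out8_15; };

int main(void)
{
	struct regs hw = { 0xAA, 0xBB };

	unsigned char by_value[] = { hw.out0_7, hw.out8_15 };	/* snapshot */
	unsigned char *by_addr[] = { &hw.out0_7, &hw.out8_15 };	/* live */

	hw.out0_7 = 0x11;	/* the "register" changes after init */

	printf("stale copy: 0x%02x  live register: 0x%02x\n",
	       by_value[0], *by_addr[0]);	/* prints 0xaa vs 0x11 */
	return 0;
}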
index 43aeb07343ec76fe3eeef0283d8d52327530bfdf..d8ccb500872fa2eacd26a91c473ee02a131cea18 100644 (file)
@@ -497,7 +497,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
        struct gpiohandle_request handlereq;
        struct linehandle_state *lh;
        struct file *file;
-       int fd, i, ret;
+       int fd, i, count = 0, ret;
        u32 lflags;
 
        if (copy_from_user(&handlereq, ip, sizeof(handlereq)))
@@ -558,6 +558,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
                if (ret)
                        goto out_free_descs;
                lh->descs[i] = desc;
+               count = i;
 
                if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW)
                        set_bit(FLAG_ACTIVE_LOW, &desc->flags);
@@ -628,7 +629,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 out_put_unused_fd:
        put_unused_fd(fd);
 out_free_descs:
-       for (; i >= 0; i--)
+       for (i = 0; i < count; i++)
                gpiod_free(lh->descs[i]);
        kfree(lh->label);
 out_free_lh:
@@ -902,7 +903,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
        desc = &gdev->descs[offset];
        ret = gpiod_request(desc, le->label);
        if (ret)
-               goto out_free_desc;
+               goto out_free_label;
        le->desc = desc;
        le->eflags = eflags;
 
index 09d35051fdd68689ac00d69504765166e738076f..3fabf9f97022e9c74c2fe0f59e9207b408a6546f 100644 (file)
@@ -419,9 +419,11 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
 
        if (other) {
                signed long r;
-               r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+               r = dma_fence_wait(other, true);
                if (r < 0) {
-                       DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+                       if (r != -ERESTARTSYS)
+                               DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
                        return r;
                }
        }
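
Context for the amdgpu_ctx hunk above: dma_fence_wait(fence, true) is the interruptible variant, and it fails with -ERESTARTSYS whenever the waiting task catches a signal. That is normal control flow (the syscall gets restarted), not a GPU error, so the patch stops logging it. A hedged kernel-style sketch of the pattern, a fragment for illustration rather than the driver function:

#include <linux/dma-fence.h>
#include <linux/errno.h>
#include <linux/printk.h>

static long wait_for_dependency(struct dma_fence *fence)
{
	long r = dma_fence_wait(fence, true);	/* interruptible wait */

	if (r == -ERESTARTSYS)
		return r;	/* signal caught; restart quietly, no log */
	if (r < 0)
		pr_err("fence wait failed: %ld\n", r);
	return r;
}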
index b0e591eaa71a21754e1e2246bf41df3cdd1936cf..e14263fca1c91472ead16fd5bfdceed0b11be81e 100644 (file)
@@ -1459,10 +1459,11 @@ static const u32 sgpr_init_compute_shader[] =
 static const u32 vgpr_init_regs[] =
 {
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
-       mmCOMPUTE_RESOURCE_LIMITS, 0,
+       mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
+       mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
@@ -1479,10 +1480,11 @@ static const u32 vgpr_init_regs[] =
 static const u32 sgpr1_init_regs[] =
 {
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
-       mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
+       mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
+       mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
@@ -1503,6 +1505,7 @@ static const u32 sgpr2_init_regs[] =
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
+       mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
index ed2f06c9f3462173d0d2f8cf2ce6a09e0a532e94..3858820a0055d31fb30621f55efa9067d8eebfd8 100644 (file)
@@ -6,5 +6,6 @@ config HSA_AMD
        tristate "HSA kernel driver for AMD GPU devices"
        depends on DRM_AMDGPU && X86_64
        imply AMD_IOMMU_V2
+       select MMU_NOTIFIER
        help
          Enable this if you want to use HSA features on AMD GPU devices.
index cd679cf1fd308fa5e436c6691399351e0f2f0c71..59808a39ecf4afb98587369384111948b4997adc 100644 (file)
@@ -749,12 +749,13 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
        struct timespec64 time;
 
        dev = kfd_device_by_id(args->gpu_id);
-       if (dev == NULL)
-               return -EINVAL;
-
-       /* Reading GPU clock counter from KGD */
-       args->gpu_clock_counter =
-               dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
+       if (dev)
+               /* Reading GPU clock counter from KGD */
+               args->gpu_clock_counter =
+                       dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
+       else
+               /* Node without GPU resource */
+               args->gpu_clock_counter = 0;
 
        /* No access to rdtsc. Using raw monotonic time */
        getrawmonotonic64(&time);
@@ -1147,7 +1148,7 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
        return ret;
 }
 
-bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
 {
        struct kfd_local_mem_info mem_info;
 
@@ -1421,7 +1422,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 
        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
-               err = PTR_ERR(pdd);
+               err = -EINVAL;
                goto bind_process_to_device_failed;
        }
 
index 4e2f379ce2172c4a8b7925aec15873105f4b2c40..1dd1142246c25f0ef3a2b7cbae42bc534f86491e 100644 (file)
@@ -4557,6 +4557,7 @@ static int dm_update_crtcs_state(struct dc *dc,
                struct amdgpu_dm_connector *aconnector = NULL;
                struct drm_connector_state *new_con_state = NULL;
                struct dm_connector_state *dm_conn_state = NULL;
+               struct drm_plane_state *new_plane_state = NULL;
 
                new_stream = NULL;
 
@@ -4564,6 +4565,13 @@ static int dm_update_crtcs_state(struct dc *dc,
                dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
                acrtc = to_amdgpu_crtc(crtc);
 
+               new_plane_state = drm_atomic_get_new_plane_state(state, new_crtc_state->crtc->primary);
+
+               if (new_crtc_state->enable && new_plane_state && !new_plane_state->fb) {
+                       ret = -EINVAL;
+                       goto fail;
+               }
+
                aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
 
                /* TODO This hack should go away */
@@ -4760,7 +4768,7 @@ static int dm_update_planes_state(struct dc *dc,
                        if (!dm_old_crtc_state->stream)
                                continue;
 
-                       DRM_DEBUG_DRIVER("Disabling DRM plane: %d on DRM crtc %d\n",
+                       DRM_DEBUG_ATOMIC("Disabling DRM plane: %d on DRM crtc %d\n",
                                        plane->base.id, old_plane_crtc->base.id);
 
                        if (!dc_remove_plane_from_context(
index f6cb502c303f3e76fac70c1f797525e78e940a48..25f064c010384664c9c157382ddb518cd7d6af4a 100644 (file)
@@ -138,13 +138,6 @@ int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc)
        lut = (struct drm_color_lut *)blob->data;
        lut_size = blob->length / sizeof(struct drm_color_lut);
 
-       if (__is_lut_linear(lut, lut_size)) {
-               /* Set to bypass if lut is set to linear */
-               stream->out_transfer_func->type = TF_TYPE_BYPASS;
-               stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-               return 0;
-       }
-
        gamma = dc_create_gamma();
        if (!gamma)
                return -ENOMEM;
index 490017df371de3bd1ae902f1794ea46d2c6a7a96..4be21bf5474981a8b954f4b1fb6b5c6a283cc82b 100644 (file)
@@ -329,14 +329,15 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev)
 {
        int src;
        struct irq_list_head *lh;
+       unsigned long irq_table_flags;
        DRM_DEBUG_KMS("DM_IRQ: releasing resources.\n");
-
        for (src = 0; src < DAL_IRQ_SOURCES_NUMBER; src++) {
-
+               DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
                /* The handler was removed from the table,
                 * it means it is safe to flush all the 'work'
                 * (because no code can schedule a new one). */
                lh = &adev->dm.irq_handler_list_low_tab[src];
+               DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
                flush_work(&lh->work);
        }
 }
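
The irq_fini hunk above narrows the lock to the table lookup and calls flush_work() with the lock dropped; flushing while holding the table lock could deadlock if the flushed work, or a concurrent handler update, ever needs the same lock. A generic pthread-based analogue of the "lock the lookup, not the blocking call" shape (hypothetical, not the DM code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static int table[4] = { 1, 2, 3, 4 };

static void flush(int v)	/* stands in for flush_work(); may block */
{
	printf("flushed %d\n", v);
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		int snapshot;

		pthread_mutex_lock(&table_lock);
		snapshot = table[i];	/* only the lookup is under the lock */
		pthread_mutex_unlock(&table_lock);

		flush(snapshot);	/* blocking call runs with lock dropped */
	}
	return 0;
}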
index 8291d74f26bcfd8934f424232297b7b60822268d..4304d9e408b88d180eabac07327497fdda353b25 100644 (file)
@@ -83,21 +83,22 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
        enum i2c_mot_mode mot = (msg->request & DP_AUX_I2C_MOT) ?
                I2C_MOT_TRUE : I2C_MOT_FALSE;
        enum ddc_result res;
-       ssize_t read_bytes;
+       uint32_t read_bytes = msg->size;
 
        if (WARN_ON(msg->size > 16))
                return -E2BIG;
 
        switch (msg->request & ~DP_AUX_I2C_MOT) {
        case DP_AUX_NATIVE_READ:
-               read_bytes = dal_ddc_service_read_dpcd_data(
+               res = dal_ddc_service_read_dpcd_data(
                                TO_DM_AUX(aux)->ddc_service,
                                false,
                                I2C_MOT_UNDEF,
                                msg->address,
                                msg->buffer,
-                               msg->size);
-               return read_bytes;
+                               msg->size,
+                               &read_bytes);
+               break;
        case DP_AUX_NATIVE_WRITE:
                res = dal_ddc_service_write_dpcd_data(
                                TO_DM_AUX(aux)->ddc_service,
@@ -108,14 +109,15 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
                                msg->size);
                break;
        case DP_AUX_I2C_READ:
-               read_bytes = dal_ddc_service_read_dpcd_data(
+               res = dal_ddc_service_read_dpcd_data(
                                TO_DM_AUX(aux)->ddc_service,
                                true,
                                mot,
                                msg->address,
                                msg->buffer,
-                               msg->size);
-               return read_bytes;
+                               msg->size,
+                               &read_bytes);
+               break;
        case DP_AUX_I2C_WRITE:
                res = dal_ddc_service_write_dpcd_data(
                                TO_DM_AUX(aux)->ddc_service,
@@ -137,7 +139,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
                 r == DDC_RESULT_SUCESSFULL);
 #endif
 
-       return msg->size;
+       if (res != DDC_RESULT_SUCESSFULL)
+               return -EIO;
+       return read_bytes;
 }
 
 static enum drm_connector_status
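
The aux-transfer rework above separates two results the old code folded into one ssize_t: the DDC status (did the transaction succeed?) and the number of bytes actually transferred. Returning the status and passing the count through an out-parameter lets the caller report -EIO precisely. A runnable sketch of that calling convention (hypothetical names):

#include <stdio.h>
#include <stdint.h>

enum xfer_result { XFER_OK, XFER_FAILED };

static enum xfer_result read_block(uint8_t *buf, uint32_t len, uint32_t *read)
{
	*read = 0;
	if (len > 16)
		return XFER_FAILED;	/* nothing read; status says why */
	for (uint32_t i = 0; i < len; i++)
		buf[i] = (uint8_t)i;
	*read = len;			/* byte count travels separately */
	return XFER_OK;
}

int main(void)
{
	uint8_t buf[16];
	uint32_t got;

	if (read_block(buf, sizeof(buf), &got) == XFER_OK)
		printf("read %u bytes\n", got);
	return 0;
}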
@@ -161,6 +165,11 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector)
        struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
        struct amdgpu_encoder *amdgpu_encoder = amdgpu_dm_connector->mst_encoder;
 
+       if (amdgpu_dm_connector->edid) {
+               kfree(amdgpu_dm_connector->edid);
+               amdgpu_dm_connector->edid = NULL;
+       }
+
        drm_encoder_cleanup(&amdgpu_encoder->base);
        kfree(amdgpu_encoder);
        drm_connector_cleanup(connector);
@@ -181,28 +190,22 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
 void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
 {
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-       struct edid *edid;
        struct dc_sink *dc_sink;
        struct dc_sink_init_data init_params = {
                        .link = aconnector->dc_link,
                        .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
 
+       /* FIXME none of this is safe. we shouldn't touch aconnector here in
+        * atomic_check
+        */
+
        /*
         * TODO: Need to further figure out why ddc.algo is NULL while MST port exists
         */
        if (!aconnector->port || !aconnector->port->aux.ddc.algo)
                return;
 
-       edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
-
-       if (!edid) {
-               drm_mode_connector_update_edid_property(
-                       &aconnector->base,
-                       NULL);
-               return;
-       }
-
-       aconnector->edid = edid;
+       ASSERT(aconnector->edid);
 
        dc_sink = dc_link_add_remote_sink(
                aconnector->dc_link,
@@ -215,9 +218,6 @@ void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
 
        amdgpu_dm_add_sink_to_freesync_module(
                        connector, aconnector->edid);
-
-       drm_mode_connector_update_edid_property(
-                                       &aconnector->base, aconnector->edid);
 }
 
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
@@ -230,10 +230,6 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
 
        if (!aconnector->edid) {
                struct edid *edid;
-               struct dc_sink *dc_sink;
-               struct dc_sink_init_data init_params = {
-                               .link = aconnector->dc_link,
-                               .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
                edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
 
                if (!edid) {
@@ -244,11 +240,17 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
                }
 
                aconnector->edid = edid;
+       }
 
+       if (!aconnector->dc_sink) {
+               struct dc_sink *dc_sink;
+               struct dc_sink_init_data init_params = {
+                               .link = aconnector->dc_link,
+                               .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
                dc_sink = dc_link_add_remote_sink(
                        aconnector->dc_link,
-                       (uint8_t *)edid,
-                       (edid->extensions + 1) * EDID_LENGTH,
+                       (uint8_t *)aconnector->edid,
+                       (aconnector->edid->extensions + 1) * EDID_LENGTH,
                        &init_params);
 
                dc_sink->priv = aconnector;
@@ -256,12 +258,12 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
 
                if (aconnector->dc_sink)
                        amdgpu_dm_add_sink_to_freesync_module(
-                                       connector, edid);
-
-               drm_mode_connector_update_edid_property(
-                                               &aconnector->base, edid);
+                                       connector, aconnector->edid);
        }
 
+       drm_mode_connector_update_edid_property(
+                                       &aconnector->base, aconnector->edid);
+
        ret = drm_add_edid_modes(connector, aconnector->edid);
 
        return ret;
@@ -424,14 +426,6 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                dc_sink_release(aconnector->dc_sink);
                aconnector->dc_sink = NULL;
        }
-       if (aconnector->edid) {
-               kfree(aconnector->edid);
-               aconnector->edid = NULL;
-       }
-
-       drm_mode_connector_update_edid_property(
-                       &aconnector->base,
-                       NULL);
 
        aconnector->mst_connected = false;
 }
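
The MST hunks above move EDID ownership to the connector object: the cached EDID is now freed exactly once, in dm_dp_mst_connector_destroy(), instead of on every topology disconnect, and the EDID property is updated from a single place in dm_dp_mst_get_modes(). A minimal sketch of single-owner cleanup in a destructor (hypothetical types):

#include <stdlib.h>

struct connector {
	void *edid;	/* owned by the connector for its whole lifetime */
};

static void connector_destroy(struct connector *c)
{
	free(c->edid);	/* single point of release; free(NULL) is a no-op */
	c->edid = NULL;
}

int main(void)
{
	struct connector c = { malloc(128) };

	connector_destroy(&c);
	return 0;
}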
index 985fe8c22875abe3610de2f8fb0dc64c12bba2cf..10a5807a7e8b20e59a2e02009ad7fe0ebe5bfa6c 100644 (file)
@@ -70,6 +70,10 @@ static enum bp_result get_firmware_info_v3_1(
        struct bios_parser *bp,
        struct dc_firmware_info *info);
 
+static enum bp_result get_firmware_info_v3_2(
+       struct bios_parser *bp,
+       struct dc_firmware_info *info);
+
 static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp,
                struct atom_display_object_path_v2 *object);
 
@@ -1321,9 +1325,11 @@ static enum bp_result bios_parser_get_firmware_info(
                case 3:
                        switch (revision.minor) {
                        case 1:
-                       case 2:
                                result = get_firmware_info_v3_1(bp, info);
                                break;
+                       case 2:
+                               result = get_firmware_info_v3_2(bp, info);
+                               break;
                        default:
                                break;
                        }
@@ -1383,6 +1389,84 @@ static enum bp_result get_firmware_info_v3_1(
        return BP_RESULT_OK;
 }
 
+static enum bp_result get_firmware_info_v3_2(
+       struct bios_parser *bp,
+       struct dc_firmware_info *info)
+{
+       struct atom_firmware_info_v3_2 *firmware_info;
+       struct atom_display_controller_info_v4_1 *dce_info = NULL;
+       struct atom_common_table_header *header;
+       struct atom_data_revision revision;
+       struct atom_smu_info_v3_2 *smu_info_v3_2 = NULL;
+       struct atom_smu_info_v3_3 *smu_info_v3_3 = NULL;
+
+       if (!info)
+               return BP_RESULT_BADINPUT;
+
+       firmware_info = GET_IMAGE(struct atom_firmware_info_v3_2,
+                       DATA_TABLES(firmwareinfo));
+
+       dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1,
+                       DATA_TABLES(dce_info));
+
+       if (!firmware_info || !dce_info)
+               return BP_RESULT_BADBIOSTABLE;
+
+       memset(info, 0, sizeof(*info));
+
+       header = GET_IMAGE(struct atom_common_table_header,
+                                       DATA_TABLES(smu_info));
+       get_atom_data_table_revision(header, &revision);
+
+       if (revision.minor == 2) {
+               /* Vega12 */
+               smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
+                                                       DATA_TABLES(smu_info));
+
+               if (!smu_info_v3_2)
+                       return BP_RESULT_BADBIOSTABLE;
+
+               info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
+       } else if (revision.minor == 3) {
+               /* Vega20 */
+               smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
+                                                       DATA_TABLES(smu_info));
+
+               if (!smu_info_v3_3)
+                       return BP_RESULT_BADBIOSTABLE;
+
+               info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
+       }
+
+        // We need to convert from 10KHz units into KHz units.
+       info->default_memory_clk = firmware_info->bootup_mclk_in10khz * 10;
+
+        /* 27MHz for Vega10 & Vega12; 100MHz for Vega20 */
+       info->pll_info.crystal_frequency = dce_info->dce_refclk_10khz * 10;
+       /* Hardcode frequency if BIOS gives no DCE Ref Clk */
+       if (info->pll_info.crystal_frequency == 0) {
+               if (revision.minor == 2)
+                       info->pll_info.crystal_frequency = 27000;
+               else if (revision.minor == 3)
+                       info->pll_info.crystal_frequency = 100000;
+       }
+       /*dp_phy_ref_clk is not correct for atom_display_controller_info_v4_2, but we don't use it*/
+       info->dp_phy_ref_clk     = dce_info->dpphy_refclk_10khz * 10;
+       info->i2c_engine_ref_clk = dce_info->i2c_engine_refclk_10khz * 10;
+
+       /* Get GPU PLL VCO Clock */
+       if (bp->cmd_tbl.get_smu_clock_info != NULL) {
+               if (revision.minor == 2)
+                       info->smu_gpu_pll_output_freq =
+                                       bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10;
+               else if (revision.minor == 3)
+                       info->smu_gpu_pll_output_freq =
+                                       bp->cmd_tbl.get_smu_clock_info(bp, SMU11_SYSPLL3_0_ID) * 10;
+       }
+
+       return BP_RESULT_OK;
+}
+
 static enum bp_result bios_parser_get_encoder_cap_info(
        struct dc_bios *dcb,
        struct graphics_object_id object_id,
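
The new get_firmware_info_v3_2() above leans on two conventions worth noting: BIOS tables are dispatched on their (major, minor) revision pair, and clock fields are stored in 10 kHz units, so consumers multiply by 10 to get kHz. A runnable, much-simplified sketch of both (hypothetical types, not the parser itself):

#include <stdio.h>
#include <stdint.h>

struct table_header { uint8_t major, minor; };

static uint32_t bootup_dcefclk_khz(const struct table_header *h,
				   uint32_t dcefclk_10khz)
{
	if (h->major != 3)
		return 0;		/* unknown layout: caller falls back */
	switch (h->minor) {
	case 1:				/* v3_1 table layout */
	case 2:				/* v3_2 layout (Vega12/Vega20 SMU info) */
		return dcefclk_10khz * 10;	/* 10 kHz -> kHz */
	default:
		return 0;
	}
}

int main(void)
{
	struct table_header h = { 3, 2 };

	printf("%u kHz\n", bootup_dcefclk_khz(&h, 60000));	/* 600000 */
	return 0;
}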
index 49c2face1e7a869e07e94da881e7ed33acbe483e..ae48d603ebd6ca73c289c71f50795f5d3bd6f65e 100644 (file)
@@ -629,13 +629,14 @@ bool dal_ddc_service_query_ddc_data(
        return ret;
 }
 
-ssize_t dal_ddc_service_read_dpcd_data(
+enum ddc_result dal_ddc_service_read_dpcd_data(
        struct ddc_service *ddc,
        bool i2c,
        enum i2c_mot_mode mot,
        uint32_t address,
        uint8_t *data,
-       uint32_t len)
+       uint32_t len,
+       uint32_t *read)
 {
        struct aux_payload read_payload = {
                .i2c_over_aux = i2c,
@@ -652,6 +653,8 @@ ssize_t dal_ddc_service_read_dpcd_data(
                .mot = mot
        };
 
+       *read = 0;
+
        if (len > DEFAULT_AUX_MAX_DATA_SIZE) {
                BREAK_TO_DEBUGGER();
                return DDC_RESULT_FAILED_INVALID_OPERATION;
@@ -661,7 +664,8 @@ ssize_t dal_ddc_service_read_dpcd_data(
                ddc->ctx->i2caux,
                ddc->ddc_pin,
                &command)) {
-               return (ssize_t)command.payloads->length;
+               *read = command.payloads->length;
+               return DDC_RESULT_SUCESSFULL;
        }
 
        return DDC_RESULT_FAILED_OPERATION;
index ade5b8ee9c3c0ead9272204360e9372208ba2b50..132eef3826e2d99b71115d509e887887e9eb220b 100644 (file)
@@ -66,8 +66,8 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc)
 {
        struct dc *core_dc = dc;
 
-       struct dc_plane_state *plane_state = kzalloc(sizeof(*plane_state),
-                                                    GFP_KERNEL);
+       struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state),
+                                                     GFP_KERNEL);
 
        if (NULL == plane_state)
                return NULL;
@@ -120,7 +120,7 @@ static void dc_plane_state_free(struct kref *kref)
 {
        struct dc_plane_state *plane_state = container_of(kref, struct dc_plane_state, refcount);
        destruct(plane_state);
-       kfree(plane_state);
+       kvfree(plane_state);
 }
 
 void dc_plane_state_release(struct dc_plane_state *plane_state)
@@ -136,7 +136,7 @@ void dc_gamma_retain(struct dc_gamma *gamma)
 static void dc_gamma_free(struct kref *kref)
 {
        struct dc_gamma *gamma = container_of(kref, struct dc_gamma, refcount);
-       kfree(gamma);
+       kvfree(gamma);
 }
 
 void dc_gamma_release(struct dc_gamma **gamma)
@@ -147,7 +147,7 @@ void dc_gamma_release(struct dc_gamma **gamma)
 
 struct dc_gamma *dc_create_gamma(void)
 {
-       struct dc_gamma *gamma = kzalloc(sizeof(*gamma), GFP_KERNEL);
+       struct dc_gamma *gamma = kvzalloc(sizeof(*gamma), GFP_KERNEL);
 
        if (gamma == NULL)
                goto alloc_fail;
@@ -167,7 +167,7 @@ void dc_transfer_func_retain(struct dc_transfer_func *tf)
 static void dc_transfer_func_free(struct kref *kref)
 {
        struct dc_transfer_func *tf = container_of(kref, struct dc_transfer_func, refcount);
-       kfree(tf);
+       kvfree(tf);
 }
 
 void dc_transfer_func_release(struct dc_transfer_func *tf)
@@ -177,7 +177,7 @@ void dc_transfer_func_release(struct dc_transfer_func *tf)
 
 struct dc_transfer_func *dc_create_transfer_func(void)
 {
-       struct dc_transfer_func *tf = kzalloc(sizeof(*tf), GFP_KERNEL);
+       struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL);
 
        if (tf == NULL)
                goto alloc_fail;
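
On the kzalloc-to-kvzalloc conversions above (and the matching ones in the color module further down): kvzalloc() attempts a normal kmalloc first and falls back to vmalloc when the request is large or physically contiguous memory is scarce, and memory obtained this way must be released with kvfree(), which handles both backings. A hedged kernel-style fragment of the pairing (hypothetical helpers):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/types.h>

static u32 *alloc_points(size_t n)
{
	/* large LUTs: allow a vmalloc fallback rather than failing */
	return kvzalloc(n * sizeof(u32), GFP_KERNEL);
}

static void free_points(u32 *points)
{
	kvfree(points);	/* correct for kmalloc- and vmalloc-backed memory */
}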
index 090b7a8dd67bde2bdfdaf243c04d175abf82112c..30b3a08b91be27dade29620cfc5dbf857c89bad1 100644 (file)
@@ -102,13 +102,14 @@ bool dal_ddc_service_query_ddc_data(
                uint8_t *read_buf,
                uint32_t read_size);
 
-ssize_t dal_ddc_service_read_dpcd_data(
+enum ddc_result dal_ddc_service_read_dpcd_data(
                struct ddc_service *ddc,
                bool i2c,
                enum i2c_mot_mode mot,
                uint32_t address,
                uint8_t *data,
-               uint32_t len);
+               uint32_t len,
+               uint32_t *read);
 
 enum ddc_result dal_ddc_service_write_dpcd_data(
                struct ddc_service *ddc,
index 9831cb5eaa7cfc8b67d33c8ee4cbcc7ec3b915c3..9b0a04f99ac836188dd06c071486f87f626eb191 100644 (file)
 
 #define AI_GREENLAND_P_A0 1
 #define AI_GREENLAND_P_A1 2
+#define AI_UNKNOWN 0xFF
 
-#define ASICREV_IS_GREENLAND_M(eChipRev)  (eChipRev < AI_UNKNOWN)
-#define ASICREV_IS_GREENLAND_P(eChipRev)  (eChipRev < AI_UNKNOWN)
+#define AI_VEGA12_P_A0 20
+#define ASICREV_IS_GREENLAND_M(eChipRev)  (eChipRev < AI_VEGA12_P_A0)
+#define ASICREV_IS_GREENLAND_P(eChipRev)  (eChipRev < AI_VEGA12_P_A0)
+
+#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
+#define ASICREV_IS_VEGA12_p(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
 
 /* DCN1_0 */
 #define INTERNAL_REV_RAVEN_A0             0x00    /* First spin of Raven */
index e7e374f5686449d5ef6f596c8a4aa92a9e40ec2a..b3747a019debf8fe7336b408fdaa032e9feae056 100644 (file)
@@ -1093,19 +1093,19 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
 
        output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
 
-       rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
-                          GFP_KERNEL);
+       rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
+                           GFP_KERNEL);
        if (!rgb_user)
                goto rgb_user_alloc_fail;
-       rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
-                       GFP_KERNEL);
+       rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
+                              GFP_KERNEL);
        if (!rgb_regamma)
                goto rgb_regamma_alloc_fail;
-       axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + 3),
-                        GFP_KERNEL);
+       axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + 3),
+                         GFP_KERNEL);
        if (!axix_x)
                goto axix_x_alloc_fail;
-       coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
+       coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
        if (!coeff)
                goto coeff_alloc_fail;
 
@@ -1157,13 +1157,13 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
 
        ret = true;
 
-       kfree(coeff);
+       kvfree(coeff);
 coeff_alloc_fail:
-       kfree(axix_x);
+       kvfree(axix_x);
 axix_x_alloc_fail:
-       kfree(rgb_regamma);
+       kvfree(rgb_regamma);
 rgb_regamma_alloc_fail:
-       kfree(rgb_user);
+       kvfree(rgb_user);
 rgb_user_alloc_fail:
        return ret;
 }
@@ -1192,19 +1192,19 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 
        input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
 
-       rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
-                          GFP_KERNEL);
+       rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
+                           GFP_KERNEL);
        if (!rgb_user)
                goto rgb_user_alloc_fail;
-       curve = kzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS),
-                       GFP_KERNEL);
+       curve = kvzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS),
+                        GFP_KERNEL);
        if (!curve)
                goto curve_alloc_fail;
-       axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS),
-                        GFP_KERNEL);
+       axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS),
+                         GFP_KERNEL);
        if (!axix_x)
                goto axix_x_alloc_fail;
-       coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
+       coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
        if (!coeff)
                goto coeff_alloc_fail;
 
@@ -1246,13 +1246,13 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 
        ret = true;
 
-       kfree(coeff);
+       kvfree(coeff);
 coeff_alloc_fail:
-       kfree(axix_x);
+       kvfree(axix_x);
 axix_x_alloc_fail:
-       kfree(curve);
+       kvfree(curve);
 curve_alloc_fail:
-       kfree(rgb_user);
+       kvfree(rgb_user);
 rgb_user_alloc_fail:
 
        return ret;
@@ -1281,8 +1281,9 @@ bool  mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
                }
                ret = true;
        } else if (trans == TRANSFER_FUNCTION_PQ) {
-               rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS +
-                                               _EXTRA_POINTS), GFP_KERNEL);
+               rgb_regamma = kvzalloc(sizeof(*rgb_regamma) *
+                                      (MAX_HW_POINTS + _EXTRA_POINTS),
+                                      GFP_KERNEL);
                if (!rgb_regamma)
                        goto rgb_regamma_alloc_fail;
                points->end_exponent = 7;
@@ -1302,11 +1303,12 @@ bool  mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
                }
                ret = true;
 
-               kfree(rgb_regamma);
+               kvfree(rgb_regamma);
        } else if (trans == TRANSFER_FUNCTION_SRGB ||
                          trans == TRANSFER_FUNCTION_BT709) {
-               rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS +
-                                               _EXTRA_POINTS), GFP_KERNEL);
+               rgb_regamma = kvzalloc(sizeof(*rgb_regamma) *
+                                      (MAX_HW_POINTS + _EXTRA_POINTS),
+                                      GFP_KERNEL);
                if (!rgb_regamma)
                        goto rgb_regamma_alloc_fail;
                points->end_exponent = 0;
@@ -1324,7 +1326,7 @@ bool  mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
                }
                ret = true;
 
-               kfree(rgb_regamma);
+               kvfree(rgb_regamma);
        }
 rgb_regamma_alloc_fail:
        return ret;
@@ -1348,8 +1350,9 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
                }
                ret = true;
        } else if (trans == TRANSFER_FUNCTION_PQ) {
-               rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS +
-                                               _EXTRA_POINTS), GFP_KERNEL);
+               rgb_degamma = kvzalloc(sizeof(*rgb_degamma) *
+                                      (MAX_HW_POINTS + _EXTRA_POINTS),
+                                      GFP_KERNEL);
                if (!rgb_degamma)
                        goto rgb_degamma_alloc_fail;
 
@@ -1364,11 +1367,12 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
                }
                ret = true;
 
-               kfree(rgb_degamma);
+               kvfree(rgb_degamma);
        } else if (trans == TRANSFER_FUNCTION_SRGB ||
                          trans == TRANSFER_FUNCTION_BT709) {
-               rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS +
-                                               _EXTRA_POINTS), GFP_KERNEL);
+               rgb_degamma = kvzalloc(sizeof(*rgb_degamma) *
+                                      (MAX_HW_POINTS + _EXTRA_POINTS),
+                                      GFP_KERNEL);
                if (!rgb_degamma)
                        goto rgb_degamma_alloc_fail;
 
@@ -1382,7 +1386,7 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
                }
                ret = true;
 
-               kfree(rgb_degamma);
+               kvfree(rgb_degamma);
        }
        points->end_exponent = 0;
        points->x_point_at_y1_red = 1;
index 0f5ad54d3fd3d9c5e3700ec1d3e6324bf0436f4c..de177ce8ca80216b126f3503c9821ae12efc27ef 100644 (file)
@@ -501,6 +501,32 @@ enum atom_cooling_solution_id{
   LIQUID_COOLING = 0x01
 };
 
+struct atom_firmware_info_v3_2 {
+  struct atom_common_table_header table_header;
+  uint32_t firmware_revision;
+  uint32_t bootup_sclk_in10khz;
+  uint32_t bootup_mclk_in10khz;
+  uint32_t firmware_capability;             // enum atombios_firmware_capability
+  uint32_t main_call_parser_entry;          /* direct address of main parser call in VBIOS binary. */
+  uint32_t bios_scratch_reg_startaddr;      // 1st bios scratch register dword address
+  uint16_t bootup_vddc_mv;
+  uint16_t bootup_vddci_mv;
+  uint16_t bootup_mvddc_mv;
+  uint16_t bootup_vddgfx_mv;
+  uint8_t  mem_module_id;
+  uint8_t  coolingsolution_id;              /*0: Air cooling; 1: Liquid cooling ... */
+  uint8_t  reserved1[2];
+  uint32_t mc_baseaddr_high;
+  uint32_t mc_baseaddr_low;
+  uint8_t  board_i2c_feature_id;            // enum of atom_board_i2c_feature_id_def
+  uint8_t  board_i2c_feature_gpio_id;       // i2c id find in gpio_lut data table gpio_id
+  uint8_t  board_i2c_feature_slave_addr;
+  uint8_t  reserved3;
+  uint16_t bootup_mvddq_mv;
+  uint16_t bootup_mvpp_mv;
+  uint32_t zfbstartaddrin16mb;
+  uint32_t reserved2[3];
+};
 
 /* 
   ***************************************************************************
@@ -1169,7 +1195,29 @@ struct  atom_gfx_info_v2_2
   uint32_t rlc_gpu_timer_refclk; 
 };
 
-
+struct  atom_gfx_info_v2_3 {
+  struct  atom_common_table_header  table_header;
+  uint8_t gfxip_min_ver;
+  uint8_t gfxip_max_ver;
+  uint8_t max_shader_engines;
+  uint8_t max_tile_pipes;
+  uint8_t max_cu_per_sh;
+  uint8_t max_sh_per_se;
+  uint8_t max_backends_per_se;
+  uint8_t max_texture_channel_caches;
+  uint32_t regaddr_cp_dma_src_addr;
+  uint32_t regaddr_cp_dma_src_addr_hi;
+  uint32_t regaddr_cp_dma_dst_addr;
+  uint32_t regaddr_cp_dma_dst_addr_hi;
+  uint32_t regaddr_cp_dma_command;
+  uint32_t regaddr_cp_status;
+  uint32_t regaddr_rlc_gpu_clock_32;
+  uint32_t rlc_gpu_timer_refclk;
+  uint8_t active_cu_per_sh;
+  uint8_t active_rb_per_se;
+  uint16_t gcgoldenoffset;
+  uint32_t rm21_sram_vmin_value;
+};
 
 /* 
   ***************************************************************************
@@ -1198,6 +1246,76 @@ struct atom_smu_info_v3_1
   uint8_t  fw_ctf_polarity;         // GPIO polarity for CTF
 };
 
+struct atom_smu_info_v3_2 {
+  struct   atom_common_table_header  table_header;
+  uint8_t  smuip_min_ver;
+  uint8_t  smuip_max_ver;
+  uint8_t  smu_rsd1;
+  uint8_t  gpuclk_ss_mode;
+  uint16_t sclk_ss_percentage;
+  uint16_t sclk_ss_rate_10hz;
+  uint16_t gpuclk_ss_percentage;    // in unit of 0.001%
+  uint16_t gpuclk_ss_rate_10hz;
+  uint32_t core_refclk_10khz;
+  uint8_t  ac_dc_gpio_bit;          // GPIO bit shift in SMU_GPIOPAD_A  configured for AC/DC switching, =0xff means invalid
+  uint8_t  ac_dc_polarity;          // GPIO polarity for AC/DC switching
+  uint8_t  vr0hot_gpio_bit;         // GPIO bit shift in SMU_GPIOPAD_A  configured for VR0 HOT event, =0xff means invalid
+  uint8_t  vr0hot_polarity;         // GPIO polarity for VR0 HOT event
+  uint8_t  vr1hot_gpio_bit;         // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event , =0xff means invalid
+  uint8_t  vr1hot_polarity;         // GPIO polarity for VR1 HOT event
+  uint8_t  fw_ctf_gpio_bit;         // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid
+  uint8_t  fw_ctf_polarity;         // GPIO polarity for CTF
+  uint8_t  pcc_gpio_bit;            // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid
+  uint8_t  pcc_gpio_polarity;       // GPIO polarity for PCC
+  uint8_t  pcc_gpio_polarity;       // GPIO polarity for PCC
+  uint16_t smugoldenoffset;
+  uint32_t gpupll_vco_freq_10khz;
+  uint32_t bootup_smnclk_10khz;
+  uint32_t bootup_socclk_10khz;
+  uint32_t bootup_mp0clk_10khz;
+  uint32_t bootup_mp1clk_10khz;
+  uint32_t bootup_lclk_10khz;
+  uint32_t bootup_dcefclk_10khz;
+  uint32_t ctf_threshold_override_value;
+  uint32_t reserved[5];
+};
+
+struct atom_smu_info_v3_3 {
+  struct   atom_common_table_header  table_header;
+  uint8_t  smuip_min_ver;
+  uint8_t  smuip_max_ver;
+  uint8_t  smu_rsd1;
+  uint8_t  gpuclk_ss_mode;
+  uint16_t sclk_ss_percentage;
+  uint16_t sclk_ss_rate_10hz;
+  uint16_t gpuclk_ss_percentage;    // in unit of 0.001%
+  uint16_t gpuclk_ss_rate_10hz;
+  uint32_t core_refclk_10khz;
+  uint8_t  ac_dc_gpio_bit;          // GPIO bit shift in SMU_GPIOPAD_A  configured for AC/DC switching, =0xff means invalid
+  uint8_t  ac_dc_polarity;          // GPIO polarity for AC/DC switching
+  uint8_t  vr0hot_gpio_bit;         // GPIO bit shift in SMU_GPIOPAD_A  configured for VR0 HOT event, =0xff means invalid
+  uint8_t  vr0hot_polarity;         // GPIO polarity for VR0 HOT event
+  uint8_t  vr1hot_gpio_bit;         // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event , =0xff means invalid
+  uint8_t  vr1hot_polarity;         // GPIO polarity for VR1 HOT event
+  uint8_t  fw_ctf_gpio_bit;         // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid
+  uint8_t  fw_ctf_polarity;         // GPIO polarity for CTF
+  uint8_t  pcc_gpio_bit;            // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid
+  uint8_t  pcc_gpio_polarity;       // GPIO polarity for PCC
+  uint16_t smugoldenoffset;
+  uint32_t gpupll_vco_freq_10khz;
+  uint32_t bootup_smnclk_10khz;
+  uint32_t bootup_socclk_10khz;
+  uint32_t bootup_mp0clk_10khz;
+  uint32_t bootup_mp1clk_10khz;
+  uint32_t bootup_lclk_10khz;
+  uint32_t bootup_dcefclk_10khz;
+  uint32_t ctf_threshold_override_value;
+  uint32_t syspll3_0_vco_freq_10khz;
+  uint32_t syspll3_1_vco_freq_10khz;
+  uint32_t bootup_fclk_10khz;
+  uint32_t bootup_waflclk_10khz;
+  uint32_t reserved[3];
+};
+
 /*
  ***************************************************************************
    Data Table smc_dpm_info  structure
@@ -1283,7 +1401,6 @@ struct atom_smc_dpm_info_v4_1
        uint32_t boardreserved[10];
 };
 
-
 /* 
   ***************************************************************************
     Data Table asic_profiling_info  structure
@@ -1864,6 +1981,55 @@ enum atom_smu9_syspll0_clock_id
   SMU9_SYSPLL0_DISPCLK_ID  = 11,      //       DISPCLK
 };
 
+enum atom_smu11_syspll_id {
+  SMU11_SYSPLL0_ID            = 0,
+  SMU11_SYSPLL1_0_ID          = 1,
+  SMU11_SYSPLL1_1_ID          = 2,
+  SMU11_SYSPLL1_2_ID          = 3,
+  SMU11_SYSPLL2_ID            = 4,
+  SMU11_SYSPLL3_0_ID          = 5,
+  SMU11_SYSPLL3_1_ID          = 6,
+};
+
+
+enum atom_smu11_syspll0_clock_id {
+  SMU11_SYSPLL0_SOCCLK_ID   = 0,       //      SOCCLK
+  SMU11_SYSPLL0_MP0CLK_ID   = 1,       //      MP0CLK
+  SMU11_SYSPLL0_DCLK_ID     = 2,       //      DCLK
+  SMU11_SYSPLL0_VCLK_ID     = 3,       //      VCLK
+  SMU11_SYSPLL0_ECLK_ID     = 4,       //      ECLK
+  SMU11_SYSPLL0_DCEFCLK_ID  = 5,       //      DCEFCLK
+};
+
+
+enum atom_smu11_syspll1_0_clock_id {
+  SMU11_SYSPLL1_0_UCLKA_ID   = 0,       // UCLK_a
+};
+
+enum atom_smu11_syspll1_1_clock_id {
+  SMU11_SYSPLL1_0_UCLKB_ID   = 0,       // UCLK_b
+};
+
+enum atom_smu11_syspll1_2_clock_id {
+  SMU11_SYSPLL1_0_FCLK_ID   = 0,        // FCLK
+};
+
+enum atom_smu11_syspll2_clock_id {
+  SMU11_SYSPLL2_GFXCLK_ID   = 0,        // GFXCLK
+};
+
+enum atom_smu11_syspll3_0_clock_id {
+  SMU11_SYSPLL3_0_WAFCLK_ID = 0,       //      WAFCLK
+  SMU11_SYSPLL3_0_DISPCLK_ID = 1,      //      DISPCLK
+  SMU11_SYSPLL3_0_DPREFCLK_ID = 2,     //      DPREFCLK
+};
+
+enum atom_smu11_syspll3_1_clock_id {
+  SMU11_SYSPLL3_1_MP1CLK_ID = 0,       //      MP1CLK
+  SMU11_SYSPLL3_1_SMNCLK_ID = 1,       //      SMNCLK
+  SMU11_SYSPLL3_1_LCLK_ID = 2,         //      LCLK
+};
+
 struct  atom_get_smu_clock_info_output_parameters_v3_1
 {
   union {
index add90675fd2afcfd72d66b8f30f4f6c4ab6a2540..18b5b2ff47febeb13c0682994fb5e2bd57bb53ef 100644 (file)
 #define PCIE_BUS_CLK                10000
 #define TCLK                        (PCIE_BUS_CLK / 10)
 
-static const struct profile_mode_setting smu7_profiling[5] =
+static const struct profile_mode_setting smu7_profiling[6] =
                                        {{1, 0, 100, 30, 1, 0, 100, 10},
                                         {1, 10, 0, 30, 0, 0, 0, 0},
                                         {0, 0, 0, 0, 1, 10, 16, 31},
                                         {1, 0, 11, 50, 1, 0, 100, 10},
                                         {1, 0, 5, 30, 0, 0, 0, 0},
+                                        {0, 0, 0, 0, 0, 0, 0, 0},
                                        };
 
 /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */
@@ -4743,23 +4744,27 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
 
        for (i=0; i < dep_table->count; i++) {
                if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) {
-                       data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
-                       break;
+                       data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK;
+                       return;
                }
        }
-       if (i == dep_table->count)
+       if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
                data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
+               data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+       }
 
        dep_table = table_info->vdd_dep_on_sclk;
        odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk);
        for (i=0; i < dep_table->count; i++) {
                if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) {
-                       data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
-                       break;
+                       data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK;
+                       return;
                }
        }
-       if (i == dep_table->count)
+       if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
                data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
+               data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+       }
 }
 
 static int smu7_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
@@ -4860,6 +4865,17 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
        len = sizeof(smu7_profiling) / sizeof(struct profile_mode_setting);
 
        for (i = 0; i < len; i++) {
+               if (i == hwmgr->power_profile_mode) {
+                       size += sprintf(buf + size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n",
+                       i, profile_name[i], "*",
+                       data->current_profile_setting.sclk_up_hyst,
+                       data->current_profile_setting.sclk_down_hyst,
+                       data->current_profile_setting.sclk_activity,
+                       data->current_profile_setting.mclk_up_hyst,
+                       data->current_profile_setting.mclk_down_hyst,
+                       data->current_profile_setting.mclk_activity);
+                       continue;
+               }
                if (smu7_profiling[i].bupdate_sclk)
                        size += sprintf(buf + size, "%3d %16s: %8d %16d %16d ",
                        i, profile_name[i], smu7_profiling[i].sclk_up_hyst,
@@ -4879,24 +4895,6 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
                        "-", "-", "-");
        }
 
-       size += sprintf(buf + size, "%3d %16s: %8d %16d %16d %16d %16d %16d\n",
-                       i, profile_name[i],
-                       data->custom_profile_setting.sclk_up_hyst,
-                       data->custom_profile_setting.sclk_down_hyst,
-                       data->custom_profile_setting.sclk_activity,
-                       data->custom_profile_setting.mclk_up_hyst,
-                       data->custom_profile_setting.mclk_down_hyst,
-                       data->custom_profile_setting.mclk_activity);
-
-       size += sprintf(buf + size, "%3s %16s: %8d %16d %16d %16d %16d %16d\n",
-                       "*", "CURRENT",
-                       data->current_profile_setting.sclk_up_hyst,
-                       data->current_profile_setting.sclk_down_hyst,
-                       data->current_profile_setting.sclk_activity,
-                       data->current_profile_setting.mclk_up_hyst,
-                       data->current_profile_setting.mclk_down_hyst,
-                       data->current_profile_setting.mclk_activity);
-
        return size;
 }
 
@@ -4935,16 +4933,16 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint
                if (size < 8)
                        return -EINVAL;
 
-               data->custom_profile_setting.bupdate_sclk = input[0];
-               data->custom_profile_setting.sclk_up_hyst = input[1];
-               data->custom_profile_setting.sclk_down_hyst = input[2];
-               data->custom_profile_setting.sclk_activity = input[3];
-               data->custom_profile_setting.bupdate_mclk = input[4];
-               data->custom_profile_setting.mclk_up_hyst = input[5];
-               data->custom_profile_setting.mclk_down_hyst = input[6];
-               data->custom_profile_setting.mclk_activity = input[7];
-               if (!smum_update_dpm_settings(hwmgr, &data->custom_profile_setting)) {
-                       memcpy(&data->current_profile_setting, &data->custom_profile_setting, sizeof(struct profile_mode_setting));
+               tmp.bupdate_sclk = input[0];
+               tmp.sclk_up_hyst = input[1];
+               tmp.sclk_down_hyst = input[2];
+               tmp.sclk_activity = input[3];
+               tmp.bupdate_mclk = input[4];
+               tmp.mclk_up_hyst = input[5];
+               tmp.mclk_down_hyst = input[6];
+               tmp.mclk_activity = input[7];
+               if (!smum_update_dpm_settings(hwmgr, &tmp)) {
+                       memcpy(&data->current_profile_setting, &tmp, sizeof(struct profile_mode_setting));
                        hwmgr->power_profile_mode = mode;
                }
                break;
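
The custom-profile rework above swaps the persistent custom_profile_setting field for a stack temporary: user input is parsed into `tmp`, pushed to the SMU, and only copied over current_profile_setting when the firmware accepted it, so a rejected update can no longer clobber the cached state. A runnable sketch of the commit-on-success idiom (hypothetical helper names):

#include <stdio.h>
#include <string.h>

struct setting { int up_hyst, down_hyst; };

static int push_to_firmware(const struct setting *s)
{
	return s->up_hyst >= 0 ? 0 : -1;	/* fake validation */
}

static int apply(struct setting *current, const long *input)
{
	struct setting tmp;

	tmp.up_hyst = (int)input[0];
	tmp.down_hyst = (int)input[1];

	if (push_to_firmware(&tmp))
		return -1;	/* current settings left untouched */

	memcpy(current, &tmp, sizeof(tmp));	/* commit only on success */
	return 0;
}

int main(void)
{
	struct setting cur = { 1, 2 };
	long in[2] = { -5, 9 };

	apply(&cur, in);	/* firmware rejects the input */
	printf("up=%d down=%d\n", cur.up_hyst, cur.down_hyst); /* still 1 2 */
	return 0;
}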
index f40179c9ca97795e94e749ca1f107711c93c31cf..b8d0bb37859507ba3d51d6f99b8968dd085823b0 100644 (file)
@@ -325,7 +325,6 @@ struct smu7_hwmgr {
        uint16_t                              mem_latency_high;
        uint16_t                              mem_latency_low;
        uint32_t                              vr_config;
-       struct profile_mode_setting           custom_profile_setting;
        struct profile_mode_setting           current_profile_setting;
 };
 
index 03bc7453f3b1e4db89eb24b515ee16d03fbd6228..d9e92e3065352e8b59d9cf2ccac53cdb94af1f9c 100644 (file)
@@ -852,12 +852,10 @@ int smu7_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
        struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
-       n = (n & 0xff) << 8;
-
        if (data->power_containment_features &
                        POWERCONTAINMENT_FEATURE_PkgPwrLimit)
                return smum_send_msg_to_smc_with_parameter(hwmgr,
-                               PPSMC_MSG_PkgPwrSetLimit, n);
+                               PPSMC_MSG_PkgPwrSetLimit, n<<8);
        return 0;
 }
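
The power-limit hunk above drops the `(n & 0xff)` mask: masking first silently capped the requested limit at 255 (the low byte) before the value was shifted into the SMC message parameter, while shifting the full value preserves larger limits. A worked plain-C example of the difference for n = 300:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t n = 300;

	printf("old: 0x%x\n", (n & 0xff) << 8);	/* 0x2c00: only 44 survives */
	printf("new: 0x%x\n", n << 8);		/* 0x12c00: 300 preserved */
	return 0;
}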
 
index fb696e3d06cfc46f680d1668ebb985adcb988ea3..2f8a3b983cce0c9444cac949ed7ce79a420b06ba 100644 (file)
@@ -412,8 +412,10 @@ typedef struct {
   QuadraticInt_t    ReservedEquation2;
   QuadraticInt_t    ReservedEquation3;
 
+       uint16_t     MinVoltageUlvGfx;
+       uint16_t     MinVoltageUlvSoc;
 
-  uint32_t     Reserved[15];
+       uint32_t     Reserved[14];
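
The SMU interface change above is an ABI-preserving layout edit: the two new uint16_t fields occupy exactly one uint32_t slot, so Reserved[] shrinks from 15 to 14 entries and the structure shared with firmware keeps its size. A compile-time check of that invariant with simplified stand-in structs (not the real driver header):

#include <stdint.h>

struct layout_old { uint32_t reserved[15]; };

struct layout_new {
	uint16_t min_voltage_ulv_gfx;
	uint16_t min_voltage_ulv_soc;
	uint32_t reserved[14];
};

_Static_assert(sizeof(struct layout_old) == sizeof(struct layout_new),
	       "firmware interface size must not change");

int main(void) { return 0; }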
 
 
 
index 3aa65bdecb0e80aeb289486d1b705b52e0076856..684ac626ac53277ffe4e4be5c97576fd861b9dde 100644 (file)
@@ -74,6 +74,7 @@ config DRM_SIL_SII8620
        tristate "Silicon Image SII8620 HDMI/MHL bridge"
        depends on OF && RC_CORE
        select DRM_KMS_HELPER
+       imply EXTCON
        help
          Silicon Image SII8620 HDMI/MHL bridge chip driver.
 
index 498d5948d1a8099066fc62d5305ffe64c3c84ca0..9837c8d69e6918f0418158054db605b55e082cb8 100644 (file)
@@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector)
        }
 
        drm_mode_connector_update_edid_property(connector, edid);
-       return drm_add_edid_modes(connector, edid);
+       ret = drm_add_edid_modes(connector, edid);
+       kfree(edid);
+       return ret;
 
 fallback:
        /*
index 7d25c42f22dbcf8d3efc8949b1db59999eda693e..c825c76edc1d1cfc25b7fa9b7ba6112a40e55184 100644 (file)
@@ -155,6 +155,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
                                                       state->connectors[i].state);
                state->connectors[i].ptr = NULL;
                state->connectors[i].state = NULL;
+               state->connectors[i].old_state = NULL;
+               state->connectors[i].new_state = NULL;
                drm_connector_put(connector);
        }
 
@@ -169,6 +171,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
 
                state->crtcs[i].ptr = NULL;
                state->crtcs[i].state = NULL;
+               state->crtcs[i].old_state = NULL;
+               state->crtcs[i].new_state = NULL;
        }
 
        for (i = 0; i < config->num_total_plane; i++) {
@@ -181,6 +185,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
                                                   state->planes[i].state);
                state->planes[i].ptr = NULL;
                state->planes[i].state = NULL;
+               state->planes[i].old_state = NULL;
+               state->planes[i].new_state = NULL;
        }
 
        for (i = 0; i < state->num_private_objs; i++) {
@@ -190,6 +196,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
                                                 state->private_objs[i].state);
                state->private_objs[i].ptr = NULL;
                state->private_objs[i].state = NULL;
+               state->private_objs[i].old_state = NULL;
+               state->private_objs[i].new_state = NULL;
        }
        state->num_private_objs = 0;
 
index 02a50929af6759ab7769bba100134b2e6f4b0713..e7f4fe2848a542c3e45b55a8c92b258273cd71c8 100644 (file)
@@ -350,19 +350,44 @@ int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type,
 {
        uint8_t tmds_oen = enable ? 0 : DP_DUAL_MODE_TMDS_DISABLE;
        ssize_t ret;
+       int retry;
 
        if (type < DRM_DP_DUAL_MODE_TYPE2_DVI)
                return 0;
 
-       ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN,
-                                    &tmds_oen, sizeof(tmds_oen));
-       if (ret) {
-               DRM_DEBUG_KMS("Failed to %s TMDS output buffers\n",
-                             enable ? "enable" : "disable");
-               return ret;
+       /*
+        * LSPCON adapters in low-power state may ignore the first write, so
+        * read back and verify the written value a few times.
+        */
+       for (retry = 0; retry < 3; retry++) {
+               uint8_t tmp;
+
+               ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN,
+                                            &tmds_oen, sizeof(tmds_oen));
+               if (ret) {
+                       DRM_DEBUG_KMS("Failed to %s TMDS output buffers (%d attempts)\n",
+                                     enable ? "enable" : "disable",
+                                     retry + 1);
+                       return ret;
+               }
+
+               ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_TMDS_OEN,
+                                           &tmp, sizeof(tmp));
+               if (ret) {
+                       DRM_DEBUG_KMS("I2C read failed during TMDS output buffer %s (%d attempts)\n",
+                                     enable ? "enabling" : "disabling",
+                                     retry + 1);
+                       return ret;
+               }
+
+               if (tmp == tmds_oen)
+                       return 0;
        }
 
-       return 0;
+       DRM_DEBUG_KMS("I2C write value mismatch during TMDS output buffer %s\n",
+                     enable ? "enabling" : "disabling");
+
+       return -EIO;
 }
 EXPORT_SYMBOL(drm_dp_dual_mode_set_tmds_output);
 
index a1b9338736e3be4aafc5522c217f75ce3dcea832..c2c21d839727869ef30a59245b75708d7c6fbcf7 100644 (file)
@@ -716,7 +716,7 @@ static void remove_compat_control_link(struct drm_device *dev)
        if (!minor)
                return;
 
-       name = kasprintf(GFP_KERNEL, "controlD%d", minor->index);
+       name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64);
        if (!name)
                return;
 
index 39ac15ce47023055f5a2badb7e5a99c59e19b3dd..9e2ae02f31e08fbad87669a126761c015d25db44 100644 (file)
@@ -65,12 +65,13 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev,
                return -EINVAL;
 
        /* overflow checks for 32bit size calculations */
-       /* NOTE: DIV_ROUND_UP() can overflow */
+       if (args->bpp > U32_MAX - 8)
+               return -EINVAL;
        cpp = DIV_ROUND_UP(args->bpp, 8);
-       if (!cpp || cpp > 0xffffffffU / args->width)
+       if (cpp > U32_MAX / args->width)
                return -EINVAL;
        stride = cpp * args->width;
-       if (args->height > 0xffffffffU / stride)
+       if (args->height > U32_MAX / stride)
                return -EINVAL;
 
        /* test for wrap-around */
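
The dumb-buffer hunk above replaces the old "NOTE: DIV_ROUND_UP() can overflow" comment with actual guards: before each 32-bit multiplication, one operand is checked against U32_MAX divided by the other, the standard way to reject a product that would wrap. A runnable example of the same shape (it assumes non-zero dimensions, as the ioctl rejects zero width/height/bpp earlier):

#include <stdio.h>
#include <stdint.h>

static int size_ok(uint32_t cpp, uint32_t width, uint32_t height)
{
	uint32_t stride;

	if (cpp > UINT32_MAX / width)		/* cpp * width would wrap */
		return 0;
	stride = cpp * width;
	if (height > UINT32_MAX / stride)	/* stride * height would wrap */
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", size_ok(4, 1920, 1080));		/* 1: fits */
	printf("%d\n", size_ok(4, 70000, 70000));	/* 0: would overflow */
	return 0;
}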
index 134069f364829030c8bbd7d7bf1e45b3c7340666..39f1db4acda4eaf1059be7071c1d48e384416193 100644 (file)
@@ -4451,6 +4451,7 @@ drm_reset_display_info(struct drm_connector *connector)
        info->max_tmds_clock = 0;
        info->dvi_dual = false;
        info->has_hdmi_infoframe = false;
+       memset(&info->hdmi, 0, sizeof(info->hdmi));
 
        info->non_desktop = 0;
 }
@@ -4462,17 +4463,11 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
 
        u32 quirks = edid_get_quirks(edid);
 
+       drm_reset_display_info(connector);
+
        info->width_mm = edid->width_cm * 10;
        info->height_mm = edid->height_cm * 10;
 
-       /* driver figures it out in this case */
-       info->bpc = 0;
-       info->color_formats = 0;
-       info->cea_rev = 0;
-       info->max_tmds_clock = 0;
-       info->dvi_dual = false;
-       info->has_hdmi_infoframe = false;
-
        info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP);
 
        DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop);
index e394799979a6eabce5c91a7a6b87773dd4b45186..6d9b9453707c5af4cbad984c790ce9538daa5d82 100644 (file)
@@ -212,6 +212,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
                return -ENOMEM;
 
        filp->private_data = priv;
+       filp->f_mode |= FMODE_UNSIGNED_OFFSET;
        priv->filp = filp;
        priv->pid = get_pid(task_pid(current));
        priv->minor = minor;
index 0faaf829f5bf52a9d730d47f5de035a16a77a165..f0e79178bde61dea434f15d5cc47f30786812994 100644 (file)
@@ -18,6 +18,7 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <uapi/drm/exynos_drm.h>
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_iommu.h"
 #include "exynos_drm_crtc.h"
 
-#define to_exynos_fb(x)        container_of(x, struct exynos_drm_fb, fb)
-
-/*
- * exynos specific framebuffer structure.
- *
- * @fb: drm framebuffer obejct.
- * @exynos_gem: array of exynos specific gem object containing a gem object.
- */
-struct exynos_drm_fb {
-       struct drm_framebuffer  fb;
-       struct exynos_drm_gem   *exynos_gem[MAX_FB_BUFFER];
-       dma_addr_t                      dma_addr[MAX_FB_BUFFER];
-};
-
 static int check_fb_gem_memory_type(struct drm_device *drm_dev,
                                    struct exynos_drm_gem *exynos_gem)
 {
@@ -66,40 +53,9 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev,
        return 0;
 }
 
-static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
-{
-       struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
-       unsigned int i;
-
-       drm_framebuffer_cleanup(fb);
-
-       for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem); i++) {
-               struct drm_gem_object *obj;
-
-               if (exynos_fb->exynos_gem[i] == NULL)
-                       continue;
-
-               obj = &exynos_fb->exynos_gem[i]->base;
-               drm_gem_object_unreference_unlocked(obj);
-       }
-
-       kfree(exynos_fb);
-       exynos_fb = NULL;
-}
-
-static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb,
-                                       struct drm_file *file_priv,
-                                       unsigned int *handle)
-{
-       struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
-
-       return drm_gem_handle_create(file_priv,
-                                    &exynos_fb->exynos_gem[0]->base, handle);
-}
-
 static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = {
-       .destroy        = exynos_drm_fb_destroy,
-       .create_handle  = exynos_drm_fb_create_handle,
+       .destroy        = drm_gem_fb_destroy,
+       .create_handle  = drm_gem_fb_create_handle,
 };
 
 struct drm_framebuffer *
@@ -108,12 +64,12 @@ exynos_drm_framebuffer_init(struct drm_device *dev,
                            struct exynos_drm_gem **exynos_gem,
                            int count)
 {
-       struct exynos_drm_fb *exynos_fb;
+       struct drm_framebuffer *fb;
        int i;
        int ret;
 
-       exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL);
-       if (!exynos_fb)
+       fb = kzalloc(sizeof(*fb), GFP_KERNEL);
+       if (!fb)
                return ERR_PTR(-ENOMEM);
 
        for (i = 0; i < count; i++) {
@@ -121,23 +77,21 @@ exynos_drm_framebuffer_init(struct drm_device *dev,
                if (ret < 0)
                        goto err;
 
-               exynos_fb->exynos_gem[i] = exynos_gem[i];
-               exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr
-                                               + mode_cmd->offsets[i];
+               fb->obj[i] = &exynos_gem[i]->base;
        }
 
-       drm_helper_mode_fill_fb_struct(dev, &exynos_fb->fb, mode_cmd);
+       drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd);
 
-       ret = drm_framebuffer_init(dev, &exynos_fb->fb, &exynos_drm_fb_funcs);
+       ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs);
        if (ret < 0) {
                DRM_ERROR("failed to initialize framebuffer\n");
                goto err;
        }
 
-       return &exynos_fb->fb;
+       return fb;
 
 err:
-       kfree(exynos_fb);
+       kfree(fb);
        return ERR_PTR(ret);
 }
 
@@ -191,12 +145,13 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 
 dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index)
 {
-       struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+       struct exynos_drm_gem *exynos_gem;
 
        if (WARN_ON_ONCE(index >= MAX_FB_BUFFER))
                return 0;
 
-       return exynos_fb->dma_addr[index];
+       exynos_gem = to_exynos_gem(fb->obj[index]);
+       return exynos_gem->dma_addr + fb->offsets[index];
 }
 
 static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = {
index abd84cbcf1c2ca763e71d4e493661f8bef22170f..09c4bc0b1859f7bc8412cc3dfb3806ddaa0d3134 100644 (file)
@@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
        drm_mode_connector_attach_encoder(connector, encoder);
 
        if (hdata->bridge) {
-               encoder->bridge = hdata->bridge;
-               hdata->bridge->encoder = encoder;
                ret = drm_bridge_attach(encoder, hdata->bridge, NULL);
                if (ret)
                        DRM_ERROR("Failed to attach bridge\n");
index 257299ec95c435ae08660f7f659f62c7577e2b1c..272c79f5f5bff856dfa4a29a2574a70ee72366cf 100644 (file)
@@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
                        chroma_addr[1] = chroma_addr[0] + 0x40;
                } else {
                        luma_addr[1] = luma_addr[0] + fb->pitches[0];
-                       chroma_addr[1] = chroma_addr[0] + fb->pitches[0];
+                       chroma_addr[1] = chroma_addr[0] + fb->pitches[1];
                }
        } else {
                luma_addr[1] = 0;
@@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
 
        spin_lock_irqsave(&ctx->reg_slock, flags);
 
+       vp_reg_write(ctx, VP_SHADOW_UPDATE, 1);
        /* interlace or progressive scan mode */
        val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0);
        vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP);
@@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx,
        vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) |
                VP_IMG_VSIZE(fb->height));
        /* chroma plane for NV12/NV21 is half the height of the luma plane */
-       vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) |
+       vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) |
                VP_IMG_VSIZE(fb->height / 2));
 
        vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w);
-       vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
        vp_reg_write(ctx, VP_SRC_H_POSITION,
                        VP_SRC_H_POSITION_VAL(state->src.x));
-       vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
-
        vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w);
        vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x);
+
        if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+               vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2);
+               vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2);
                vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2);
                vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2);
        } else {
+               vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
+               vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
                vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h);
                vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y);
        }
@@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg)
 
                /* interlace scan need to check shadow register */
                if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+                       if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) &&
+                           vp_reg_read(ctx, VP_SHADOW_UPDATE))
+                               goto out;
+
+                       base = mixer_reg_read(ctx, MXR_CFG);
+                       shadow = mixer_reg_read(ctx, MXR_CFG_S);
+                       if (base != shadow)
+                               goto out;
+
                        base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0));
                        shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0));
                        if (base != shadow)
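Annotation: two hunks above switch the chroma computations from fb->pitches[0] to fb->pitches[1]. NV12/NV21 are two-plane formats, so the CbCr plane carries its own pitch and is half the height of the luma plane. A simplified sketch of the per-field addressing the fix assumes, with hypothetical types rather than the driver's own:

struct nv12_fb {
        unsigned long base[2];    /* per-plane base addresses */
        unsigned int pitches[2];  /* bytes per line, per plane */
};

static void nv12_field_addrs(const struct nv12_fb *fb,
                             unsigned long luma[2],
                             unsigned long chroma[2])
{
        luma[0]   = fb->base[0];   /* Y plane */
        chroma[0] = fb->base[1];   /* interleaved CbCr plane */

        /*
         * The second field of an interlaced frame starts one line
         * down, so each plane advances by its own pitch; reusing the
         * luma pitch for chroma, as the old code did, is only correct
         * when the two pitches happen to match.
         */
        luma[1]   = luma[0] + fb->pitches[0];
        chroma[1] = chroma[0] + fb->pitches[1];
}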
index c311f571bdf9ec522f938c5cc4891acf9c1026b2..189cfa2470a850a65db8b619d27c8ab58d4e27b1 100644 (file)
@@ -47,6 +47,7 @@
 #define MXR_MO                         0x0304
 #define MXR_RESOLUTION                 0x0310
 
+#define MXR_CFG_S                      0x2004
 #define MXR_GRAPHIC0_BASE_S            0x2024
 #define MXR_GRAPHIC1_BASE_S            0x2044
 
index db6b94dda5dfaede1ebb97cc21b17ea301fdd275..d85939bd7b4719c077d65b97d94ecbe8d229288c 100644 (file)
@@ -1080,6 +1080,7 @@ static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s)
 {
        set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt,
                        s->workload->pending_events);
+       patch_value(s, cmd_ptr(s, 0), MI_NOOP);
        return 0;
 }
 
index dd96ffc878acda99ef0a87206391f1f24f269248..6d8180e8d1e21a71916e8b6cad404dab8d8c3257 100644 (file)
@@ -169,6 +169,8 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
 static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 {
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+       int pipe;
+
        vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT |
                        SDE_PORTC_HOTPLUG_CPT |
                        SDE_PORTD_HOTPLUG_CPT);
@@ -267,6 +269,14 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
        if (IS_BROADWELL(dev_priv))
                vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
 
+       /* Disable Primary/Sprite/Cursor plane */
+       for_each_pipe(dev_priv, pipe) {
+               vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE;
+               vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE;
+               vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~CURSOR_MODE;
+               vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= CURSOR_MODE_DISABLE;
+       }
+
        vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
 }
 
index b555eb26f9ceb94a0eb2e3f408849c98c00ad1e9..6f4f8e941fc200aa66972be703a811c17fff93d8 100644 (file)
@@ -323,6 +323,7 @@ static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf,
                      struct intel_vgpu_fb_info *fb_info)
 {
        gvt_dmabuf->drm_format = fb_info->drm_format;
+       gvt_dmabuf->drm_format_mod = fb_info->drm_format_mod;
        gvt_dmabuf->width = fb_info->width;
        gvt_dmabuf->height = fb_info->height;
        gvt_dmabuf->stride = fb_info->stride;
index 6b50fe78dc1b8bf3bbede208d801ca5b58a68007..1c120683e9588c5a1d379c370e2760234f2d3ab1 100644 (file)
@@ -245,16 +245,13 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
        plane->hw_format = fmt;
 
        plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK;
-       if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) {
-               gvt_vgpu_err("invalid gma address: %lx\n",
-                            (unsigned long)plane->base);
+       if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0))
                return  -EINVAL;
-       }
 
        plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base);
        if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) {
-               gvt_vgpu_err("invalid gma address: %lx\n",
-                               (unsigned long)plane->base);
+               gvt_vgpu_err("failed to translate primary plane gma 0x%x to gpa\n",
+                               plane->base);
                return  -EINVAL;
        }
 
@@ -371,16 +368,13 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu,
                        alpha_plane, alpha_force);
 
        plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK;
-       if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) {
-               gvt_vgpu_err("invalid gma address: %lx\n",
-                            (unsigned long)plane->base);
+       if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0))
                return  -EINVAL;
-       }
 
        plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base);
        if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) {
-               gvt_vgpu_err("invalid gma address: %lx\n",
-                               (unsigned long)plane->base);
+               gvt_vgpu_err("failed to translate cursor plane gma 0x%x to gpa\n",
+                               plane->base);
                return  -EINVAL;
        }
 
@@ -476,16 +470,13 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
        plane->drm_format = drm_format;
 
        plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK;
-       if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) {
-               gvt_vgpu_err("invalid gma address: %lx\n",
-                            (unsigned long)plane->base);
+       if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0))
                return  -EINVAL;
-       }
 
        plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base);
        if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) {
-               gvt_vgpu_err("invalid gma address: %lx\n",
-                               (unsigned long)plane->base);
+               gvt_vgpu_err("failed to translate sprite plane gma 0x%x to gpa\n",
+                               plane->base);
                return  -EINVAL;
        }
 
index d292812315077469848dc536a36fb7fa086d003d..78e55aafc8bca047d42e5f853ed7025af005e96c 100644 (file)
@@ -530,6 +530,16 @@ static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
                           false, 0, mm->vgpu);
 }
 
+static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
+               struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+       struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+
+       GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
+
+       pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
+}
+
 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
                struct intel_gvt_gtt_entry *entry, unsigned long index)
 {
@@ -1818,6 +1828,18 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
        return ret;
 }
 
+static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
+               struct intel_gvt_gtt_entry *entry)
+{
+       struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
+       unsigned long pfn;
+
+       pfn = pte_ops->get_pfn(entry);
+       if (pfn != vgpu->gvt->gtt.scratch_mfn)
+               intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
+                                               pfn << PAGE_SHIFT);
+}
+
 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
        void *p_data, unsigned int bytes)
 {
@@ -1844,10 +1866,10 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 
        memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
                        bytes);
-       m = e;
 
        if (ops->test_present(&e)) {
                gfn = ops->get_pfn(&e);
+               m = e;
 
                /* one PTE update may be issued in multiple writes and the
                 * first write may not construct a valid gfn
@@ -1868,8 +1890,12 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                        ops->set_pfn(&m, gvt->gtt.scratch_mfn);
                } else
                        ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
-       } else
+       } else {
+               ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index);
+               ggtt_invalidate_pte(vgpu, &m);
                ops->set_pfn(&m, gvt->gtt.scratch_mfn);
+               ops->clear_present(&m);
+       }
 
 out:
        ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
@@ -2030,7 +2056,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
                return PTR_ERR(gtt->ggtt_mm);
        }
 
-       intel_vgpu_reset_ggtt(vgpu);
+       intel_vgpu_reset_ggtt(vgpu, false);
 
        return create_scratch_page_tree(vgpu);
 }
@@ -2315,17 +2341,19 @@ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
 /**
  * intel_vgpu_reset_ggtt - reset the GGTT entry
  * @vgpu: a vGPU
+ * @invalidate_old: invalidate old entries
  *
  * This function is called at the vGPU create stage
  * to reset all the GGTT entries.
  *
  */
-void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
+void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
 {
        struct intel_gvt *gvt = vgpu->gvt;
        struct drm_i915_private *dev_priv = gvt->dev_priv;
        struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
        struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
+       struct intel_gvt_gtt_entry old_entry;
        u32 index;
        u32 num_entries;
 
@@ -2334,13 +2362,23 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
 
        index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
        num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
-       while (num_entries--)
+       while (num_entries--) {
+               if (invalidate_old) {
+                       ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
+                       ggtt_invalidate_pte(vgpu, &old_entry);
+               }
                ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
+       }
 
        index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
        num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
-       while (num_entries--)
+       while (num_entries--) {
+               if (invalidate_old) {
+                       ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
+                       ggtt_invalidate_pte(vgpu, &old_entry);
+               }
                ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
+       }
 
        ggtt_invalidate(dev_priv);
 }
@@ -2360,5 +2398,5 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
         * removing the shadow pages.
         */
        intel_vgpu_destroy_all_ppgtt_mm(vgpu);
-       intel_vgpu_reset_ggtt(vgpu);
+       intel_vgpu_reset_ggtt(vgpu, true);
 }
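Annotation: the new ggtt_invalidate_pte() captures the reasoning behind these hunks. When the guest clears or replaces a GGTT entry, the DMA mapping of the previously referenced guest page must be released or it leaks; the shared scratch page is the one exception. A heavily simplified sketch of the invalidate-before-overwrite pattern (a real PTE encodes the PFN among flag bits, and the unmap goes through the hypervisor layer):

#include <stdint.h>

#define PAGE_SHIFT 12

struct ggtt {
        uint64_t *entries;      /* simplified: entry == pfn << PAGE_SHIFT */
        uint64_t scratch_pfn;
};

/* stand-in for the hypervisor's dma-unmap hook */
static void dma_unmap_guest_page(uint64_t addr) { (void)addr; }

static void invalidate_pte(struct ggtt *g, unsigned long index)
{
        uint64_t pfn = g->entries[index] >> PAGE_SHIFT;

        /* the scratch page is shared by all entries; never unmap it */
        if (pfn != g->scratch_pfn)
                dma_unmap_guest_page(pfn << PAGE_SHIFT);
}

static void set_entry(struct ggtt *g, unsigned long index,
                      uint64_t new_entry, int invalidate_old)
{
        if (invalidate_old)
                invalidate_pte(g, index);   /* release the old mapping */
        g->entries[index] = new_entry;
}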
index a8b369cd352b89d567fb9f8926e58debf76cfc40..3792f2b7f4ff0686832458efcf533248c4aa356d 100644 (file)
@@ -193,7 +193,7 @@ struct intel_vgpu_gtt {
 
 extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
 extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
-void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu);
+void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
 
 extern int intel_gvt_init_gtt(struct intel_gvt *gvt);
index 8c5d5d005854217057e9bc347db5ce074cc46adb..a33c1c3e4a21e2ff96486b79440ef4553149b324 100644 (file)
@@ -1150,6 +1150,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
        switch (notification) {
        case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE:
                root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
+               /* fall through */
        case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE:
                mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
                return PTR_ERR_OR_ZERO(mm);
index c16a492449d7d158a6f2d9d13c42fafa523df962..1466d8769ec9facfc73c9ae4db922d0e09944840 100644 (file)
@@ -1301,7 +1301,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 
        }
 
-       return 0;
+       return -ENOTTY;
 }
 
 static ssize_t
index 84ca369f15a5c04f478a716eb7dfb13c96993f43..3b4daafebdcb3fc465696d9a8d88c25f14a2e8ff 100644 (file)
@@ -1105,30 +1105,32 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
        ret = i915_ggtt_probe_hw(dev_priv);
        if (ret)
-               return ret;
+               goto err_perf;
 
-       /* WARNING: Apparently we must kick fbdev drivers before vgacon,
-        * otherwise the vga fbdev driver falls over. */
+       /*
+        * WARNING: Apparently we must kick fbdev drivers before vgacon,
+        * otherwise the vga fbdev driver falls over.
+        */
        ret = i915_kick_out_firmware_fb(dev_priv);
        if (ret) {
                DRM_ERROR("failed to remove conflicting framebuffer drivers\n");
-               goto out_ggtt;
+               goto err_ggtt;
        }
 
        ret = i915_kick_out_vgacon(dev_priv);
        if (ret) {
                DRM_ERROR("failed to remove conflicting VGA console\n");
-               goto out_ggtt;
+               goto err_ggtt;
        }
 
        ret = i915_ggtt_init_hw(dev_priv);
        if (ret)
-               return ret;
+               goto err_ggtt;
 
        ret = i915_ggtt_enable_hw(dev_priv);
        if (ret) {
                DRM_ERROR("failed to enable GGTT\n");
-               goto out_ggtt;
+               goto err_ggtt;
        }
 
        pci_set_master(pdev);
@@ -1139,7 +1141,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
                if (ret) {
                        DRM_ERROR("failed to set DMA mask\n");
 
-                       goto out_ggtt;
+                       goto err_ggtt;
                }
        }
 
@@ -1157,7 +1159,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
                if (ret) {
                        DRM_ERROR("failed to set DMA mask\n");
 
-                       goto out_ggtt;
+                       goto err_ggtt;
                }
        }
 
@@ -1190,13 +1192,14 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
        ret = intel_gvt_init(dev_priv);
        if (ret)
-               goto out_ggtt;
+               goto err_ggtt;
 
        return 0;
 
-out_ggtt:
+err_ggtt:
        i915_ggtt_cleanup_hw(dev_priv);
-
+err_perf:
+       i915_perf_fini(dev_priv);
        return ret;
 }
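Annotation: the relabeled error path follows the kernel's goto-unwind idiom, where every failure after a successful step jumps to a label that undoes that step and everything before it, so cleanup runs in reverse order of setup; the hunk's actual fix is the added err_perf label, which unwinds i915_perf_init() as well. A generic, self-contained sketch of the idiom with illustrative stubs:

struct hw_state { int perf_up, ggtt_up; };

static int perf_init(struct hw_state *s)     { s->perf_up = 1; return 0; }
static void perf_fini(struct hw_state *s)    { s->perf_up = 0; }
static int ggtt_probe(struct hw_state *s)    { s->ggtt_up = 1; return 0; }
static void ggtt_cleanup(struct hw_state *s) { s->ggtt_up = 0; }
static int ggtt_enable(struct hw_state *s)   { (void)s; return 0; }

static int init_hw(struct hw_state *s)
{
        int ret;

        ret = perf_init(s);
        if (ret)
                return ret;        /* nothing to unwind yet */

        ret = ggtt_probe(s);
        if (ret)
                goto err_perf;     /* undo perf only */

        ret = ggtt_enable(s);
        if (ret)
                goto err_ggtt;     /* undo ggtt, then perf */

        return 0;

err_ggtt:
        ggtt_cleanup(s);
err_perf:
        perf_fini(s);
        return ret;
}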
 
index 8c170db8495d0317379ba76381042b7828a51551..0414228cd2b5f3c66489eafe7cf1152c7583db25 100644 (file)
@@ -728,7 +728,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
                err = radix_tree_insert(handles_vma, handle, vma);
                if (unlikely(err)) {
-                       kfree(lut);
+                       kmem_cache_free(eb->i915->luts, lut);
                        goto err_obj;
                }
 
index d596a8302ca3cca529977a10ebc02965855525af..854bd51b9478a59d6b0c18820c85498674842a66 100644 (file)
@@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
                            I915_USERPTR_UNSYNCHRONIZED))
                return -EINVAL;
 
+       if (!args->user_size)
+               return -EINVAL;
+
        if (offset_in_page(args->user_ptr | args->user_size))
                return -EINVAL;
 
index d8feb9053e0cc8e1bc5552189b9ff020c6e7495c..f0519e31543a6506d4eaf3d3213311c81be2ea1c 100644 (file)
@@ -473,20 +473,37 @@ static u64 get_rc6(struct drm_i915_private *i915)
                spin_lock_irqsave(&i915->pmu.lock, flags);
                spin_lock(&kdev->power.lock);
 
-               if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-                       i915->pmu.suspended_jiffies_last =
-                                               kdev->power.suspended_jiffies;
+               /*
+                * If intel_runtime_pm_get_if_in_use in the branch above failed
+                * to get a runtime PM reference, we cannot assume we are in
+                * runtime suspend: we may either a) race with coming out of it
+                * before we took power.lock, or b) be in a state other than
+                * suspended that can also bring us here.
+                *
+                * We need to double-check that we are indeed currently runtime
+                * suspended and, if not, we can do no better than report the
+                * last known RC6 value.
+                */
+               if (kdev->power.runtime_status == RPM_SUSPENDED) {
+                       if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+                               i915->pmu.suspended_jiffies_last =
+                                                 kdev->power.suspended_jiffies;
 
-               val = kdev->power.suspended_jiffies -
-                     i915->pmu.suspended_jiffies_last;
-               val += jiffies - kdev->power.accounting_timestamp;
+                       val = kdev->power.suspended_jiffies -
+                             i915->pmu.suspended_jiffies_last;
+                       val += jiffies - kdev->power.accounting_timestamp;
 
-               spin_unlock(&kdev->power.lock);
+                       val = jiffies_to_nsecs(val);
+                       val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 
-               val = jiffies_to_nsecs(val);
-               val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
-               i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+                       i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+               } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+                       val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+               } else {
+                       val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+               }
 
+               spin_unlock(&kdev->power.lock);
                spin_unlock_irqrestore(&i915->pmu.lock, flags);
        }
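Annotation: a condensed sketch of the fallback ladder this hunk introduces may help: estimate only while provably runtime suspended, otherwise reuse the last estimate, otherwise report the last hardware sample. Types and helpers below are illustrative, not the driver's:

#include <stdint.h>
#include <stdbool.h>

struct rc6_state {
        uint64_t last_hw;    /* last value read from hardware */
        uint64_t estimated;  /* last estimate made while suspended */
};

static uint64_t read_rc6(struct rc6_state *s, bool runtime_suspended,
                         uint64_t suspended_ns)
{
        if (runtime_suspended) {
                /* provably asleep: extend the counter by sleep time */
                s->estimated = s->last_hw + suspended_ns;
                return s->estimated;
        }
        if (s->estimated) {
                /* awake without a PM reference: reuse the estimate */
                return s->estimated;
        }
        /* no estimate ever made: report the last hardware sample */
        return s->last_hw;
}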
 
index e6a8c0ee7df113704d103eab2b1f7586253970f7..8a69a9275e28d3c87a4443bb950d7ab2d6351a9c 100644 (file)
@@ -7326,6 +7326,9 @@ enum {
 #define SLICE_ECO_CHICKEN0                     _MMIO(0x7308)
 #define   PIXEL_MASK_CAMMING_DISABLE           (1 << 14)
 
+#define GEN9_WM_CHICKEN3                       _MMIO(0x5588)
+#define   GEN9_FACTOR_IN_CLR_VAL_HIZ           (1 << 9)
+
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG         _MMIO(0x9030)
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB      (1<<11)
index 709d6ca680747d8d4458c72945093a8052d6f8c4..3ea566f99450e37f10317712b651b8104bda2a28 100644 (file)
@@ -729,7 +729,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev,
        struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
        u32 tmp;
 
-       if (!IS_GEN9_BC(dev_priv))
+       if (!IS_GEN9(dev_priv))
                return;
 
        i915_audio_component_get_power(kdev);
index c5c7530ba1570618d06a04f789029944296366de..447b721c3be9c2da800d0c39fb4208d1ab93e913 100644 (file)
@@ -1256,7 +1256,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
                return;
 
        aux_channel = child->aux_channel;
-       ddc_pin = child->ddc_pin;
 
        is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING;
        is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT;
@@ -1303,9 +1302,15 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
                DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port));
 
        if (is_dvi) {
-               info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin);
-
-               sanitize_ddc_pin(dev_priv, port);
+               ddc_pin = map_ddc_pin(dev_priv, child->ddc_pin);
+               if (intel_gmbus_is_valid_pin(dev_priv, ddc_pin)) {
+                       info->alternate_ddc_pin = ddc_pin;
+                       sanitize_ddc_pin(dev_priv, port);
+               } else {
+                       DRM_DEBUG_KMS("Port %c has invalid DDC pin %d, "
+                                     "sticking to defaults\n",
+                                     port_name(port), ddc_pin);
+               }
        }
 
        if (is_dp) {
index fc8b2c6e35088410a61f81eb317bfaa09848d6cf..704ddb4d3ca7ef6f40c565eeb07a5cbe8c427a97 100644 (file)
@@ -2140,10 +2140,22 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
                }
        }
 
-       /* According to BSpec, "The CD clock frequency must be at least twice
+       /*
+        * According to BSpec, "The CD clock frequency must be at least twice
         * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default.
+        *
+        * FIXME: Check the actual, not default, BCLK being used.
+        *
+        * FIXME: This does not depend on ->has_audio because the higher CDCLK
+        * is required for audio probe, also when there are no audio capable
+        * displays connected at probe time. This leads to unnecessarily high
+        * CDCLK when audio is not required.
+        *
+        * FIXME: This limit is only applied when there are displays connected
+        * at probe time. If we probe without displays, we'll still end up using
+        * the platform minimum CDCLK, failing audio probe.
         */
-       if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9)
+       if (INTEL_GEN(dev_priv) >= 9)
                min_cdclk = max(2 * 96000, min_cdclk);
 
        /*
@@ -2290,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state)
        return 0;
 }
 
+static int skl_dpll0_vco(struct intel_atomic_state *intel_state)
+{
+       struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev);
+       struct intel_crtc *crtc;
+       struct intel_crtc_state *crtc_state;
+       int vco, i;
+
+       vco = intel_state->cdclk.logical.vco;
+       if (!vco)
+               vco = dev_priv->skl_preferred_vco_freq;
+
+       for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
+               if (!crtc_state->base.enable)
+                       continue;
+
+               if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP))
+                       continue;
+
+               /*
+                * DPLL0 VCO may need to be adjusted to get the correct
+                * clock for eDP. This will affect cdclk as well.
+                */
+               switch (crtc_state->port_clock / 2) {
+               case 108000:
+               case 216000:
+                       vco = 8640000;
+                       break;
+               default:
+                       vco = 8100000;
+                       break;
+               }
+       }
+
+       return vco;
+}
+
 static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
 {
-       struct drm_i915_private *dev_priv = to_i915(state->dev);
        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
        int min_cdclk, cdclk, vco;
 
@@ -2300,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
        if (min_cdclk < 0)
                return min_cdclk;
 
-       vco = intel_state->cdclk.logical.vco;
-       if (!vco)
-               vco = dev_priv->skl_preferred_vco_freq;
+       vco = skl_dpll0_vco(intel_state);
 
        /*
         * FIXME should also account for plane ratio
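Annotation: the new skl_dpll0_vco() centralizes a rule previously buried in the DP code (removed in a later hunk of this series): eDP port clocks whose half-rate is 108 or 216 MHz divide evenly only from the 8640 MHz DPLL0 VCO, while everything else uses 8100 MHz. The rule in isolation, values in kHz as in the hunk:

/* only these two eDP half-rates need the bigger VCO */
static int dpll0_vco_khz(int port_clock_khz)
{
        switch (port_clock_khz / 2) {
        case 108000:
        case 216000:
                return 8640000;
        default:
                return 8100000;
        }
}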
index 41e6c75a7f3c6c4cdc55527230b594b6a68c7b2a..f9550ea46c26093c2865e1001de046be5a425eb8 100644 (file)
@@ -35,6 +35,7 @@
  */
 
 #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin"
+MODULE_FIRMWARE(I915_CSR_GLK);
 #define GLK_CSR_VERSION_REQUIRED       CSR_VERSION(1, 4)
 
 #define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin"
index 3b48fd2561feccc48b5e0c549ee68e5d25214555..56004ffbd8bbf4499f87e92823ba10e38eff55da 100644 (file)
@@ -15178,6 +15178,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
                memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
                if (crtc_state->base.active) {
                        intel_mode_from_pipe_config(&crtc->base.mode, crtc_state);
+                       crtc->base.mode.hdisplay = crtc_state->pipe_src_w;
+                       crtc->base.mode.vdisplay = crtc_state->pipe_src_h;
                        intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state);
                        WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode));
 
index 9a4a51e79fa12a8c75a773258d33db5214655ea6..b7b4cfdeb974a67c7eba222504ea8474d8bcd0df 100644 (file)
@@ -1881,26 +1881,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
                                reduce_m_n);
        }
 
-       /*
-        * DPLL0 VCO may need to be adjusted to get the correct
-        * clock for eDP. This will affect cdclk as well.
-        */
-       if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) {
-               int vco;
-
-               switch (pipe_config->port_clock / 2) {
-               case 108000:
-               case 216000:
-                       vco = 8640000;
-                       break;
-               default:
-                       vco = 8100000;
-                       break;
-               }
-
-               to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco;
-       }
-
        if (!HAS_DDI(dev_priv))
                intel_dp_set_clock(encoder, pipe_config);
 
index d4368589b3553657e14adbcc9c54a56ed5f30865..a80fbad9be0fe4defd46c597d2e76f9b479228b4 100644 (file)
  * check the condition before the timeout.
  */
 #define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
-       unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1;   \
+       const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
        long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
        int ret__;                                                      \
        might_sleep();                                                  \
        for (;;) {                                                      \
-               bool expired__ = time_after(jiffies, timeout__);        \
+               const bool expired__ = ktime_after(ktime_get_raw(), end__); \
                OP;                                                     \
                if (COND) {                                             \
                        ret__ = 0;                                      \
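Annotation: the rewrite moves __wait_for() from a jiffies deadline to ktime_get_raw(), so microsecond-scale timeouts are no longer rounded up to whole scheduler ticks; note the expiry is sampled before the condition so one final check always happens after the deadline. A userspace analogue of the same poll loop, assuming POSIX clock_gettime() and illustrative names:

#include <stdbool.h>
#include <time.h>

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static int wait_for_cond(bool (*cond)(void), long long timeout_us)
{
        const long long end = now_ns() + timeout_us * 1000LL;

        for (;;) {
                /* sample the clock before testing, so the condition
                 * is always checked once more after the deadline */
                const bool expired = now_ns() > end;

                if (cond())
                        return 0;
                if (expired)
                        return -1;  /* the macro uses -ETIMEDOUT */
                /* sleep/backoff between polls elided for brevity */
        }
}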
index 4ba139c27fbad43cb81435229e92bab7eba6a1e1..f7c25828d3bbacb743ecd468177cc510a5f16d4c 100644 (file)
@@ -1149,6 +1149,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 
+       /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
+       if (IS_GEN9_LP(dev_priv))
+               WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
+
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
        if (ret)
index 6f12adc063650c1ad4f696922dee1215a98398c0..6467a5cc2ca30e1ac33cca2c2dc08177e886ae1a 100644 (file)
@@ -806,7 +806,7 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev)
                return;
 
        intel_fbdev_sync(ifbdev);
-       if (ifbdev->vma)
+       if (ifbdev->vma || ifbdev->helper.deferred_setup)
                drm_fb_helper_hotplug_event(&ifbdev->helper);
 }
 
index 697af5add78bac2668987809fdbd94bc08aa0dc7..8704f7f8d0725664d41140dc2a3cd5e35d1aa612 100644 (file)
@@ -577,6 +577,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 * know the next preemption status we see corresponds
                 * to this ELSP update.
                 */
+               GEM_BUG_ON(!execlists_is_active(execlists,
+                                               EXECLISTS_ACTIVE_USER));
                GEM_BUG_ON(!port_count(&port[0]));
                if (port_count(&port[0]) > 1)
                        goto unlock;
@@ -738,6 +740,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
                memset(port, 0, sizeof(*port));
                port++;
        }
+
+       execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -880,6 +884,7 @@ static void execlists_submission_tasklet(unsigned long data)
 
                        head = execlists->csb_head;
                        tail = READ_ONCE(buf[write_idx]);
+                       rmb(); /* Hopefully paired with a wmb() in HW */
                }
                GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
                          engine->name,
@@ -1001,6 +1006,11 @@ static void execlists_submission_tasklet(unsigned long data)
 
        if (fw)
                intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+
+       /* If the engine is now idle, so should be the flag; and vice versa. */
+       GEM_BUG_ON(execlists_is_active(&engine->execlists,
+                                      EXECLISTS_ACTIVE_USER) ==
+                  !port_isset(engine->execlists.port));
 }
 
 static void queue_request(struct intel_engine_cs *engine,
index d35d2d50f5951542bc58ec8bd19a51952300bb16..8691c86f579c7dc2eba793af80cd4ca0ce392595 100644 (file)
@@ -326,7 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder,
 
        I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON);
        POSTING_READ(lvds_encoder->reg);
-       if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000))
+
+       if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000))
                DRM_ERROR("timed out waiting for panel to power on\n");
 
        intel_panel_enable_backlight(pipe_config, conn_state);
index 53ea564f971ef6acd0ecf01d79254f2993450a78..66de4b2dc8b75c7f4faebd3c8d2f8af54c6a6871 100644 (file)
@@ -641,19 +641,18 @@ void skl_enable_dc6(struct drm_i915_private *dev_priv)
 
        DRM_DEBUG_KMS("Enabling DC6\n");
 
-       gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+       /* Wa Display #1183: skl,kbl,cfl */
+       if (IS_GEN9_BC(dev_priv))
+               I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+                          SKL_SELECT_ALTERNATE_DC_EXIT);
 
+       gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
 }
 
 void skl_disable_dc6(struct drm_i915_private *dev_priv)
 {
        DRM_DEBUG_KMS("Disabling DC6\n");
 
-       /* Wa Display #1183: skl,kbl,cfl */
-       if (IS_GEN9_BC(dev_priv))
-               I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
-                          SKL_SELECT_ALTERNATE_DC_EXIT);
-
        gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 }
 
index 6e5e1aa54ce19774340bd752785f25b8dcd20389..b001699297c486ab075a76bedd3c27d0c95572ee 100644 (file)
@@ -351,6 +351,7 @@ static void mdp4_crtc_atomic_flush(struct drm_crtc *crtc,
 
        spin_lock_irqsave(&dev->event_lock, flags);
        mdp4_crtc->event = crtc->state->event;
+       crtc->state->event = NULL;
        spin_unlock_irqrestore(&dev->event_lock, flags);
 
        blend_setup(crtc);
index 9893e43ba6c5e530af187b1114f853f87c1fcd88..76b96081916f0b9567a8b021d890cd572cad0534 100644 (file)
@@ -708,6 +708,7 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc,
 
        spin_lock_irqsave(&dev->event_lock, flags);
        mdp5_crtc->event = crtc->state->event;
+       crtc->state->event = NULL;
        spin_unlock_irqrestore(&dev->event_lock, flags);
 
        /*
index b4a8aa4490eed8ced3aaf167d4172331abcd5d4f..005760bee708ea5d0c59c1935df0b4224c334ee5 100644 (file)
@@ -171,7 +171,8 @@ uint32_t mdp_get_formats(uint32_t *pixel_formats, uint32_t max_formats,
        return i;
 }
 
-const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format)
+const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format,
+               uint64_t modifier)
 {
        int i;
        for (i = 0; i < ARRAY_SIZE(formats); i++) {
index 1185487e7e5e36c6f208a9e6254a17aec1931700..4fa8dbe4e165cdd834ffc76d08949b073b3c61cf 100644 (file)
@@ -98,7 +98,7 @@ struct mdp_format {
 #define MDP_FORMAT_IS_YUV(mdp_format) ((mdp_format)->is_yuv)
 
 uint32_t mdp_get_formats(uint32_t *formats, uint32_t max_formats, bool rgb_only);
-const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format);
+const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier);
 
 /* MDP capabilities */
 #define MDP_CAP_SMP            BIT(0)  /* Shared Memory Pool                 */
index 7a03a94897088a474aa966ea86f778e5dde238e9..8baba30d6c659cfde69c52cd69b2adc3203c54a9 100644 (file)
@@ -173,6 +173,7 @@ struct msm_dsi_host {
 
        bool registered;
        bool power_on;
+       bool enabled;
        int irq;
 };
 
@@ -775,7 +776,7 @@ static inline enum dsi_cmd_dst_format dsi_get_cmd_fmt(
        switch (mipi_fmt) {
        case MIPI_DSI_FMT_RGB888:       return CMD_DST_FORMAT_RGB888;
        case MIPI_DSI_FMT_RGB666_PACKED:
-       case MIPI_DSI_FMT_RGB666:       return VID_DST_FORMAT_RGB666;
+       case MIPI_DSI_FMT_RGB666:       return CMD_DST_FORMAT_RGB666;
        case MIPI_DSI_FMT_RGB565:       return CMD_DST_FORMAT_RGB565;
        default:                        return CMD_DST_FORMAT_RGB888;
        }
@@ -986,13 +987,19 @@ static void dsi_set_tx_power_mode(int mode, struct msm_dsi_host *msm_host)
 
 static void dsi_wait4video_done(struct msm_dsi_host *msm_host)
 {
+       u32 ret = 0;
+       struct device *dev = &msm_host->pdev->dev;
+
        dsi_intr_ctrl(msm_host, DSI_IRQ_MASK_VIDEO_DONE, 1);
 
        reinit_completion(&msm_host->video_comp);
 
-       wait_for_completion_timeout(&msm_host->video_comp,
+       ret = wait_for_completion_timeout(&msm_host->video_comp,
                        msecs_to_jiffies(70));
 
+       if (ret <= 0)
+               dev_err(dev, "wait for video done timed out\n");
+
        dsi_intr_ctrl(msm_host, DSI_IRQ_MASK_VIDEO_DONE, 0);
 }
 
@@ -1001,7 +1008,7 @@ static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host)
        if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO))
                return;
 
-       if (msm_host->power_on) {
+       if (msm_host->power_on && msm_host->enabled) {
                dsi_wait4video_done(msm_host);
                /* delay 4 ms to skip BLLP */
                usleep_range(2000, 4000);
@@ -2203,7 +2210,7 @@ int msm_dsi_host_enable(struct mipi_dsi_host *host)
         *      pm_runtime_put_autosuspend(&msm_host->pdev->dev);
         * }
         */
-
+       msm_host->enabled = true;
        return 0;
 }
 
@@ -2211,6 +2218,7 @@ int msm_dsi_host_disable(struct mipi_dsi_host *host)
 {
        struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
 
+       msm_host->enabled = false;
        dsi_op_mode_config(msm_host,
                !!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO), false);
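Annotation: the timeout check added to dsi_wait4video_done() relies on the return convention of wait_for_completion_timeout(), which yields the remaining jiffies (greater than zero) on success and 0 on timeout, so a zero result is what signals the error; the hunk stores the result in a u32, where the <= 0 test reduces to == 0. A fragment showing just that convention, under the same assumptions as the hunk:

        unsigned long left;

        left = wait_for_completion_timeout(&msm_host->video_comp,
                                           msecs_to_jiffies(70));
        if (!left)
                dev_err(dev, "wait for video done timed out\n");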
 
index 8e9d5c255820272ebe0a759ed51e4aa0ee6047b9..9a9fa0c75a131083f32c57f5cd89a0e02c7b87d1 100644 (file)
@@ -265,6 +265,115 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing,
        return 0;
 }
 
+int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
+       struct msm_dsi_phy_clk_request *clk_req)
+{
+       const unsigned long bit_rate = clk_req->bitclk_rate;
+       const unsigned long esc_rate = clk_req->escclk_rate;
+       s32 ui, ui_x8, lpx;
+       s32 tmax, tmin;
+       s32 pcnt0 = 50;
+       s32 pcnt1 = 50;
+       s32 pcnt2 = 10;
+       s32 pcnt3 = 30;
+       s32 pcnt4 = 10;
+       s32 pcnt5 = 2;
+       s32 coeff = 1000; /* Precision, should avoid overflow */
+       s32 hb_en, hb_en_ckln;
+       s32 temp;
+
+       if (!bit_rate || !esc_rate)
+               return -EINVAL;
+
+       timing->hs_halfbyte_en = 0;
+       hb_en = 0;
+       timing->hs_halfbyte_en_ckln = 0;
+       hb_en_ckln = 0;
+
+       ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000);
+       ui_x8 = ui << 3;
+       lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000);
+
+       temp = S_DIV_ROUND_UP(38 * coeff, ui_x8);
+       tmin = max_t(s32, temp, 0);
+       temp = (95 * coeff) / ui_x8;
+       tmax = max_t(s32, temp, 0);
+       timing->clk_prepare = linear_inter(tmax, tmin, pcnt0, 0, false);
+
+       temp = 300 * coeff - (timing->clk_prepare << 3) * ui;
+       tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1;
+       tmax = (tmin > 255) ? 511 : 255;
+       timing->clk_zero = linear_inter(tmax, tmin, pcnt5, 0, false);
+
+       tmin = DIV_ROUND_UP(60 * coeff + 3 * ui, ui_x8);
+       temp = 105 * coeff + 12 * ui - 20 * coeff;
+       tmax = (temp + 3 * ui) / ui_x8;
+       timing->clk_trail = linear_inter(tmax, tmin, pcnt3, 0, false);
+
+       temp = S_DIV_ROUND_UP(40 * coeff + 4 * ui, ui_x8);
+       tmin = max_t(s32, temp, 0);
+       temp = (85 * coeff + 6 * ui) / ui_x8;
+       tmax = max_t(s32, temp, 0);
+       timing->hs_prepare = linear_inter(tmax, tmin, pcnt1, 0, false);
+
+       temp = 145 * coeff + 10 * ui - (timing->hs_prepare << 3) * ui;
+       tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1;
+       tmax = 255;
+       timing->hs_zero = linear_inter(tmax, tmin, pcnt4, 0, false);
+
+       tmin = DIV_ROUND_UP(60 * coeff + 4 * ui, ui_x8) - 1;
+       temp = 105 * coeff + 12 * ui - 20 * coeff;
+       tmax = (temp / ui_x8) - 1;
+       timing->hs_trail = linear_inter(tmax, tmin, pcnt3, 0, false);
+
+       temp = 50 * coeff + ((hb_en << 2) - 8) * ui;
+       timing->hs_rqst = S_DIV_ROUND_UP(temp, ui_x8);
+
+       tmin = DIV_ROUND_UP(100 * coeff, ui_x8) - 1;
+       tmax = 255;
+       timing->hs_exit = linear_inter(tmax, tmin, pcnt2, 0, false);
+
+       temp = 50 * coeff + ((hb_en_ckln << 2) - 8) * ui;
+       timing->hs_rqst_ckln = S_DIV_ROUND_UP(temp, ui_x8);
+
+       temp = 60 * coeff + 52 * ui - 43 * ui;
+       tmin = DIV_ROUND_UP(temp, ui_x8) - 1;
+       tmax = 63;
+       timing->shared_timings.clk_post =
+               linear_inter(tmax, tmin, pcnt2, 0, false);
+
+       temp = 8 * ui + (timing->clk_prepare << 3) * ui;
+       temp += (((timing->clk_zero + 3) << 3) + 11) * ui;
+       temp += hb_en_ckln ? (((timing->hs_rqst_ckln << 3) + 4) * ui) :
+               (((timing->hs_rqst_ckln << 3) + 8) * ui);
+       tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1;
+       tmax = 63;
+       if (tmin > tmax) {
+               temp = linear_inter(tmax << 1, tmin, pcnt2, 0, false);
+               timing->shared_timings.clk_pre = temp >> 1;
+               timing->shared_timings.clk_pre_inc_by_2 = 1;
+       } else {
+               timing->shared_timings.clk_pre =
+                       linear_inter(tmax, tmin, pcnt2, 0, false);
+               timing->shared_timings.clk_pre_inc_by_2 = 0;
+       }
+
+       timing->ta_go = 3;
+       timing->ta_sure = 0;
+       timing->ta_get = 4;
+
+       DBG("%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d",
+               timing->shared_timings.clk_pre, timing->shared_timings.clk_post,
+               timing->shared_timings.clk_pre_inc_by_2, timing->clk_zero,
+               timing->clk_trail, timing->clk_prepare, timing->hs_exit,
+               timing->hs_zero, timing->hs_prepare, timing->hs_trail,
+               timing->hs_rqst, timing->hs_rqst_ckln, timing->hs_halfbyte_en,
+               timing->hs_halfbyte_en_ckln, timing->hs_prep_dly,
+               timing->hs_prep_dly_ckln);
+
+       return 0;
+}
+
 void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg,
                                u32 bit_mask)
 {
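Annotation: each parameter above is derived by computing a [tmin, tmax] window from the D-PHY spec timings and then picking a point a fixed percentage into that window (the pcnt* values). The driver's existing linear_inter() helper is assumed to behave roughly as follows, modulo rounding and clamping details not visible in this diff:

/* assumed shape of the interpolation helper; illustrative only */
static int pick_in_window(int tmax, int tmin, int percent)
{
        /* percent = 0 picks tmin, percent = 100 picks tmax */
        return tmin + ((tmax - tmin) * percent) / 100;
}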
index c56268cbdb3d81925410be764515d0f6be8a9864..a24ab80994a3c08710723f6a0325512785196a84 100644 (file)
@@ -101,6 +101,8 @@ int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing,
                             struct msm_dsi_phy_clk_request *clk_req);
 int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing,
                                struct msm_dsi_phy_clk_request *clk_req);
+int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
+                               struct msm_dsi_phy_clk_request *clk_req);
 void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg,
                                u32 bit_mask);
 int msm_dsi_phy_init_common(struct msm_dsi_phy *phy);
index 0af951aaeea1e2d563d4bd2238241e87791a65f7..b3fffc8dbb2ab572aa688aabe8ef4546728e3fb8 100644 (file)
@@ -79,34 +79,6 @@ static void dsi_phy_hw_v3_0_lane_settings(struct msm_dsi_phy *phy)
        dsi_phy_write(lane_base + REG_DSI_10nm_PHY_LN_TX_DCTRL(3), 0x04);
 }
 
-static int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
-                                      struct msm_dsi_phy_clk_request *clk_req)
-{
-       /*
-        * TODO: These params need to be computed, they're currently hardcoded
-        * for a 1440x2560@60Hz panel with a byteclk of 100.618 Mhz, and a
-        * default escape clock of 19.2 Mhz.
-        */
-
-       timing->hs_halfbyte_en = 0;
-       timing->clk_zero = 0x1c;
-       timing->clk_prepare = 0x07;
-       timing->clk_trail = 0x07;
-       timing->hs_exit = 0x23;
-       timing->hs_zero = 0x21;
-       timing->hs_prepare = 0x07;
-       timing->hs_trail = 0x07;
-       timing->hs_rqst = 0x05;
-       timing->ta_sure = 0x00;
-       timing->ta_go = 0x03;
-       timing->ta_get = 0x04;
-
-       timing->shared_timings.clk_pre = 0x2d;
-       timing->shared_timings.clk_post = 0x0d;
-
-       return 0;
-}
-
 static int dsi_10nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
                               struct msm_dsi_phy_clk_request *clk_req)
 {
index 0e0c87252ab07c9777cd5f623e71d92c86ea0175..7a16242bf8bf28bb5d7493fb47ded89c754f7092 100644 (file)
@@ -183,7 +183,8 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
        hsub = drm_format_horz_chroma_subsampling(mode_cmd->pixel_format);
        vsub = drm_format_vert_chroma_subsampling(mode_cmd->pixel_format);
 
-       format = kms->funcs->get_format(kms, mode_cmd->pixel_format);
+       format = kms->funcs->get_format(kms, mode_cmd->pixel_format,
+                       mode_cmd->modifier[0]);
        if (!format) {
                dev_err(dev->dev, "unsupported pixel format: %4.4s\n",
                                (char *)&mode_cmd->pixel_format);
index c178563fcd4dc56a2d1fbdaf4197c847b5c11904..456622b4633558b7d0d98a1d57729aac9d33c57f 100644 (file)
@@ -92,8 +92,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 
        if (IS_ERR(fb)) {
                dev_err(dev->dev, "failed to allocate fb\n");
-               ret = PTR_ERR(fb);
-               goto fail;
+               return PTR_ERR(fb);
        }
 
        bo = msm_framebuffer_bo(fb, 0);
@@ -151,13 +150,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 
 fail_unlock:
        mutex_unlock(&dev->struct_mutex);
-fail:
-
-       if (ret) {
-               if (fb)
-                       drm_framebuffer_remove(fb);
-       }
-
+       drm_framebuffer_remove(fb);
        return ret;
 }
 
index 95196479f651b18697229765432a0493f6b7d4ce..f583bb4222f9ad13dc17d74ec13d65ad16734506 100644 (file)
@@ -132,17 +132,19 @@ static void put_pages(struct drm_gem_object *obj)
        struct msm_gem_object *msm_obj = to_msm_bo(obj);
 
        if (msm_obj->pages) {
-               /* For non-cached buffers, ensure the new pages are clean
-                * because display controller, GPU, etc. are not coherent:
-                */
-               if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-                       dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
-                                       msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+               if (msm_obj->sgt) {
+                       /* For non-cached buffers, ensure the new
+                        * pages are clean because display controller,
+                        * GPU, etc. are not coherent:
+                        */
+                       if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
+                               dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
+                                            msm_obj->sgt->nents,
+                                            DMA_BIDIRECTIONAL);
 
-               if (msm_obj->sgt)
                        sg_free_table(msm_obj->sgt);
-
-               kfree(msm_obj->sgt);
+                       kfree(msm_obj->sgt);
+               }
 
                if (use_pages(obj))
                        drm_gem_put_pages(obj, msm_obj->pages, true, false);
index 17d5824417ad32a0fbddaf94069a0bb453c8403c..aaa329dc020ece445a5334329ae8ab34f68c8035 100644 (file)
@@ -48,8 +48,11 @@ struct msm_kms_funcs {
        /* functions to wait for atomic commit completed on each CRTC */
        void (*wait_for_crtc_commit_done)(struct msm_kms *kms,
                                        struct drm_crtc *crtc);
+       /* get msm_format w/ optional format modifiers from drm_mode_fb_cmd2 */
+       const struct msm_format *(*get_format)(struct msm_kms *kms,
+                                       const uint32_t format,
+                                       const uint64_t modifiers);
        /* misc: */
-       const struct msm_format *(*get_format)(struct msm_kms *kms, uint32_t format);
        long (*round_pixclk)(struct msm_kms *kms, unsigned long rate,
                        struct drm_encoder *encoder);
        int (*set_split_display)(struct msm_kms *kms,
index 6f402c4f2bdd695cfe5d9344bcabba12e5a30e12..ab61c038f42c9d3eae6bc5c67ea3a9920cda8766 100644 (file)
@@ -214,7 +214,6 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
        INIT_LIST_HEAD(&nvbo->entry);
        INIT_LIST_HEAD(&nvbo->vma_list);
        nvbo->bo.bdev = &drm->ttm.bdev;
-       nvbo->cli = cli;
 
        /* This is confusing, and doesn't actually mean we want an uncached
         * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated
index be8e00b49cdef1a961a7fcbd3f0cc25770be24cb..73c48440d4d7fb53cb3b09d6cb933b330489b0e0 100644 (file)
@@ -26,8 +26,6 @@ struct nouveau_bo {
 
        struct list_head vma_list;
 
-       struct nouveau_cli *cli;
-
        unsigned contig:1;
        unsigned page:5;
        unsigned kind:8;
index dff51a0ee0281e8f5924ffc0135d8b4baf8542f9..8c093ca4222e25b04b52096d4066159cdeb65451 100644 (file)
@@ -63,7 +63,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
                         struct ttm_mem_reg *reg)
 {
        struct nouveau_bo *nvbo = nouveau_bo(bo);
-       struct nouveau_drm *drm = nvbo->cli->drm;
+       struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
        struct nouveau_mem *mem;
        int ret;
 
@@ -103,7 +103,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
                         struct ttm_mem_reg *reg)
 {
        struct nouveau_bo *nvbo = nouveau_bo(bo);
-       struct nouveau_drm *drm = nvbo->cli->drm;
+       struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
        struct nouveau_mem *mem;
        int ret;
 
@@ -131,7 +131,7 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
                      struct ttm_mem_reg *reg)
 {
        struct nouveau_bo *nvbo = nouveau_bo(bo);
-       struct nouveau_drm *drm = nvbo->cli->drm;
+       struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
        struct nouveau_mem *mem;
        int ret;
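Annotation: these nouveau hunks drop the cached nvbo->cli back-pointer and instead recover the driver instance from the embedded TTM device via nouveau_bdev(), avoiding a per-BO pointer that can go stale. The underlying container_of() pattern, reduced to standard C with hypothetical types:

#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct ttm_device { int placeholder; };

struct drm_private {
        int instance;
        struct ttm_device bdev;   /* embedded, not pointed-to */
};

/* recover the enclosing object from a pointer to its member */
static struct drm_private *priv_from_bdev(struct ttm_device *bd)
{
        return container_of(bd, struct drm_private, bdev);
}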
 
index 8bd739cfd00d97f895ab2be06e3405e272e84992..2b3ccd85075058b86e0fb17e07eb9f3e791abf5e 100644 (file)
@@ -3264,10 +3264,11 @@ nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr,
 
        drm_connector_unregister(&mstc->connector);
 
-       drm_modeset_lock_all(drm->dev);
        drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector);
+
+       drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL);
        mstc->port = NULL;
-       drm_modeset_unlock_all(drm->dev);
+       drm_modeset_unlock(&drm->dev->mode_config.connection_mutex);
 
        drm_connector_unreference(&mstc->connector);
 }
@@ -3277,9 +3278,7 @@ nv50_mstm_register_connector(struct drm_connector *connector)
 {
        struct nouveau_drm *drm = nouveau_drm(connector->dev);
 
-       drm_modeset_lock_all(drm->dev);
        drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector);
-       drm_modeset_unlock_all(drm->dev);
 
        drm_connector_register(connector);
 }
index 5e2e65e888476f6bd39e17aa58a3b8e1137caf90..7f3ac6b13b56745540843bf92ff43fb3dab6f34c 100644 (file)
@@ -828,6 +828,12 @@ static void dispc_ovl_set_scale_coef(struct dispc_device *dispc,
        h_coef = dispc_ovl_get_scale_coef(fir_hinc, true);
        v_coef = dispc_ovl_get_scale_coef(fir_vinc, five_taps);
 
+       if (!h_coef || !v_coef) {
+               dev_err(&dispc->pdev->dev, "%s: failed to find scale coefs\n",
+                       __func__);
+               return;
+       }
+
        for (i = 0; i < 8; i++) {
                u32 h, hv;
 
@@ -2342,7 +2348,7 @@ static int dispc_ovl_calc_scaling_24xx(struct dispc_device *dispc,
        }
 
        if (in_width > maxsinglelinewidth) {
-               DSSERR("Cannot scale max input width exceeded");
+               DSSERR("Cannot scale max input width exceeded\n");
                return -EINVAL;
        }
        return 0;
@@ -2424,13 +2430,13 @@ static int dispc_ovl_calc_scaling_34xx(struct dispc_device *dispc,
        }
 
        if (in_width > (maxsinglelinewidth * 2)) {
-               DSSERR("Cannot setup scaling");
-               DSSERR("width exceeds maximum width possible");
+               DSSERR("Cannot setup scaling\n");
+               DSSERR("width exceeds maximum width possible\n");
                return -EINVAL;
        }
 
        if (in_width > maxsinglelinewidth && *five_taps) {
-               DSSERR("cannot setup scaling with five taps");
+               DSSERR("cannot setup scaling with five taps\n");
                return -EINVAL;
        }
        return 0;
@@ -2472,7 +2478,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc,
                        in_width > maxsinglelinewidth && ++*decim_x);
 
        if (in_width > maxsinglelinewidth) {
-               DSSERR("Cannot scale width exceeds max line width");
+               DSSERR("Cannot scale width exceeds max line width\n");
                return -EINVAL;
        }
 
@@ -2490,7 +2496,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc,
                 * bandwidth. Despite what theory says this appears to
                 * be true also for 16-bit color formats.
                 */
-               DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)", *decim_x);
+               DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)\n", *decim_x);
 
                return -EINVAL;
        }
@@ -4633,7 +4639,7 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc)
                                                i734_buf.size, &i734_buf.paddr,
                                                GFP_KERNEL);
        if (!i734_buf.vaddr) {
-               dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed",
+               dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n",
                        __func__);
                return -ENOMEM;
        }
index 97c88861d67aaf3ca19bf8f60d648095dd4427da..5879f45f6fc9bb5889f80884b7a352fcb6836363 100644 (file)
@@ -679,7 +679,7 @@ static int hdmi_audio_config(struct device *dev,
                             struct omap_dss_audio *dss_audio)
 {
        struct omap_hdmi *hd = dev_get_drvdata(dev);
-       int ret;
+       int ret = 0;
 
        mutex_lock(&hd->lock);
 
index 35ed2add6189c68e405dc5cf0715ad329c5ece9a..813ba42f27539ce94b85afc20295e411f6b4c123 100644 (file)
@@ -922,8 +922,13 @@ int hdmi4_core_init(struct platform_device *pdev, struct hdmi_core_data *core)
 {
        const struct hdmi4_features *features;
        struct resource *res;
+       const struct soc_device_attribute *soc;
 
-       features = soc_device_match(hdmi4_soc_devices)->data;
+       soc = soc_device_match(hdmi4_soc_devices);
+       if (!soc)
+               return -ENODEV;
+
+       features = soc->data;
        core->cts_swmode = features->cts_swmode;
        core->audio_use_mclk = features->audio_use_mclk;
 
index d28da9ac3e900e57b2e6c762af197d9a04d0ea23..ae1a001d1b838ad32c7ae39a04a7103d6d7b99b1 100644 (file)
@@ -671,7 +671,7 @@ static int hdmi_audio_config(struct device *dev,
                             struct omap_dss_audio *dss_audio)
 {
        struct omap_hdmi *hd = dev_get_drvdata(dev);
-       int ret;
+       int ret = 0;
 
        mutex_lock(&hd->lock);
 
index a0d7b1d905e85500cc011d76fefb19453384a7fe..5cde26ac937bd3c88d7b4be1e2355ca196fe0c8d 100644 (file)
@@ -121,6 +121,9 @@ static int omap_connector_get_modes(struct drm_connector *connector)
        if (dssdrv->read_edid) {
                void *edid = kzalloc(MAX_EDID, GFP_KERNEL);
 
+               if (!edid)
+                       return 0;
+
                if ((dssdrv->read_edid(dssdev, edid, MAX_EDID) > 0) &&
                                drm_edid_is_valid(edid)) {
                        drm_mode_connector_update_edid_property(
@@ -139,6 +142,9 @@ static int omap_connector_get_modes(struct drm_connector *connector)
                struct drm_display_mode *mode = drm_mode_create(dev);
                struct videomode vm = {0};
 
+               if (!mode)
+                       return 0;
+
                dssdrv->get_timings(dssdev, &vm);
 
                drm_display_mode_from_videomode(&vm, mode);
@@ -200,6 +206,10 @@ static int omap_connector_mode_valid(struct drm_connector *connector,
        if (!r) {
                /* check if vrefresh is still valid */
                new_mode = drm_mode_duplicate(dev, mode);
+
+               if (!new_mode)
+                       return MODE_BAD;
+
                new_mode->clock = vm.pixelclock / 1000;
                new_mode->vrefresh = 0;
                if (mode->vrefresh == drm_mode_vrefresh(new_mode))
index f9fa1c90b35c6bda4f7a0a2461dc4f3e042cffdc..401c02e9e6b2e2b69e3bcaf35e1e390be09dc0b4 100644 (file)
@@ -401,12 +401,16 @@ int tiler_unpin(struct tiler_block *block)
 struct tiler_block *tiler_reserve_2d(enum tiler_fmt fmt, u16 w,
                u16 h, u16 align)
 {
-       struct tiler_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+       struct tiler_block *block;
        u32 min_align = 128;
        int ret;
        unsigned long flags;
        u32 slot_bytes;
 
+       block = kzalloc(sizeof(*block), GFP_KERNEL);
+       if (!block)
+               return ERR_PTR(-ENOMEM);
+
        BUG_ON(!validfmt(fmt));
 
        /* convert width/height to slots */
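
The hunk above makes tiler_reserve_2d() return ERR_PTR(-ENOMEM) instead of dereferencing a failed allocation, so callers are expected to test the result with IS_ERR(). A minimal userspace sketch of that encoding follows; the real macros live in <linux/err.h>, and the values here are only illustrative.

/* Userspace sketch of the ERR_PTR convention the fix above relies on.
 * MAX_ERRNO and the -12 (ENOMEM) value mirror the kernel's scheme but
 * are reproduced here purely for illustration. */
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095
#define ERR_PTR(err)  ((void *)(long)(err))
#define PTR_ERR(ptr)  ((long)(ptr))
#define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static void *reserve(int fail)
{
	if (fail)
		return ERR_PTR(-12);	/* like returning ERR_PTR(-ENOMEM) */
	return malloc(16);
}

int main(void)
{
	void *block = reserve(1);

	if (IS_ERR(block)) {		/* caller must check, not deref */
		printf("reserve failed: %ld\n", PTR_ERR(block));
		return 1;
	}
	free(block);
	return 0;
}
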
index d7f7bc9f061af0a989ebe7924bd624dea4266786..817be3c418634f6a889a6e8f22652889d299761a 100644 (file)
@@ -90,7 +90,7 @@ static int l2r_t2b(u16 w, u16 h, u16 a, s16 offset,
 {
        int i;
        unsigned long index;
-       bool area_free;
+       bool area_free = false;
        unsigned long slots_per_band = PAGE_SIZE / slot_bytes;
        unsigned long bit_offset = (offset > 0) ? offset / slot_bytes : 0;
        unsigned long curr_bit = bit_offset;
index c0fb52c6d4caaae43753a387f2fa5898019524db..01665b98c57e18cd2bf5c59323e1df9e95702aff 100644 (file)
@@ -179,10 +179,9 @@ qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea
                              uint32_t type, bool interruptible)
 {
        struct qxl_command cmd;
-       struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
 
        cmd.type = type;
-       cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
+       cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset);
 
        return qxl_ring_push(qdev->command_ring, &cmd, interruptible);
 }
@@ -192,10 +191,9 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas
                             uint32_t type, bool interruptible)
 {
        struct qxl_command cmd;
-       struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
 
        cmd.type = type;
-       cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
+       cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset);
 
        return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible);
 }
index 00a1a66b052a5c31fe925bebb4800155013e3897..864b456080c4bf9262121e2e0bbee51a47272a07 100644 (file)
@@ -167,6 +167,7 @@ struct qxl_release {
 
        int id;
        int type;
+       struct qxl_bo *release_bo;
        uint32_t release_offset;
        uint32_t surface_release_id;
        struct ww_acquire_ctx ticket;
index e238a1a2eca1cf47c88248ff3b7419579055a5bc..6cc9f3367fa05581a90280b7cc111259cc2692fa 100644 (file)
@@ -182,9 +182,9 @@ static int qxl_process_single_command(struct qxl_device *qdev,
                goto out_free_reloc;
 
        /* TODO copy slow path code from i915 */
-       fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE));
+       fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_MASK));
        unwritten = __copy_from_user_inatomic_nocache
-               (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE),
+               (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_MASK),
                 u64_to_user_ptr(cmd->command), cmd->command_size);
 
        {
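
The qxl hunks in this file and in qxl_release.c swap PAGE_SIZE for PAGE_MASK: masking an offset with PAGE_SIZE only tests a single bit, whereas PAGE_MASK yields the page-aligned base and ~PAGE_MASK the remainder within the page. A runnable userspace demonstration, assuming 4 KiB pages for illustration:

/* Userspace sketch of the PAGE_SIZE vs. PAGE_MASK confusion fixed above;
 * the 4 KiB page size is an assumption for the example. */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long offset = 0x2345;

	/* Buggy: tests a single bit; yields 0 or 0x1000, never a base. */
	printf("offset & PAGE_SIZE  = %#lx\n", offset & PAGE_SIZE);
	/* Fixed: page-aligned base and the offset within that page. */
	printf("offset & PAGE_MASK  = %#lx\n", offset & PAGE_MASK);	/* 0x2000 */
	printf("offset & ~PAGE_MASK = %#lx\n", offset & ~PAGE_MASK);	/* 0x345 */
	return 0;
}
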
index 5d84a66fed3638144917124a02c68dfd420a2baa..7cb214577275ba76ab0b6045e5863394f1ebb5f9 100644 (file)
@@ -173,6 +173,7 @@ qxl_release_free_list(struct qxl_release *release)
                list_del(&entry->tv.head);
                kfree(entry);
        }
+       release->release_bo = NULL;
 }
 
 void
@@ -296,7 +297,6 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 {
        if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) {
                int idr_ret;
-               struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head);
                struct qxl_bo *bo;
                union qxl_release_info *info;
 
@@ -304,8 +304,9 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
                idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
                if (idr_ret < 0)
                        return idr_ret;
-               bo = to_qxl_bo(entry->tv.bo);
+               bo = create_rel->release_bo;
 
+               (*release)->release_bo = bo;
                (*release)->release_offset = create_rel->release_offset + 64;
 
                qxl_release_list_add(*release, bo);
@@ -365,6 +366,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 
        bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]);
 
+       (*release)->release_bo = bo;
        (*release)->release_offset = qdev->current_release_bo_offset[cur_idx] * release_size_per_bo[cur_idx];
        qdev->current_release_bo_offset[cur_idx]++;
 
@@ -408,13 +410,12 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev,
 {
        void *ptr;
        union qxl_release_info *info;
-       struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
-       struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
+       struct qxl_bo *bo = release->release_bo;
 
-       ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE);
+       ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_MASK);
        if (!ptr)
                return NULL;
-       info = ptr + (release->release_offset & ~PAGE_SIZE);
+       info = ptr + (release->release_offset & ~PAGE_MASK);
        return info;
 }
 
@@ -422,11 +423,10 @@ void qxl_release_unmap(struct qxl_device *qdev,
                       struct qxl_release *release,
                       union qxl_release_info *info)
 {
-       struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
-       struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
+       struct qxl_bo *bo = release->release_bo;
        void *ptr;
 
-       ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE);
+       ptr = ((void *)info) - (release->release_offset & ~PAGE_MASK);
        qxl_bo_kunmap_atomic_page(qdev, bo, ptr);
 }
 
index bffff4c9fbf56f92dc8a3b566c33e266c729fb95..be3f14d7746deee1b0183c113dc653e4c7e3629d 100644 (file)
@@ -94,64 +94,9 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder)
        }
 }
 
-static enum drm_mode_status sun4i_lvds_encoder_mode_valid(struct drm_encoder *crtc,
-                                                         const struct drm_display_mode *mode)
-{
-       struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(crtc);
-       struct sun4i_tcon *tcon = lvds->tcon;
-       u32 hsync = mode->hsync_end - mode->hsync_start;
-       u32 vsync = mode->vsync_end - mode->vsync_start;
-       unsigned long rate = mode->clock * 1000;
-       long rounded_rate;
-
-       DRM_DEBUG_DRIVER("Validating modes...\n");
-
-       if (hsync < 1)
-               return MODE_HSYNC_NARROW;
-
-       if (hsync > 0x3ff)
-               return MODE_HSYNC_WIDE;
-
-       if ((mode->hdisplay < 1) || (mode->htotal < 1))
-               return MODE_H_ILLEGAL;
-
-       if ((mode->hdisplay > 0x7ff) || (mode->htotal > 0xfff))
-               return MODE_BAD_HVALUE;
-
-       DRM_DEBUG_DRIVER("Horizontal parameters OK\n");
-
-       if (vsync < 1)
-               return MODE_VSYNC_NARROW;
-
-       if (vsync > 0x3ff)
-               return MODE_VSYNC_WIDE;
-
-       if ((mode->vdisplay < 1) || (mode->vtotal < 1))
-               return MODE_V_ILLEGAL;
-
-       if ((mode->vdisplay > 0x7ff) || (mode->vtotal > 0xfff))
-               return MODE_BAD_VVALUE;
-
-       DRM_DEBUG_DRIVER("Vertical parameters OK\n");
-
-       tcon->dclk_min_div = 7;
-       tcon->dclk_max_div = 7;
-       rounded_rate = clk_round_rate(tcon->dclk, rate);
-       if (rounded_rate < rate)
-               return MODE_CLOCK_LOW;
-
-       if (rounded_rate > rate)
-               return MODE_CLOCK_HIGH;
-
-       DRM_DEBUG_DRIVER("Clock rate OK\n");
-
-       return MODE_OK;
-}
-
 static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = {
        .disable        = sun4i_lvds_encoder_disable,
        .enable         = sun4i_lvds_encoder_enable,
-       .mode_valid     = sun4i_lvds_encoder_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = {
index f0481b7b60c5e5330d12ecda16f7c08d258bcc2f..06c94e3a5f1521b0759148ede50b85752183e6bc 100644 (file)
@@ -910,7 +910,8 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
                        while (npages >= HPAGE_PMD_NR) {
                                gfp_t huge_flags = gfp_flags;
 
-                               huge_flags |= GFP_TRANSHUGE;
+                               huge_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+                                       __GFP_KSWAPD_RECLAIM;
                                huge_flags &= ~__GFP_MOVABLE;
                                huge_flags &= ~__GFP_COMP;
                                p = alloc_pages(huge_flags, HPAGE_PMD_ORDER);
@@ -1027,11 +1028,15 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
                                  GFP_USER | GFP_DMA32, "uc dma", 0);
 
        ttm_page_pool_init_locked(&_manager->wc_pool_huge,
-                                 GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP),
+                                 (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+                                  __GFP_KSWAPD_RECLAIM) &
+                                 ~(__GFP_MOVABLE | __GFP_COMP),
                                  "wc huge", order);
 
        ttm_page_pool_init_locked(&_manager->uc_pool_huge,
-                                 GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP)
+                                 (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+                                  __GFP_KSWAPD_RECLAIM) &
+                                 ~(__GFP_MOVABLE | __GFP_COMP)
                                  , "uc huge", order);
 
        _manager->options.max_size = max_pages;
index 8a25d197438509ab2112ae213d32c16a4627ff8e..f63d99c302e44fe2ca5bd9f9ab65e5e89af4c1a8 100644 (file)
@@ -910,7 +910,8 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
                gfp_flags |= __GFP_ZERO;
 
        if (huge) {
-               gfp_flags |= GFP_TRANSHUGE;
+               gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+                       __GFP_KSWAPD_RECLAIM;
                gfp_flags &= ~__GFP_MOVABLE;
                gfp_flags &= ~__GFP_COMP;
        }
index 2decc8e2c79f58aad6ea7626a32a0f26665d23eb..add9cc97a3b63baa1194ec82a497180b11d204b5 100644 (file)
@@ -195,6 +195,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
        vc4_bo_set_label(obj, -1);
 
        if (bo->validated_shader) {
+               kfree(bo->validated_shader->uniform_addr_offsets);
                kfree(bo->validated_shader->texture_samples);
                kfree(bo->validated_shader);
                bo->validated_shader = NULL;
@@ -591,6 +592,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
        }
 
        if (bo->validated_shader) {
+               kfree(bo->validated_shader->uniform_addr_offsets);
                kfree(bo->validated_shader->texture_samples);
                kfree(bo->validated_shader);
                bo->validated_shader = NULL;
index bf4667481935281526def8786619cd0a537c76f7..c61dff594195afe5b4a05d37bb3bd7c6b188b007 100644 (file)
@@ -760,6 +760,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
 struct vc4_async_flip_state {
        struct drm_crtc *crtc;
        struct drm_framebuffer *fb;
+       struct drm_framebuffer *old_fb;
        struct drm_pending_vblank_event *event;
 
        struct vc4_seqno_cb cb;
@@ -789,6 +790,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
 
        drm_crtc_vblank_put(crtc);
        drm_framebuffer_put(flip_state->fb);
+
+       /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
+        * when the planes are updated through the async update path.
+        * FIXME: we should move to generic async-page-flip when it's
+        * available, so that we can get rid of this hand-made cleanup_fb()
+        * logic.
+        */
+       if (flip_state->old_fb) {
+               struct drm_gem_cma_object *cma_bo;
+               struct vc4_bo *bo;
+
+               cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
+               bo = to_vc4_bo(&cma_bo->base);
+               vc4_bo_dec_usecnt(bo);
+               drm_framebuffer_put(flip_state->old_fb);
+       }
+
        kfree(flip_state);
 
        up(&vc4->async_modeset);
@@ -813,9 +831,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
        struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
        struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
 
+       /* Increment the BO usecnt here, so that we never end up with an
+        * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
+        * plane is later updated through the non-async path.
+        * FIXME: we should move to generic async-page-flip when it's
+        * available, so that we can get rid of this hand-made prepare_fb()
+        * logic.
+        */
+       ret = vc4_bo_inc_usecnt(bo);
+       if (ret)
+               return ret;
+
        flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
-       if (!flip_state)
+       if (!flip_state) {
+               vc4_bo_dec_usecnt(bo);
                return -ENOMEM;
+       }
 
        drm_framebuffer_get(fb);
        flip_state->fb = fb;
@@ -826,10 +857,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
        ret = down_interruptible(&vc4->async_modeset);
        if (ret) {
                drm_framebuffer_put(fb);
+               vc4_bo_dec_usecnt(bo);
                kfree(flip_state);
                return ret;
        }
 
+       /* Save the current FB before it's replaced by the new one in
+        * drm_atomic_set_fb_for_plane(). We'll need the old FB in
+        * vc4_async_page_flip_complete() to decrement the BO usecnt and keep
+        * it consistent.
+        * FIXME: we should move to generic async-page-flip when it's
+        * available, so that we can get rid of this hand-made cleanup_fb()
+        * logic.
+        */
+       flip_state->old_fb = plane->state->fb;
+       if (flip_state->old_fb)
+               drm_framebuffer_get(flip_state->old_fb);
+
        WARN_ON(drm_crtc_vblank_get(crtc) != 0);
 
        /* Immediately update the plane's legacy fb pointer, so that later
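
The FIXME comments above spell out the invariant the vc4 change restores: every usecnt taken when an async flip is submitted must be dropped exactly once, including on every early-error return. A small userspace sketch of that balancing rule, with flip_submit()/flip_complete() standing in for the driver paths:

/* Userspace sketch of the inc/dec balancing the vc4 fix enforces;
 * the counter stands in for vc4_bo_{inc,dec}_usecnt(). */
#include <assert.h>
#include <stdio.h>

static int usecnt;

static int flip_submit(int fail_alloc)
{
	usecnt++;			/* reference taken at submission */
	if (fail_alloc) {
		usecnt--;		/* error path must rebalance */
		return -1;
	}
	return 0;
}

static void flip_complete(void)
{
	usecnt--;			/* matching drop at completion */
}

int main(void)
{
	if (flip_submit(0) == 0)
		flip_complete();
	flip_submit(1);			/* failed submit, no completion */
	assert(usecnt == 0);		/* balanced on every path */
	printf("usecnt balanced: %d\n", usecnt);
	return 0;
}
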
index 72c9dbd81d7f4c61661924072a34f3a39fb3a170..f185812970da7a44aa7b82d6ae88942ae673ec9a 100644 (file)
@@ -96,7 +96,6 @@ struct vc4_dpi {
        struct platform_device *pdev;
 
        struct drm_encoder *encoder;
-       struct drm_connector *connector;
 
        void __iomem *regs;
 
@@ -164,14 +163,31 @@ static void vc4_dpi_encoder_disable(struct drm_encoder *encoder)
 
 static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
 {
+       struct drm_device *dev = encoder->dev;
        struct drm_display_mode *mode = &encoder->crtc->mode;
        struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder);
        struct vc4_dpi *dpi = vc4_encoder->dpi;
+       struct drm_connector_list_iter conn_iter;
+       struct drm_connector *connector = NULL, *connector_scan;
        u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE;
        int ret;
 
-       if (dpi->connector->display_info.num_bus_formats) {
-               u32 bus_format = dpi->connector->display_info.bus_formats[0];
+       /* Look up the connector attached to DPI so we can get the
+        * bus_format.  Ideally the bridge would tell us the
+        * bus_format we want, but it doesn't yet, so assume that it's
+        * uniform throughout the bridge chain.
+        */
+       drm_connector_list_iter_begin(dev, &conn_iter);
+       drm_for_each_connector_iter(connector_scan, &conn_iter) {
+               if (connector_scan->encoder == encoder) {
+                       connector = connector_scan;
+                       break;
+               }
+       }
+       drm_connector_list_iter_end(&conn_iter);
+
+       if (connector && connector->display_info.num_bus_formats) {
+               u32 bus_format = connector->display_info.bus_formats[0];
 
                switch (bus_format) {
                case MEDIA_BUS_FMT_RGB888_1X24:
@@ -199,6 +215,9 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
                        DRM_ERROR("Unknown media bus format %d\n", bus_format);
                        break;
                }
+       } else {
+               /* Default to 24bit if no connector found. */
+               dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
        }
 
        if (mode->flags & DRM_MODE_FLAG_NHSYNC)
index 94b99c90425a488cfaec255b1f2030f9b31242b3..7c95ed5c5cac0a6fc3358b7491f7e4e8e42eb9d2 100644 (file)
@@ -130,6 +130,7 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file)
        struct vc4_file *vc4file = file->driver_priv;
 
        vc4_perfmon_close_file(vc4file);
+       kfree(vc4file);
 }
 
 static const struct vm_operations_struct vc4_vm_ops = {
index ce39390be389305bdd10a2641234785f465308f0..13dcaad06798d233b620872872c8ab50ee3c001f 100644 (file)
@@ -503,7 +503,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
         * the scl fields here.
         */
        if (num_planes == 1) {
-               scl0 = vc4_get_scl_field(state, 1);
+               scl0 = vc4_get_scl_field(state, 0);
                scl1 = scl0;
        } else {
                scl0 = vc4_get_scl_field(state, 1);
index d3f15bf609008f869f11bb132a1a9f0b2f457739..7cf82b071de2904d2169e67d9cde40df00ce5900 100644 (file)
@@ -942,6 +942,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 fail:
        kfree(validation_state.branch_targets);
        if (validated_shader) {
+               kfree(validated_shader->uniform_addr_offsets);
                kfree(validated_shader->texture_samples);
                kfree(validated_shader);
        }
index 48e4f1df6e5d933bb5e4204a8866139a70de7f70..020070d483d350a58695c7fa9f21f7c2be4db463 100644 (file)
@@ -293,7 +293,7 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
        ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
        if (ret == -ENOSPC) {
                spin_unlock(&vgdev->ctrlq.qlock);
-               wait_event(vgdev->ctrlq.ack_queue, vq->num_free);
+               wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= outcnt + incnt);
                spin_lock(&vgdev->ctrlq.qlock);
                goto retry;
        } else {
@@ -368,7 +368,7 @@ static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
        ret = virtqueue_add_sgs(vq, sgs, outcnt, 0, vbuf, GFP_ATOMIC);
        if (ret == -ENOSPC) {
                spin_unlock(&vgdev->cursorq.qlock);
-               wait_event(vgdev->cursorq.ack_queue, vq->num_free);
+               wait_event(vgdev->cursorq.ack_queue, vq->num_free >= outcnt);
                spin_lock(&vgdev->cursorq.qlock);
                goto retry;
        } else {
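
Both virtio-gpu hunks tighten the wakeup predicate: sleeping until the ring has room for the whole request avoids waking on a single freed descriptor only to fail and retry. A pthread condition-variable analogue of the corrected condition (the slot counts are made up for the example):

/* Pthread analogue of the corrected wait condition above: sleep until
 * enough slots are free for the whole request, not just one. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int num_free;

static void wait_for_room(int needed)
{
	pthread_mutex_lock(&lock);
	while (num_free < needed)	/* was effectively: num_free < 1 */
		pthread_cond_wait(&cond, &lock);
	num_free -= needed;
	pthread_mutex_unlock(&lock);
}

static void *producer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	num_free += 4;			/* completions free some slots */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	wait_for_room(3);		/* blocks until 3 slots exist */
	pthread_join(t, NULL);
	printf("got room, %d slots left\n", num_free);
	return 0;
}
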
index 2582ffd36bb57b00317616cc65d71f2ebd4ecb6c..ba0cdb743c3e50d664848c021a0f63bc72953aa2 100644 (file)
@@ -441,11 +441,11 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set)
        struct drm_crtc *crtc = set->crtc;
        struct drm_framebuffer *fb;
        struct drm_crtc *tmp;
-       struct drm_modeset_acquire_ctx *ctx;
        struct drm_device *dev = set->crtc->dev;
+       struct drm_modeset_acquire_ctx ctx;
        int ret;
 
-       ctx = dev->mode_config.acquire_ctx;
+       drm_modeset_acquire_init(&ctx, 0);
 
 restart:
        /*
@@ -458,7 +458,7 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set)
 
        fb = set->fb;
 
-       ret = crtc->funcs->set_config(set, ctx);
+       ret = crtc->funcs->set_config(set, &ctx);
        if (ret == 0) {
                crtc->primary->crtc = crtc;
                crtc->primary->fb = fb;
@@ -473,20 +473,13 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set)
        }
 
        if (ret == -EDEADLK) {
-               dev->mode_config.acquire_ctx = NULL;
-
-retry_locking:
-               drm_modeset_backoff(ctx);
-
-               ret = drm_modeset_lock_all_ctx(dev, ctx);
-               if (ret)
-                       goto retry_locking;
-
-               dev->mode_config.acquire_ctx = ctx;
-
+               drm_modeset_backoff(&ctx);
                goto restart;
        }
 
+       drm_modeset_drop_locks(&ctx);
+       drm_modeset_acquire_fini(&ctx);
+
        return ret;
 }
 
@@ -624,7 +617,6 @@ static int vmw_fb_set_par(struct fb_info *info)
        }
 
        mutex_lock(&par->bo_mutex);
-       drm_modeset_lock_all(vmw_priv->dev);
        ret = vmw_fb_kms_framebuffer(info);
        if (ret)
                goto out_unlock;
@@ -657,7 +649,6 @@ static int vmw_fb_set_par(struct fb_info *info)
                drm_mode_destroy(vmw_priv->dev, old_mode);
        par->set_mode = mode;
 
-       drm_modeset_unlock_all(vmw_priv->dev);
        mutex_unlock(&par->bo_mutex);
 
        return ret;
@@ -713,18 +704,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
        par->max_width = fb_width;
        par->max_height = fb_height;
 
-       drm_modeset_lock_all(vmw_priv->dev);
        ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width,
                                      par->max_height, &par->con,
                                      &par->crtc, &init_mode);
-       if (ret) {
-               drm_modeset_unlock_all(vmw_priv->dev);
+       if (ret)
                goto err_kms;
-       }
 
        info->var.xres = init_mode->hdisplay;
        info->var.yres = init_mode->vdisplay;
-       drm_modeset_unlock_all(vmw_priv->dev);
 
        /*
         * Create buffers and alloc memory
@@ -832,7 +819,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
        cancel_delayed_work_sync(&par->local_work);
        unregister_framebuffer(info);
 
+       mutex_lock(&par->bo_mutex);
        (void) vmw_fb_kms_detach(par, true, true);
+       mutex_unlock(&par->bo_mutex);
 
        vfree(par->vmalloc);
        framebuffer_release(info);
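
The vmwgfx rework above gives the helper its own acquire context instead of borrowing dev->mode_config.acquire_ctx, backing off and retrying on -EDEADLK. A loose userspace analogue of that backoff pattern, using trylock in place of the DRM ww-mutex machinery (the two mutexes are purely illustrative):

/* Userspace analogue of the acquire/backoff loop the fix adopts: on
 * contention, drop everything held so far and retry from scratch. */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

static void lock_both(void)
{
	for (;;) {
		pthread_mutex_lock(&a);
		if (pthread_mutex_trylock(&b) == 0)
			return;		/* both held, proceed */
		/* would deadlock: back off (drop a) and retry */
		pthread_mutex_unlock(&a);
		sched_yield();
	}
}

int main(void)
{
	lock_both();
	printf("both locks held\n");
	pthread_mutex_unlock(&b);
	pthread_mutex_unlock(&a);
	return 0;
}
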
index f11601b6fd747cf37db69c841e3450615af406d8..96fd7a03d2f8beac5301040adb64274422ca037b 100644 (file)
@@ -2595,6 +2595,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
                vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
                                             out_fence, NULL);
 
+       vmw_dmabuf_unreference(&ctx->buf);
        vmw_resource_unreserve(res, false, NULL, 0);
        mutex_unlock(&res->dev_priv->cmdbuf_mutex);
 }
@@ -2680,7 +2681,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
        struct vmw_display_unit *du;
        struct drm_display_mode *mode;
        int i = 0;
+       int ret = 0;
 
+       mutex_lock(&dev_priv->dev->mode_config.mutex);
        list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list,
                            head) {
                if (i == unit)
@@ -2691,7 +2694,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
 
        if (i != unit) {
                DRM_ERROR("Could not find initial display unit.\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_unlock;
        }
 
        if (list_empty(&con->modes))
@@ -2699,7 +2703,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
 
        if (list_empty(&con->modes)) {
                DRM_ERROR("Could not find initial display mode.\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_unlock;
        }
 
        du = vmw_connector_to_du(con);
@@ -2720,7 +2725,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
                                           head);
        }
 
-       return 0;
+ out_unlock:
+       mutex_unlock(&dev_priv->dev->mode_config.mutex);
+
+       return ret;
 }
 
 /**
index 648f8127f65ae099baf3492031d30cd2c7280a64..3d667e903beb7bd76d13d8048a26a1763e835709 100644 (file)
@@ -482,6 +482,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
                return ret;
        }
 
+       vps->dmabuf_size = size;
+
        /*
         * TTM already thinks the buffer is pinned, but make sure the
         * pin_count is upped.
index 60252fd796f6a86513b577f7b05cb3123ddeec81..0000434a1fbd253264d63b638d6841ef4476f2b6 100644 (file)
@@ -462,10 +462,11 @@ config HID_LENOVO
        select NEW_LEDS
        select LEDS_CLASS
        ---help---
-       Support for Lenovo devices that are not fully compliant with HID standard.
+       Support for IBM/Lenovo devices that are not fully compliant with HID standard.
 
-       Say Y if you want support for the non-compliant features of the Lenovo
-       Thinkpad standalone keyboards, e.g:
+       Say Y if you want support for horizontal scrolling of the IBM/Lenovo
+       Scrollpoint mice or the non-compliant features of the Lenovo Thinkpad
+       standalone keyboards, e.g:
        - ThinkPad USB Keyboard with TrackPoint (supports extra LEDs and trackpoint
          configuration)
        - ThinkPad Compact Bluetooth Keyboard with TrackPoint (supports Fn keys)
index 0b5cc910f62e560758f50bb03585ac27942ef199..46f5ecd11bf73b9d76b3614d8feb114440d4f65a 100644 (file)
 #define USB_VENDOR_ID_HUION            0x256c
 #define USB_DEVICE_ID_HUION_TABLET     0x006e
 
+#define USB_VENDOR_ID_IBM                                      0x04b3
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_III                      0x3100
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_PRO                      0x3103
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL                  0x3105
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL           0x3108
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO       0x3109
+
 #define USB_VENDOR_ID_IDEACOM          0x1cb6
 #define USB_DEVICE_ID_IDEACOM_IDC6650  0x6650
 #define USB_DEVICE_ID_IDEACOM_IDC6651  0x6651
 #define USB_DEVICE_ID_LENOVO_TPKBD     0x6009
 #define USB_DEVICE_ID_LENOVO_CUSBKBD   0x6047
 #define USB_DEVICE_ID_LENOVO_CBTKBD    0x6048
+#define USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL       0x6049
 #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067
 #define USB_DEVICE_ID_LENOVO_X1_COVER  0x6085
 #define USB_DEVICE_ID_LENOVO_X1_TAB    0x60a3
 #define USB_DEVICE_ID_SIS817_TOUCH     0x0817
 #define USB_DEVICE_ID_SIS_TS           0x1013
 #define USB_DEVICE_ID_SIS1030_TOUCH    0x1030
+#define USB_DEVICE_ID_SIS10FB_TOUCH    0x10fb
 
 #define USB_VENDOR_ID_SKYCABLE                 0x1223
 #define        USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER       0x3F07
index 1ac4ff4d57a659fc89c6a2bf36b83ba2d679fdcc..643b6eb54442ed4bc297e182ad1b7c77a25e82c0 100644 (file)
@@ -6,6 +6,17 @@
  *
  *  Copyright (c) 2012 Bernhard Seibold
  *  Copyright (c) 2014 Jamie Lentin <jm@lentin.co.uk>
+ *
+ * Linux IBM/Lenovo Scrollpoint mouse driver:
+ * - IBM Scrollpoint III
+ * - IBM Scrollpoint Pro
+ * - IBM Scrollpoint Optical
+ * - IBM Scrollpoint Optical 800dpi
+ * - IBM Scrollpoint Optical 800dpi Pro
+ * - Lenovo Scrollpoint Optical
+ *
+ *  Copyright (c) 2012 Peter De Wachter <pdewacht@gmail.com>
+ *  Copyright (c) 2018 Peter Ganzhorn <peter.ganzhorn@gmail.com>
  */
 
 /*
@@ -160,6 +171,17 @@ static int lenovo_input_mapping_cptkbd(struct hid_device *hdev,
        return 0;
 }
 
+static int lenovo_input_mapping_scrollpoint(struct hid_device *hdev,
+               struct hid_input *hi, struct hid_field *field,
+               struct hid_usage *usage, unsigned long **bit, int *max)
+{
+       if (usage->hid == HID_GD_Z) {
+               hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL);
+               return 1;
+       }
+       return 0;
+}
+
 static int lenovo_input_mapping(struct hid_device *hdev,
                struct hid_input *hi, struct hid_field *field,
                struct hid_usage *usage, unsigned long **bit, int *max)
@@ -172,6 +194,14 @@ static int lenovo_input_mapping(struct hid_device *hdev,
        case USB_DEVICE_ID_LENOVO_CBTKBD:
                return lenovo_input_mapping_cptkbd(hdev, hi, field,
                                                        usage, bit, max);
+       case USB_DEVICE_ID_IBM_SCROLLPOINT_III:
+       case USB_DEVICE_ID_IBM_SCROLLPOINT_PRO:
+       case USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL:
+       case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL:
+       case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO:
+       case USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL:
+               return lenovo_input_mapping_scrollpoint(hdev, hi, field,
+                                                       usage, bit, max);
        default:
                return 0;
        }
@@ -883,6 +913,12 @@ static const struct hid_device_id lenovo_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_III) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_PRO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL) },
        { }
 };
 
index 963328674e93af929fde1e20a3dc3deff3d75569..cc33622253aa5d4051a1df57fd815e0c5840185c 100644 (file)
@@ -174,6 +174,8 @@ static const struct i2c_hid_quirks {
                I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
        { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_3118,
                I2C_HID_QUIRK_RESEND_REPORT_DESCR },
+       { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH,
+               I2C_HID_QUIRK_RESEND_REPORT_DESCR },
        { 0, 0 }
 };
 
index 157b44aacdffb60203cb154680f9d02a94115fdc..acc2536c80948522f66873f6e61da1499502066a 100644 (file)
@@ -77,21 +77,21 @@ static void process_recv(struct ishtp_cl *hid_ishtp_cl, void *recv_buf,
        struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
        int curr_hid_dev = client_data->cur_hid_dev;
 
-       if (data_len < sizeof(struct hostif_msg_hdr)) {
-               dev_err(&client_data->cl_device->dev,
-                       "[hid-ish]: error, received %u which is less than data header %u\n",
-                       (unsigned int)data_len,
-                       (unsigned int)sizeof(struct hostif_msg_hdr));
-               ++client_data->bad_recv_cnt;
-               ish_hw_reset(hid_ishtp_cl->dev);
-               return;
-       }
-
        payload = recv_buf + sizeof(struct hostif_msg_hdr);
        total_len = data_len;
        cur_pos = 0;
 
        do {
+               if (cur_pos + sizeof(struct hostif_msg) > total_len) {
+                       dev_err(&client_data->cl_device->dev,
+                               "[hid-ish]: error, received %u which is less than data header %u\n",
+                               (unsigned int)data_len,
+                               (unsigned int)sizeof(struct hostif_msg_hdr));
+                       ++client_data->bad_recv_cnt;
+                       ish_hw_reset(hid_ishtp_cl->dev);
+                       break;
+               }
+
                recv_msg = (struct hostif_msg *)(recv_buf + cur_pos);
                payload_len = recv_msg->hdr.size;
 
@@ -412,9 +412,7 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id,
 {
        struct ishtp_hid_data *hid_data =  hid->driver_data;
        struct ishtp_cl_data *client_data = hid_data->client_data;
-       static unsigned char    buf[10];
-       unsigned int    len;
-       struct hostif_msg_to_sensor *msg = (struct hostif_msg_to_sensor *)buf;
+       struct hostif_msg_to_sensor msg = {};
        int     rv;
        int     i;
 
@@ -426,14 +424,11 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id,
                return;
        }
 
-       len = sizeof(struct hostif_msg_to_sensor);
-
-       memset(msg, 0, sizeof(struct hostif_msg_to_sensor));
-       msg->hdr.command = (report_type == HID_FEATURE_REPORT) ?
+       msg.hdr.command = (report_type == HID_FEATURE_REPORT) ?
                HOSTIF_GET_FEATURE_REPORT : HOSTIF_GET_INPUT_REPORT;
        for (i = 0; i < client_data->num_hid_devices; ++i) {
                if (hid == client_data->hid_sensor_hubs[i]) {
-                       msg->hdr.device_id =
+                       msg.hdr.device_id =
                                client_data->hid_devices[i].dev_id;
                        break;
                }
@@ -442,8 +437,9 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id,
        if (i == client_data->num_hid_devices)
                return;
 
-       msg->report_id = report_id;
-       rv = ishtp_cl_send(client_data->hid_ishtp_cl, buf, len);
+       msg.report_id = report_id;
+       rv = ishtp_cl_send(client_data->hid_ishtp_cl, (uint8_t *)&msg,
+                           sizeof(msg));
        if (rv)
                hid_ishtp_trace(client_data,  "%s hid %p send failed\n",
                                __func__, hid);
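
The process_recv() change above moves the size check inside the loop so that every fragment header is validated before it is dereferenced, not just the first. The general shape of that per-record bounds check, as a runnable userspace sketch with a hypothetical two-byte header:

/* Userspace sketch of the per-iteration bounds check introduced above;
 * struct msg_hdr is a stand-in for the driver's hostif_msg header. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct msg_hdr {
	uint16_t size;		/* payload bytes following the header */
};

static int parse(const uint8_t *buf, size_t total_len)
{
	size_t cur_pos = 0;

	while (cur_pos < total_len) {
		struct msg_hdr hdr;

		if (cur_pos + sizeof(hdr) > total_len)
			return -1;	/* truncated header: bail out */
		memcpy(&hdr, buf + cur_pos, sizeof(hdr));
		if (cur_pos + sizeof(hdr) + hdr.size > total_len)
			return -1;	/* payload overruns the buffer */
		cur_pos += sizeof(hdr) + hdr.size;
	}
	return 0;
}

int main(void)
{
	const uint8_t good[] = { 2, 0, 0xaa, 0xbb };
	const uint8_t bad[]  = { 9, 0, 0xaa };

	printf("good: %d, bad: %d\n",
	       parse(good, sizeof(good)), parse(bad, sizeof(bad)));
	return 0;
}
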
index f272cdd9bd558311c27a82729c5eb314cade2a1f..2623a567ffba5ae51e90653e47bea42127ea9b02 100644 (file)
@@ -418,7 +418,7 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev,
                list_del(&device->device_link);
                spin_unlock_irqrestore(&dev->device_list_lock, flags);
                dev_err(dev->devc, "Failed to register ISHTP client device\n");
-               kfree(device);
+               put_device(&device->dev);
                return NULL;
        }
 
index b54ef1ffcbec329b99520365a63bf347d16a66d2..ee7a37eb159acf41fcb6c40ac685c7f659672b29 100644 (file)
@@ -1213,8 +1213,10 @@ static int __wacom_devm_sysfs_create_group(struct wacom *wacom,
        devres->root = root;
 
        error = sysfs_create_group(devres->root, group);
-       if (error)
+       if (error) {
+               devres_free(devres);
                return error;
+       }
 
        devres_add(&wacom->hdev->dev, devres);
 
index f249a442845804d8f22cdab29ec7e4a708874566..6ec307c93ecef937dbd47b1865df009b028c8d49 100644 (file)
@@ -272,7 +272,7 @@ config SENSORS_K8TEMP
 
 config SENSORS_K10TEMP
        tristate "AMD Family 10h+ temperature sensor"
-       depends on X86 && PCI
+       depends on X86 && PCI && AMD_NB
        help
          If you say yes here you get support for the temperature
          sensor(s) inside your CPU. Supported are later revisions of
index 051a72eecb2455794d51becf66ea66348e6f17c9..3b73dee6fdc68ba6aa2619b083fb4c2dffbf063a 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <asm/amd_nb.h>
 #include <asm/processor.h>
 
 MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor");
@@ -40,6 +41,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 #define PCI_DEVICE_ID_AMD_17H_DF_F3    0x1463
 #endif
 
+#ifndef PCI_DEVICE_ID_AMD_17H_M10H_DF_F3
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3       0x15eb
+#endif
+
 /* CPUID function 0x80000001, ebx */
 #define CPUID_PKGTYPE_MASK     0xf0000000
 #define CPUID_PKGTYPE_F                0x00000000
@@ -59,10 +64,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 #define  NB_CAP_HTC                    0x00000400
 
 /*
- * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE
- * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature
- * Control]
+ * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL
+ * and REG_REPORTED_TEMPERATURE have been moved to
+ * D0F0xBC_xD820_0C64 [Hardware Temperature Control]
+ * D0F0xBC_xD820_0CA4 [Reported Temperature Control]
  */
+#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET    0xd8200c64
 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET    0xd8200ca4
 
 /* F17h M01h Access through SMN */
@@ -70,8 +77,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 
 struct k10temp_data {
        struct pci_dev *pdev;
+       void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
        void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
        int temp_offset;
+       u32 temp_adjust_mask;
 };
 
 struct tctl_offset {
@@ -84,6 +93,7 @@ static const struct tctl_offset tctl_offset_table[] = {
        { 0x17, "AMD Ryzen 5 1600X", 20000 },
        { 0x17, "AMD Ryzen 7 1700X", 20000 },
        { 0x17, "AMD Ryzen 7 1800X", 20000 },
+       { 0x17, "AMD Ryzen 7 2700X", 10000 },
        { 0x17, "AMD Ryzen Threadripper 1950X", 27000 },
        { 0x17, "AMD Ryzen Threadripper 1920X", 27000 },
        { 0x17, "AMD Ryzen Threadripper 1900X", 27000 },
@@ -92,6 +102,11 @@ static const struct tctl_offset tctl_offset_table[] = {
        { 0x17, "AMD Ryzen Threadripper 1910", 10000 },
 };
 
+static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
+{
+       pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
+}
+
 static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
 {
        pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
@@ -108,6 +123,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
        mutex_unlock(&nb_smu_ind_mutex);
 }
 
+static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
+{
+       amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
+                         F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
+}
+
 static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
 {
        amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
@@ -116,8 +137,8 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
 
 static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
 {
-       amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0x60,
-                         F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
+       amd_smn_read(amd_pci_dev_to_node_id(pdev),
+                    F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
 }
 
 static ssize_t temp1_input_show(struct device *dev,
@@ -129,6 +150,8 @@ static ssize_t temp1_input_show(struct device *dev,
 
        data->read_tempreg(data->pdev, &regval);
        temp = (regval >> 21) * 125;
+       if (regval & data->temp_adjust_mask)
+               temp -= 49000;
        if (temp > data->temp_offset)
                temp -= data->temp_offset;
        else
@@ -152,8 +175,7 @@ static ssize_t show_temp_crit(struct device *dev,
        u32 regval;
        int value;
 
-       pci_read_config_dword(data->pdev,
-                             REG_HARDWARE_THERMAL_CONTROL, &regval);
+       data->read_htcreg(data->pdev, &regval);
        value = ((regval >> 16) & 0x7f) * 500 + 52000;
        if (show_hyst)
                value -= ((regval >> 24) & 0xf) * 500;
@@ -173,13 +195,18 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
        struct pci_dev *pdev = data->pdev;
 
        if (index >= 2) {
-               u32 reg_caps, reg_htc;
+               u32 reg;
+
+               if (!data->read_htcreg)
+                       return 0;
 
                pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
-                                     &reg_caps);
-               pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL,
-                                     &reg_htc);
-               if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE))
+                                     &reg);
+               if (!(reg & NB_CAP_HTC))
+                       return 0;
+
+               data->read_htcreg(data->pdev, &reg);
+               if (!(reg & HTC_ENABLE))
                        return 0;
        }
        return attr->mode;
@@ -259,12 +286,16 @@ static int k10temp_probe(struct pci_dev *pdev,
        data->pdev = pdev;
 
        if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 ||
-                                         boot_cpu_data.x86_model == 0x70))
+                                         boot_cpu_data.x86_model == 0x70)) {
+               data->read_htcreg = read_htcreg_nb_f15;
                data->read_tempreg = read_tempreg_nb_f15;
-       else if (boot_cpu_data.x86 == 0x17)
+       } else if (boot_cpu_data.x86 == 0x17) {
+               data->temp_adjust_mask = 0x80000;
                data->read_tempreg = read_tempreg_nb_f17;
-       else
+       } else {
+               data->read_htcreg = read_htcreg_pci;
                data->read_tempreg = read_tempreg_pci;
+       }
 
        for (i = 0; i < ARRAY_SIZE(tctl_offset_table); i++) {
                const struct tctl_offset *entry = &tctl_offset_table[i];
@@ -292,6 +323,7 @@ static const struct pci_device_id k10temp_id_table[] = {
        { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
        { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
        { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
        {}
 };
 MODULE_DEVICE_TABLE(pci, k10temp_id_table);
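
Per the temp1_input_show() hunk above, the reported-temperature register encodes Tctl in its top bits at 0.125 °C per LSB; on Family 17h a set range-select bit (the 0x80000 mask) shifts readings down by 49 °C, and per-SKU offsets such as the Ryzen 7 2700X's 10000 m°C are subtracted on top. A worked decode with a made-up register value:

/* Worked decode of the Tctl register as computed in the diff above;
 * regval is invented for the example, constants come from the patch. */
#include <stdio.h>

int main(void)
{
	unsigned int regval = 0x6d080000;	/* example raw register */
	int temp = (regval >> 21) * 125;	/* 0.125 deg C per LSB */

	if (regval & 0x80000)			/* range-select bit set */
		temp -= 49000;
	if (temp > 10000)			/* e.g. Ryzen 7 2700X offset */
		temp -= 10000;
	else
		temp = 0;

	printf("temp1_input = %d mC\n", temp);	/* 50000, i.e. 50 degC */
	return 0;
}
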
index 8b0bc4fc06e8ccccdd94029bcb541b436d473469..b0bc77bf2cd9b3a92f6c186add19aa34b5a6cc7d 100644 (file)
@@ -1380,8 +1380,8 @@ static int __init nct6683_find(int sioaddr, struct nct6683_sio_data *sio_data)
        /* Activate logical device if needed */
        val = superio_inb(sioaddr, SIO_REG_ENABLE);
        if (!(val & 0x01)) {
-               pr_err("EC is disabled\n");
-               goto fail;
+               pr_warn("Forcibly enabling EC access. Data may be unusable.\n");
+               superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01);
        }
 
        superio_exit(sioaddr);
index 363bf56eb0f29c89965d48317282653c6598f417..91976b6ca30002b985ef6594f82fcf10aabf91db 100644 (file)
@@ -170,7 +170,10 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
        scmi_chip_info.info = ptr_scmi_ci;
        chip_info = &scmi_chip_info;
 
-       for (type = 0; type < hwmon_max && nr_count[type]; type++) {
+       for (type = 0; type < hwmon_max; type++) {
+               if (!nr_count[type])
+                       continue;
+
                scmi_hwmon_add_chan_info(scmi_hwmon_chan, dev, nr_count[type],
                                         type, hwmon_attributes[type]);
                *ptr_scmi_ci++ = scmi_hwmon_chan++;
index c4865b08d7fb9e3b194dba2830b837fe6ab17819..8d21b9825d71764dc950a47dec4a3bc3dc102e25 100644 (file)
@@ -707,7 +707,6 @@ config I2C_MPC
 config I2C_MT65XX
        tristate "MediaTek I2C adapter"
        depends on ARCH_MEDIATEK || COMPILE_TEST
-       depends on HAS_DMA
        help
          This selects the MediaTek(R) Integrated Inter Circuit bus driver
          for MT65xx and MT81xx.
@@ -885,7 +884,6 @@ config I2C_SH7760
 
 config I2C_SH_MOBILE
        tristate "SuperH Mobile I2C Controller"
-       depends on HAS_DMA
        depends on ARCH_SHMOBILE || ARCH_RENESAS || COMPILE_TEST
        help
          If you say yes to this option, support will be included for the
@@ -1098,7 +1096,6 @@ config I2C_XLP9XX
 
 config I2C_RCAR
        tristate "Renesas R-Car I2C Controller"
-       depends on HAS_DMA
        depends on ARCH_RENESAS || COMPILE_TEST
        select I2C_SLAVE
        help
index fd36c39ddf4e86efd4ae8f12bd9f4ab9df4da860..0cdba29ae0a9ad25212f21a56acf76f46c1b5213 100644 (file)
@@ -209,7 +209,10 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
        i2c_dw_disable_int(dev);
 
        /* Enable the adapter */
-       __i2c_dw_enable_and_wait(dev, true);
+       __i2c_dw_enable(dev, true);
+
+       /* Dummy read to avoid the register getting stuck on Bay Trail */
+       dw_readl(dev, DW_IC_ENABLE_STATUS);
 
        /* Clear and enable interrupts */
        dw_readl(dev, DW_IC_CLR_INTR);
index 2aa0e83174c52895a0fb1416e8a17a00d31b24c6..dae8ac618a5221fdd886afab417f88945af7f143 100644 (file)
@@ -564,10 +564,10 @@ static int pmcmsptwi_master_xfer(struct i2c_adapter *adap,
                 * TODO: We could potentially loop and retry in the case
                 * of MSP_TWI_XFER_TIMEOUT.
                 */
-               return -1;
+               return -EIO;
        }
 
-       return 0;
+       return num;
 }
 
 static u32 pmcmsptwi_i2c_func(struct i2c_adapter *adapter)
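
This fix, like the viperboard one further below, corrects a driver that returned 0 from its transfer callback: the i2c core expects .master_xfer to return the number of messages completed on success, or a negative errno on failure. A userspace sketch of that contract, with do_one() as a hypothetical per-message helper:

/* Sketch of the .master_xfer return convention both fixes enforce:
 * negative errno on failure, otherwise the message count, never 0
 * for success. do_one() is a hypothetical stand-in. */
#include <errno.h>
#include <stdio.h>

struct i2c_msg { int addr; };

static int do_one(struct i2c_msg *msg) { (void)msg; return 0; }

static int xfer(struct i2c_msg *msgs, int num)
{
	for (int i = 0; i < num; i++)
		if (do_one(&msgs[i]) < 0)
			return -EIO;	/* was: return -1 */
	return num;			/* was: return 0 */
}

int main(void)
{
	struct i2c_msg msgs[2] = { { 0x50 }, { 0x51 } };

	printf("xfer returned %d\n", xfer(msgs, 2));
	return 0;
}
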
index 25fcc3c1e32bf3d9a41fa345982039fb234dbcbd..4053259bccb8d704d9d386287086841690174844 100644 (file)
@@ -86,6 +86,7 @@ struct sprd_i2c {
        u32 count;
        int irq;
        int err;
+       bool is_suspended;
 };
 
 static void sprd_i2c_set_count(struct sprd_i2c *i2c_dev, u32 count)
@@ -283,6 +284,9 @@ static int sprd_i2c_master_xfer(struct i2c_adapter *i2c_adap,
        struct sprd_i2c *i2c_dev = i2c_adap->algo_data;
        int im, ret;
 
+       if (i2c_dev->is_suspended)
+               return -EBUSY;
+
        ret = pm_runtime_get_sync(i2c_dev->dev);
        if (ret < 0)
                return ret;
@@ -364,13 +368,12 @@ static irqreturn_t sprd_i2c_isr_thread(int irq, void *dev_id)
        struct sprd_i2c *i2c_dev = dev_id;
        struct i2c_msg *msg = i2c_dev->msg;
        bool ack = !(readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK);
-       u32 i2c_count = readl(i2c_dev->base + I2C_COUNT);
        u32 i2c_tran;
 
        if (msg->flags & I2C_M_RD)
                i2c_tran = i2c_dev->count >= I2C_FIFO_FULL_THLD;
        else
-               i2c_tran = i2c_count;
+               i2c_tran = i2c_dev->count;
 
        /*
         * If we got one ACK from slave when writing data, and we did not
@@ -408,14 +411,13 @@ static irqreturn_t sprd_i2c_isr(int irq, void *dev_id)
 {
        struct sprd_i2c *i2c_dev = dev_id;
        struct i2c_msg *msg = i2c_dev->msg;
-       u32 i2c_count = readl(i2c_dev->base + I2C_COUNT);
        bool ack = !(readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK);
        u32 i2c_tran;
 
        if (msg->flags & I2C_M_RD)
                i2c_tran = i2c_dev->count >= I2C_FIFO_FULL_THLD;
        else
-               i2c_tran = i2c_count;
+               i2c_tran = i2c_dev->count;
 
        /*
         * If we did not get one ACK from slave when writing data, then we
@@ -586,11 +588,23 @@ static int sprd_i2c_remove(struct platform_device *pdev)
 
 static int __maybe_unused sprd_i2c_suspend_noirq(struct device *pdev)
 {
+       struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev);
+
+       i2c_lock_adapter(&i2c_dev->adap);
+       i2c_dev->is_suspended = true;
+       i2c_unlock_adapter(&i2c_dev->adap);
+
        return pm_runtime_force_suspend(pdev);
 }
 
 static int __maybe_unused sprd_i2c_resume_noirq(struct device *pdev)
 {
+       struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev);
+
+       i2c_lock_adapter(&i2c_dev->adap);
+       i2c_dev->is_suspended = false;
+       i2c_unlock_adapter(&i2c_dev->adap);
+
        return pm_runtime_force_resume(pdev);
 }
 
index e4be86b3de9a28b201bcd197470ab3a3a78ee683..7235c7302bb7cd000db814ec12de03193a22ef01 100644 (file)
@@ -337,7 +337,7 @@ static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs,
                }
                mutex_unlock(&vb->lock);
        }
-       return 0;
+       return num;
 error:
        mutex_unlock(&vb->lock);
        return error;
index a9126b3cda61bc95f6a9d1282821ab7552484534..7c3b4740b94b644509ae3658fa2eda4885086bba 100644 (file)
@@ -445,10 +445,17 @@ static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
        msgs[1].buf = buffer;
 
        ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-       if (ret < 0)
-               dev_err(&client->adapter->dev, "i2c read failed\n");
-       else
+       if (ret < 0) {
+               /* Getting a NACK is unfortunately normal with some DSDTs */
+               if (ret == -EREMOTEIO)
+                       dev_dbg(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n",
+                               data_len, client->addr, cmd, ret);
+               else
+                       dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n",
+                               data_len, client->addr, cmd, ret);
+       } else {
                memcpy(data, buffer, data_len);
+       }
 
        kfree(buffer);
        return ret;
index 036a03f0d0a6866001d0badd11cb9b30516ca5de..1667b6e7674f4a0439befcd544d74753e41d43f3 100644 (file)
@@ -280,7 +280,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client,
                 */
                if (msgs[i].flags & I2C_M_RECV_LEN) {
                        if (!(msgs[i].flags & I2C_M_RD) ||
-                           msgs[i].buf[0] < 1 ||
+                           msgs[i].len < 1 || msgs[i].buf[0] < 1 ||
                            msgs[i].len < msgs[i].buf[0] +
                                             I2C_SMBUS_BLOCK_MAX) {
                                res = -EINVAL;
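
The added `msgs[i].len < 1` clause above ensures buf[0] is never read from a zero-length user buffer before it is used as a length. The generic shape of that validation order, as a runnable sketch (BLOCK_MAX stands in for I2C_SMBUS_BLOCK_MAX and the surrounding I2C_M_RD logic is omitted):

/* Userspace sketch of the validation-order fix above: confirm the
 * buffer contains a length byte at all before dereferencing it. */
#include <stdio.h>

#define BLOCK_MAX 32

static int validate(const unsigned char *buf, int len)
{
	if (len < 1)			/* new check: no length byte */
		return -1;
	if (buf[0] < 1 || len < buf[0] + BLOCK_MAX)
		return -1;		/* pre-existing checks */
	return 0;
}

int main(void)
{
	unsigned char empty[1];
	unsigned char ok[40] = { 4 };

	printf("len 0: %d\n", validate(empty, 0));	/* rejected safely */
	printf("len 40: %d\n", validate(ok, 40));	/* accepted */
	return 0;
}
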
index ee270e065ba999ea75360474751e72884d2de0bf..2a972ed6851b2265ae89ee44f621e7ee4ba174da 100644 (file)
@@ -61,9 +61,12 @@ config INFINIBAND_ON_DEMAND_PAGING
          pages on demand instead.
 
 config INFINIBAND_ADDR_TRANS
-       bool
+       bool "RDMA/CM"
        depends on INFINIBAND
        default y
+       ---help---
+         Support for RDMA communication manager (CM).
+         This allows for a generic connection abstraction over RDMA.
 
 config INFINIBAND_ADDR_TRANS_CONFIGFS
        bool
index e337b08de2ffb3cd73b2ea335c6ff0e09375d50a..fb2d347f760f14c18e033f889732d58340dd8660 100644 (file)
@@ -291,14 +291,18 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                 * so lookup free slot only if requested.
                 */
                if (pempty && empty < 0) {
-                       if (data->props & GID_TABLE_ENTRY_INVALID) {
-                               /* Found an invalid (free) entry; allocate it */
-                               if (data->props & GID_TABLE_ENTRY_DEFAULT) {
-                                       if (default_gid)
-                                               empty = curr_index;
-                               } else {
-                                       empty = curr_index;
-                               }
+                       if (data->props & GID_TABLE_ENTRY_INVALID &&
+                           (default_gid ==
+                            !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
+                               /*
+                                * Found an invalid (free) entry; allocate it.
+                                * If default GID is requested, then our
+                                * found slot must be one of the DEFAULT
+                                * reserved slots or we fail.
+                                * This ensures that only DEFAULT reserved
+                                * slots are used for default property GIDs.
+                                */
+                               empty = curr_index;
                        }
                }
 
@@ -420,8 +424,10 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        return ret;
 }
 
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
-                    union ib_gid *gid, struct ib_gid_attr *attr)
+static int
+_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+                 union ib_gid *gid, struct ib_gid_attr *attr,
+                 unsigned long mask, bool default_gid)
 {
        struct ib_gid_table *table;
        int ret = 0;
@@ -431,11 +437,7 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 
        mutex_lock(&table->lock);
 
-       ix = find_gid(table, gid, attr, false,
-                     GID_ATTR_FIND_MASK_GID      |
-                     GID_ATTR_FIND_MASK_GID_TYPE |
-                     GID_ATTR_FIND_MASK_NETDEV,
-                     NULL);
+       ix = find_gid(table, gid, attr, default_gid, mask, NULL);
        if (ix < 0) {
                ret = -EINVAL;
                goto out_unlock;
@@ -452,6 +454,17 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
        return ret;
 }
 
+int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+                    union ib_gid *gid, struct ib_gid_attr *attr)
+{
+       unsigned long mask = GID_ATTR_FIND_MASK_GID       |
+                            GID_ATTR_FIND_MASK_GID_TYPE |
+                            GID_ATTR_FIND_MASK_DEFAULT  |
+                            GID_ATTR_FIND_MASK_NETDEV;
+
+       return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
+}
+
 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
                                     struct net_device *ndev)
 {
@@ -728,7 +741,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode)
 {
-       union ib_gid gid;
+       union ib_gid gid = { };
        struct ib_gid_attr gid_attr;
        struct ib_gid_table *table;
        unsigned int gid_type;
@@ -736,7 +749,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
 
        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
-       make_default_gid(ndev, &gid);
+       mask = GID_ATTR_FIND_MASK_GID_TYPE |
+              GID_ATTR_FIND_MASK_DEFAULT |
+              GID_ATTR_FIND_MASK_NETDEV;
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;
 
@@ -747,12 +762,12 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                gid_attr.gid_type = gid_type;
 
                if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
-                       mask = GID_ATTR_FIND_MASK_GID_TYPE |
-                              GID_ATTR_FIND_MASK_DEFAULT;
+                       make_default_gid(ndev, &gid);
                        __ib_cache_gid_add(ib_dev, port, &gid,
                                           &gid_attr, mask, true);
                } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
-                       ib_cache_gid_del(ib_dev, port, &gid, &gid_attr);
+                       _ib_cache_gid_del(ib_dev, port, &gid,
+                                         &gid_attr, mask, true);
                }
        }
 }
index 51a641002e103cb289f20c9481bcc96ff7a36972..a693fcd4c513ada0428115b24d90bb947fb08f1c 100644 (file)
@@ -382,6 +382,8 @@ struct cma_hdr {
 #define CMA_VERSION 0x00
 
 struct cma_req_info {
+       struct sockaddr_storage listen_addr_storage;
+       struct sockaddr_storage src_addr_storage;
        struct ib_device *device;
        int port;
        union ib_gid local_gid;
@@ -866,7 +868,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 {
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;
-       union ib_gid sgid;
 
        mutex_lock(&id_priv->qp_mutex);
        if (!id_priv->id.qp) {
@@ -889,12 +890,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
        if (ret)
                goto out;
 
-       ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
-                          rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
-                          &sgid, NULL);
-       if (ret)
-               goto out;
-
        BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
 
        if (conn_param)
@@ -1340,11 +1335,11 @@ static bool validate_net_dev(struct net_device *net_dev,
 }
 
 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
-                                         const struct cma_req_info *req)
+                                         struct cma_req_info *req)
 {
-       struct sockaddr_storage listen_addr_storage, src_addr_storage;
-       struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
-                       *src_addr = (struct sockaddr *)&src_addr_storage;
+       struct sockaddr *listen_addr =
+                       (struct sockaddr *)&req->listen_addr_storage;
+       struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage;
        struct net_device *net_dev;
        const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
        int err;
@@ -1359,11 +1354,6 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
        if (!net_dev)
                return ERR_PTR(-ENODEV);
 
-       if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
-               dev_put(net_dev);
-               return ERR_PTR(-EHOSTUNREACH);
-       }
-
        return net_dev;
 }
 
@@ -1490,15 +1480,51 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
                }
        }
 
+       /*
+        * The net namespace might be deleted while the route lookup and
+        * cm_id lookup are in progress. Therefore, perform the netdevice
+        * validation and the cm_id lookup under the RCU lock.
+        * The RCU lock, together with the netdevice state check,
+        * synchronizes with a netdevice migrating to a different net
+        * namespace, and also ensures that the net namespace is not
+        * deleted while the lookup is in progress.
+        * If the device state is not IFF_UP, its properties such as
+        * ifindex and nd_net cannot be trusted to remain valid without
+        * the RCU lock. net/core/dev.c change_net_namespace() uses
+        * synchronize_net() to synchronize with any ongoing operations
+        * on the net device after it has been closed.
+        */
+       rcu_read_lock();
+       if (*net_dev) {
+               /*
+                * If the netdevice is down, it is likely administratively
+                * down or migrating to a different namespace. In that case
+                * avoid further processing, as the net namespace or
+                * ifindex may change.
+                */
+               if (((*net_dev)->flags & IFF_UP) == 0) {
+                       id_priv = ERR_PTR(-EHOSTUNREACH);
+                       goto err;
+               }
+
+               if (!validate_net_dev(*net_dev,
+                                (struct sockaddr *)&req.listen_addr_storage,
+                                (struct sockaddr *)&req.src_addr_storage)) {
+                       id_priv = ERR_PTR(-EHOSTUNREACH);
+                       goto err;
+               }
+       }
+
        bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
                                rdma_ps_from_service_id(req.service_id),
                                cma_port_from_service_id(req.service_id));
        id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
+err:
+       rcu_read_unlock();
        if (IS_ERR(id_priv) && *net_dev) {
                dev_put(*net_dev);
                *net_dev = NULL;
        }
-
        return id_priv;
 }
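
The ordering the new comment describes — check the device state and validate
the addresses inside the read-side critical section, then do the lookup — can
be sketched in user space with a pthread rwlock standing in for
rcu_read_lock(). Everything below is an invented analog, not kernel code:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct fake_netdev {
            bool up;       /* stands in for IFF_UP */
            int ifindex;   /* only trustworthy under the read lock */
    };

    static pthread_rwlock_t dev_lock = PTHREAD_RWLOCK_INITIALIZER;

    static int lookup_listener(struct fake_netdev *dev)
    {
            int ret = 0;

            pthread_rwlock_rdlock(&dev_lock);        /* ~ rcu_read_lock() */
            if (dev && !dev->up) {
                    ret = -1;                        /* ~ -EHOSTUNREACH */
                    goto out;
            }
            /* address validation and bind-list lookup happen here,
             * still inside the critical section */
            printf("lookup on ifindex %d\n", dev ? dev->ifindex : -1);
    out:
            pthread_rwlock_unlock(&dev_lock);        /* ~ rcu_read_unlock() */
            return ret;
    }

    int main(void)
    {
            struct fake_netdev d = { .up = true, .ifindex = 3 };
            return lookup_listener(&d) ? 1 : 0;
    }
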
 
index 9821ae900f6d5bf2fac8f684089968b4c19acd52..da12da1c36f60836fdb287920e7d7e9e582fc2a2 100644 (file)
@@ -114,7 +114,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
                        struct sockaddr_storage *mapped_sockaddr,
                        u8 nl_client)
 {
-       struct hlist_head *hash_bucket_head;
+       struct hlist_head *hash_bucket_head = NULL;
        struct iwpm_mapping_info *map_info;
        unsigned long flags;
        int ret = -EINVAL;
@@ -142,6 +142,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
                }
        }
        spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+
+       if (!hash_bucket_head)
+               kfree(map_info);
        return ret;
 }
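
The kfree() added above plugs a leak: when no hash bucket is found, the
freshly allocated mapping record has no owner. A stand-alone sketch of the
same ownership rule, with invented names:

    #include <stdlib.h>

    struct map_rec { int port; };

    static int create_mapinfo(struct map_rec **bucket_head, int port)
    {
            struct map_rec *info = malloc(sizeof(*info));

            if (!info)
                    return -1;
            info->port = port;
            if (bucket_head) {
                    *bucket_head = info;   /* the bucket takes ownership */
                    return 0;
            }
            free(info);                    /* no owner: free it, as the fix does */
            return -1;
    }

    int main(void)
    {
            struct map_rec *head = NULL;
            int ret = create_mapinfo(&head, 4321);

            create_mapinfo(NULL, 1234);    /* "no bucket" path: no leak */
            free(head);
            return ret;
    }
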
 
index c50596f7f98a52f2cdc6afacac3d98d9a7d348fb..b28452a55a08fc66b72bcc39bb7186286d108915 100644 (file)
@@ -59,7 +59,7 @@ module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
 static struct list_head ib_mad_port_list;
-static u32 ib_mad_client_id = 0;
+static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
 
 /* Port list lock */
 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -377,7 +377,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
        }
 
        spin_lock_irqsave(&port_priv->reg_lock, flags);
-       mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
+       mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
 
        /*
         * Make sure MAD registration (if supplied)
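
The conversion above from an unlocked ++counter to atomic_inc_return()
guarantees unique IDs under concurrent registration. A C11 user-space sketch
of the same idea (names invented):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint client_id;   /* ~ atomic_t ib_mad_client_id */

    static unsigned int next_client_id(void)
    {
            /* atomic_fetch_add returns the old value; adding 1 mirrors
             * the post-increment semantics of atomic_inc_return() */
            return atomic_fetch_add(&client_id, 1) + 1;
    }

    int main(void)
    {
            printf("%u %u\n", next_client_id(), next_client_id()); /* 1 2 */
            return 0;
    }
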
index cc2966380c0cab02f1f7bc1d0fc33c4d56c0b60b..c0e4fd55e2ccb77104b7a62cdde18440ab9c0f47 100644 (file)
@@ -255,6 +255,7 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
                                            struct net_device *rdma_ndev)
 {
        struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
+       unsigned long gid_type_mask;
 
        if (!rdma_ndev)
                return;
@@ -264,21 +265,22 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
 
        rcu_read_lock();
 
-       if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
-           is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
-           BONDING_SLAVE_STATE_INACTIVE) {
-               unsigned long gid_type_mask;
-
+       if (((rdma_ndev != event_ndev &&
+             !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+            is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
+            BONDING_SLAVE_STATE_INACTIVE)) {
                rcu_read_unlock();
+               return;
+       }
 
-               gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+       rcu_read_unlock();
 
-               ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
-                                            gid_type_mask,
-                                            IB_CACHE_GID_DEFAULT_MODE_DELETE);
-       } else {
-               rcu_read_unlock();
-       }
+       gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+                                    gid_type_mask,
+                                    IB_CACHE_GID_DEFAULT_MODE_DELETE);
 }
 
 static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
index 74329483af6d424d97970719d1b1c1733737b334..eab43b17e9cf24f6f2bd1152dbc3cb2f188757a3 100644 (file)
@@ -159,6 +159,23 @@ static void ucma_put_ctx(struct ucma_context *ctx)
                complete(&ctx->comp);
 }
 
+/*
+ * Same as ucma_get_ctx() but additionally requires that ->cm_id->device is
+ * valid, e.g. that the CM_ID is bound.
+ */
+static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
+{
+       struct ucma_context *ctx = ucma_get_ctx(file, id);
+
+       if (IS_ERR(ctx))
+               return ctx;
+       if (!ctx->cm_id->device) {
+               ucma_put_ctx(ctx);
+               return ERR_PTR(-EINVAL);
+       }
+       return ctx;
+}
+
 static void ucma_close_event_id(struct work_struct *work)
 {
        struct ucma_event *uevent_close =  container_of(work, struct ucma_event, close_work);
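
A stand-alone analog of the new helper: the getter enforces the "device must
be bound" precondition itself, so the call sites converted below can drop
their own checks. All names here are invented:

    #include <errno.h>
    #include <stdio.h>

    struct ctx { const char *device; int refs; };

    static struct ctx *get_ctx_dev(struct ctx *c, int *err)
    {
            if (!c->device) {               /* ~ !ctx->cm_id->device */
                    *err = -EINVAL;
                    return NULL;
            }
            c->refs++;                      /* ~ ucma_get_ctx() reference */
            return c;
    }

    static void put_ctx(struct ctx *c) { c->refs--; }

    int main(void)
    {
            struct ctx bound = { .device = "mlx5_0" }, unbound = { 0 };
            int err = 0;

            if (get_ctx_dev(&bound, &err))
                    put_ctx(&bound);
            get_ctx_dev(&unbound, &err);
            printf("unbound lookup -> %d\n", err);  /* -EINVAL */
            return 0;
    }
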
@@ -683,7 +700,7 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
-       if (!rdma_addr_size_in6(&cmd.src_addr) ||
+       if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
            !rdma_addr_size_in6(&cmd.dst_addr))
                return -EINVAL;
 
@@ -734,7 +751,7 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
-       ctx = ucma_get_ctx(file, cmd.id);
+       ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
@@ -1050,7 +1067,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
        if (!cmd.conn_param.valid)
                return -EINVAL;
 
-       ctx = ucma_get_ctx(file, cmd.id);
+       ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
@@ -1092,7 +1109,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
-       ctx = ucma_get_ctx(file, cmd.id);
+       ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
@@ -1120,7 +1137,7 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
-       ctx = ucma_get_ctx(file, cmd.id);
+       ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
@@ -1139,7 +1156,7 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
-       ctx = ucma_get_ctx(file, cmd.id);
+       ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
@@ -1167,15 +1184,10 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
        if (cmd.qp_state > IB_QPS_ERR)
                return -EINVAL;
 
-       ctx = ucma_get_ctx(file, cmd.id);
+       ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       if (!ctx->cm_id->device) {
-               ret = -EINVAL;
-               goto out;
-       }
-
        resp.qp_attr_mask = 0;
        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.qp_state = cmd.qp_state;
@@ -1316,13 +1328,13 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
+       if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
+               return -EINVAL;
+
        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
-               return -EINVAL;
-
        optval = memdup_user(u64_to_user_ptr(cmd.optval),
                             cmd.optlen);
        if (IS_ERR(optval)) {
@@ -1384,7 +1396,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
        else
                return -EINVAL;
 
-       ctx = ucma_get_ctx(file, cmd->id);
+       ctx = ucma_get_ctx_dev(file, cmd->id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
index 13cb5e4deb8664568cf535183be3c85f56238953..21a887c9523bc3a08fb7992a958cdfc4e4d9b3ad 100644 (file)
@@ -691,6 +691,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
        mr->device  = pd->device;
        mr->pd      = pd;
+       mr->dm      = NULL;
        mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
        mr->res.type = RDMA_RESTRACK_MR;
@@ -765,6 +766,11 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 
        mr = uobj->object;
 
+       if (mr->dm) {
+               ret = -EINVAL;
+               goto put_uobjs;
+       }
+
        if (cmd.flags & IB_MR_REREG_ACCESS) {
                ret = ib_check_mr_access(cmd.access_flags);
                if (ret)
index 8c93970dc8f15e99c85e9a6de788d153749f4a4f..8d32c4ae368cf2b5f5f201d4a67128e0299fb43a 100644 (file)
@@ -234,6 +234,15 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met
                        return -EINVAL;
        }
 
+       for (; i < method_spec->num_buckets; i++) {
+               struct uverbs_attr_spec_hash *attr_spec_bucket =
+                       method_spec->attr_buckets[i];
+
+               if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask,
+                                 attr_spec_bucket->num_attrs))
+                       return -EINVAL;
+       }
+
        return 0;
 }
 
index cbcec3da12f685054c93d102e3e67556f5a71045..b4f016dfa23dbbcf2bdc83e77cdb93674112f2d0 100644 (file)
@@ -363,28 +363,28 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
        [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
-               .ptr = {
+               .ptr = {
                        .type = UVERBS_ATTR_TYPE_PTR_IN,
                        UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
                        .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-               },
+               } },
        },
 };
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
        [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
-               .ptr = {
+               .ptr = {
                        .type = UVERBS_ATTR_TYPE_PTR_IN,
                        /* No need to specify any data */
                        .len = 0,
-               }
+               } }
        },
        [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
-               .ptr = {
+               .ptr = {
                        .type = UVERBS_ATTR_TYPE_PTR_IN,
                        UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
                        .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-               }
+               } }
        },
 };
 
index 7eff3aeffe01f37e8b0f2a6494911ca1e362eb6b..6ddfb1fade79abda9344820674d7ab46650f4cd4 100644 (file)
@@ -1656,6 +1656,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
        if (!IS_ERR(mr)) {
                mr->device  = pd->device;
                mr->pd      = pd;
+               mr->dm      = NULL;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
                mr->need_inval = false;
index 6f2b26126c64a4503b6a3bf8b8c3991b65b65012..2be2e1ac1b5f162a6513d1e1bf94a684e618eb41 100644 (file)
@@ -315,7 +315,7 @@ static void advance_oldest_read(struct t4_wq *wq)
  * Deal with out-of-order and/or completions that complete
  * prior unsignalled WRs.
  */
-void c4iw_flush_hw_cq(struct c4iw_cq *chp)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp)
 {
        struct t4_cqe *hw_cqe, *swcqe, read_cqe;
        struct c4iw_qp *qhp;
@@ -339,6 +339,13 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
                if (qhp == NULL)
                        goto next_cqe;
 
+               if (flush_qhp != qhp) {
+                       spin_lock(&qhp->lock);
+
+                       if (qhp->wq.flushed == 1)
+                               goto next_cqe;
+               }
+
                if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
                        goto next_cqe;
 
@@ -390,6 +397,8 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 next_cqe:
                t4_hwcq_consume(&chp->cq);
                ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+               if (qhp && flush_qhp != qhp)
+                       spin_unlock(&qhp->lock);
        }
 }
 
index feeb8ee6f4a2b6c4ece7580dc182e5ae5377256d..44161ca4d2a86d6dd3cdb96404877ef23cb4cb38 100644 (file)
@@ -875,6 +875,11 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
        rdev->status_page->db_off = 0;
 
+       init_completion(&rdev->rqt_compl);
+       init_completion(&rdev->pbl_compl);
+       kref_init(&rdev->rqt_kref);
+       kref_init(&rdev->pbl_kref);
+
        return 0;
 err_free_status_page_and_wr_log:
        if (c4iw_wr_log && rdev->wr_log)
@@ -893,13 +898,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
 static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
-       destroy_workqueue(rdev->free_workq);
        kfree(rdev->wr_log);
        c4iw_release_dev_ucontext(rdev, &rdev->uctx);
        free_page((unsigned long)rdev->status_page);
        c4iw_pblpool_destroy(rdev);
        c4iw_rqtpool_destroy(rdev);
+       wait_for_completion(&rdev->pbl_compl);
+       wait_for_completion(&rdev->rqt_compl);
        c4iw_ocqp_pool_destroy(rdev);
+       destroy_workqueue(rdev->free_workq);
        c4iw_destroy_resource(&rdev->resource);
 }
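
The completions and krefs initialized above implement "the last user
destroys the pool, and close waits for that to happen". A user-space analog
with a mutex and condition variable standing in for kref_put() and
wait_for_completion(); all names are invented:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
    static int refs = 1;                 /* ~ kref_init() */
    static bool destroyed;

    static void pool_put(void)           /* ~ kref_put(..., destroy_pool) */
    {
            pthread_mutex_lock(&lock);
            if (--refs == 0) {
                    /* ~ gen_pool_destroy() + complete() */
                    destroyed = true;
                    pthread_cond_broadcast(&done);
            }
            pthread_mutex_unlock(&lock);
    }

    static void pool_close(void)         /* ~ the c4iw_rdev_close() ordering */
    {
            pool_put();                  /* drop the initial reference */
            pthread_mutex_lock(&lock);
            while (!destroyed)           /* ~ wait_for_completion() */
                    pthread_cond_wait(&done, &lock);
            pthread_mutex_unlock(&lock);
            printf("pool destroyed, safe to tear down the rest\n");
    }

    int main(void) { pool_close(); return 0; }
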
 
index cc929002c05eb3ee244f95b1a33329fa72ab139a..8310277171211a363667c2f12b5d3010b9745f5a 100644 (file)
@@ -185,6 +185,10 @@ struct c4iw_rdev {
        struct wr_log_entry *wr_log;
        int wr_log_size;
        struct workqueue_struct *free_workq;
+       struct completion rqt_compl;
+       struct completion pbl_compl;
+       struct kref rqt_kref;
+       struct kref pbl_kref;
 };
 
 static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -1049,7 +1053,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size);
 void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
 u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
 void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
-void c4iw_flush_hw_cq(struct c4iw_cq *chp);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp);
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
 int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
index de77b6027d695da106004fdee247bfc8f1479e84..ae167b68660862d170d481890f7d02ca48d1061b 100644 (file)
@@ -1343,12 +1343,12 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
        qhp->wq.flushed = 1;
        t4_set_wq_in_error(&qhp->wq);
 
-       c4iw_flush_hw_cq(rchp);
+       c4iw_flush_hw_cq(rchp, qhp);
        c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
        rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 
        if (schp != rchp)
-               c4iw_flush_hw_cq(schp);
+               c4iw_flush_hw_cq(schp, qhp);
        sq_flushed = c4iw_flush_sq(qhp);
 
        spin_unlock(&qhp->lock);
index 3cf25997ed2b58dbda6f64a600206ed7929de30e..0ef25ae05e6fee35e4a7baf3f48181ee37f6deb5 100644 (file)
@@ -260,12 +260,22 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
                rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
                if (rdev->stats.pbl.cur > rdev->stats.pbl.max)
                        rdev->stats.pbl.max = rdev->stats.pbl.cur;
+               kref_get(&rdev->pbl_kref);
        } else
                rdev->stats.pbl.fail++;
        mutex_unlock(&rdev->stats.lock);
        return (u32)addr;
 }
 
+static void destroy_pblpool(struct kref *kref)
+{
+       struct c4iw_rdev *rdev;
+
+       rdev = container_of(kref, struct c4iw_rdev, pbl_kref);
+       gen_pool_destroy(rdev->pbl_pool);
+       complete(&rdev->pbl_compl);
+}
+
 void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 {
        pr_debug("addr 0x%x size %d\n", addr, size);
@@ -273,6 +283,7 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
        rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
        mutex_unlock(&rdev->stats.lock);
        gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
+       kref_put(&rdev->pbl_kref, destroy_pblpool);
 }
 
 int c4iw_pblpool_create(struct c4iw_rdev *rdev)
@@ -310,7 +321,7 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
 
 void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
 {
-       gen_pool_destroy(rdev->pbl_pool);
+       kref_put(&rdev->pbl_kref, destroy_pblpool);
 }
 
 /*
@@ -331,12 +342,22 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
                rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
                if (rdev->stats.rqt.cur > rdev->stats.rqt.max)
                        rdev->stats.rqt.max = rdev->stats.rqt.cur;
+               kref_get(&rdev->rqt_kref);
        } else
                rdev->stats.rqt.fail++;
        mutex_unlock(&rdev->stats.lock);
        return (u32)addr;
 }
 
+static void destroy_rqtpool(struct kref *kref)
+{
+       struct c4iw_rdev *rdev;
+
+       rdev = container_of(kref, struct c4iw_rdev, rqt_kref);
+       gen_pool_destroy(rdev->rqt_pool);
+       complete(&rdev->rqt_compl);
+}
+
 void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 {
        pr_debug("addr 0x%x size %d\n", addr, size << 6);
@@ -344,6 +365,7 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
        rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
        mutex_unlock(&rdev->stats.lock);
        gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
+       kref_put(&rdev->rqt_kref, destroy_rqtpool);
 }
 
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
@@ -380,7 +402,7 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
 
 void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
 {
-       gen_pool_destroy(rdev->rqt_pool);
+       kref_put(&rdev->rqt_kref, destroy_rqtpool);
 }
 
 /*
index a97055dd4fbdeeefcd9be4b39deebb5939e958eb..b5fab55cc275068166369c184a759ab9a925849d 100644 (file)
@@ -412,7 +412,6 @@ static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
 static int get_irq_affinity(struct hfi1_devdata *dd,
                            struct hfi1_msix_entry *msix)
 {
-       int ret;
        cpumask_var_t diff;
        struct hfi1_affinity_node *entry;
        struct cpu_mask_set *set = NULL;
@@ -424,10 +423,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
        extra[0] = '\0';
        cpumask_clear(&msix->mask);
 
-       ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
-       if (!ret)
-               return -ENOMEM;
-
        entry = node_affinity_lookup(dd->node);
 
        switch (msix->type) {
@@ -458,6 +453,9 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
         * finds its CPU here.
         */
        if (cpu == -1 && set) {
+               if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
+                       return -ENOMEM;
+
                if (cpumask_equal(&set->mask, &set->used)) {
                        /*
                         * We've used up all the CPUs, bump up the generation
@@ -469,6 +467,8 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
                cpumask_andnot(diff, &set->mask, &set->used);
                cpu = cpumask_first(diff);
                cpumask_set_cpu(cpu, &set->used);
+
+               free_cpumask_var(diff);
        }
 
        cpumask_set_cpu(cpu, &msix->mask);
@@ -482,7 +482,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
                hfi1_setup_sdma_notifier(msix);
        }
 
-       free_cpumask_var(diff);
        return 0;
 }
 
index 46d1475b2154fc22485ad1eb0052b1feb3cf54b3..bd837a048bf49602c5659b32932f583f71f8b2d4 100644 (file)
@@ -433,31 +433,43 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
                               bool do_cnp)
 {
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+       struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct ib_other_headers *ohdr = pkt->ohdr;
        struct ib_grh *grh = pkt->grh;
        u32 rqpn = 0, bth1;
-       u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr);
+       u16 pkey;
+       u32 rlid, slid, dlid = 0;
        u8 hdr_type, sc, svc_type;
        bool is_mcast = false;
 
+       /* can be called from prescan */
        if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
-               is_mcast = hfi1_is_16B_mcast(dlid);
                pkey = hfi1_16B_get_pkey(pkt->hdr);
                sc = hfi1_16B_get_sc(pkt->hdr);
+               dlid = hfi1_16B_get_dlid(pkt->hdr);
+               slid = hfi1_16B_get_slid(pkt->hdr);
+               is_mcast = hfi1_is_16B_mcast(dlid);
                hdr_type = HFI1_PKT_TYPE_16B;
        } else {
-               is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
-                          (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
                pkey = ib_bth_get_pkey(ohdr);
                sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
+               dlid = ib_get_dlid(pkt->hdr);
+               slid = ib_get_slid(pkt->hdr);
+               is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+                          (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
                hdr_type = HFI1_PKT_TYPE_9B;
        }
 
        switch (qp->ibqp.qp_type) {
+       case IB_QPT_UD:
+               dlid = ppd->lid;
+               rlid = slid;
+               rqpn = ib_get_sqpn(pkt->ohdr);
+               svc_type = IB_CC_SVCTYPE_UD;
+               break;
        case IB_QPT_SMI:
        case IB_QPT_GSI:
-       case IB_QPT_UD:
-               rlid = ib_get_slid(pkt->hdr);
+               rlid = slid;
                rqpn = ib_get_sqpn(pkt->ohdr);
                svc_type = IB_CC_SVCTYPE_UD;
                break;
@@ -482,7 +494,6 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
                                              dlid, rlid, sc, grh);
 
        if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
-               struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
                u32 lqpn = bth1 & RVT_QPN_MASK;
                u8 sl = ibp->sc_to_sl[sc];
 
index 32c48265405ea42bdb6440f8a80832aacc290dec..cac2c62bc42d6d496c307e66dd4ac6c7f0ada16e 100644 (file)
@@ -1537,13 +1537,13 @@ void set_link_ipg(struct hfi1_pportdata *ppd);
 void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
                  u32 rqpn, u8 svc_type);
 void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
-               u32 pkey, u32 slid, u32 dlid, u8 sc5,
+               u16 pkey, u32 slid, u32 dlid, u8 sc5,
                const struct ib_grh *old_grh);
 void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
-                   u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+                   u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
                    u8 sc5, const struct ib_grh *old_grh);
 typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
-                               u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+                               u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
                                u8 sc5, const struct ib_grh *old_grh);
 
 #define PKEY_CHECK_INVALID -1
@@ -2437,7 +2437,7 @@ static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
                ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT);
        lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) |
                ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT);
-       lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT);
+       lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT);
        lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4;
 
        hdr->lrh[0] = lrh0;
index 33eba23567422e1a9d82f8008dfd89c0302aaba9..6309edf811df6e33d2ea3f64e6d7592231c8dd3a 100644 (file)
@@ -88,9 +88,9 @@
  * pio buffers per ctxt, etc.)  Zero means use one user context per CPU.
  */
 int num_user_contexts = -1;
-module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO);
+module_param_named(num_user_contexts, num_user_contexts, int, 0444);
 MODULE_PARM_DESC(
-       num_user_contexts, "Set max number of user contexts to use");
+       num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)");
 
 uint krcvqs[RXE_NUM_DATA_VL];
 int krcvqsset;
@@ -1209,30 +1209,49 @@ static void finalize_asic_data(struct hfi1_devdata *dd,
        kfree(ad);
 }
 
-static void __hfi1_free_devdata(struct kobject *kobj)
+/**
+ * hfi1_clean_devdata - cleans up per-unit data structure
+ * @dd: pointer to a valid devdata structure
+ *
+ * It cleans up all data structures set up
+ * by hfi1_alloc_devdata().
+ */
+static void hfi1_clean_devdata(struct hfi1_devdata *dd)
 {
-       struct hfi1_devdata *dd =
-               container_of(kobj, struct hfi1_devdata, kobj);
        struct hfi1_asic_data *ad;
        unsigned long flags;
 
        spin_lock_irqsave(&hfi1_devs_lock, flags);
-       idr_remove(&hfi1_unit_table, dd->unit);
-       list_del(&dd->list);
+       if (!list_empty(&dd->list)) {
+               idr_remove(&hfi1_unit_table, dd->unit);
+               list_del_init(&dd->list);
+       }
        ad = release_asic_data(dd);
        spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-       if (ad)
-               finalize_asic_data(dd, ad);
+
+       finalize_asic_data(dd, ad);
        free_platform_config(dd);
        rcu_barrier(); /* wait for rcu callbacks to complete */
        free_percpu(dd->int_counter);
        free_percpu(dd->rcv_limit);
        free_percpu(dd->send_schedule);
        free_percpu(dd->tx_opstats);
+       dd->int_counter   = NULL;
+       dd->rcv_limit     = NULL;
+       dd->send_schedule = NULL;
+       dd->tx_opstats    = NULL;
        sdma_clean(dd, dd->num_sdma);
        rvt_dealloc_device(&dd->verbs_dev.rdi);
 }
 
+static void __hfi1_free_devdata(struct kobject *kobj)
+{
+       struct hfi1_devdata *dd =
+               container_of(kobj, struct hfi1_devdata, kobj);
+
+       hfi1_clean_devdata(dd);
+}
+
 static struct kobj_type hfi1_devdata_type = {
        .release = __hfi1_free_devdata,
 };
@@ -1265,6 +1284,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
                return ERR_PTR(-ENOMEM);
        dd->num_pports = nports;
        dd->pport = (struct hfi1_pportdata *)(dd + 1);
+       dd->pcidev = pdev;
+       pci_set_drvdata(pdev, dd);
 
        INIT_LIST_HEAD(&dd->list);
        idr_preload(GFP_KERNEL);
@@ -1331,9 +1352,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
        return dd;
 
 bail:
-       if (!list_empty(&dd->list))
-               list_del_init(&dd->list);
-       rvt_dealloc_device(&dd->verbs_dev.rdi);
+       hfi1_clean_devdata(dd);
        return ERR_PTR(ret);
 }
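
hfi1_clean_devdata() above NULLs each pointer after freeing it so the same
routine can serve both the error path and the final release path without
double-freeing. Minimal stand-alone sketch:

    #include <stdlib.h>

    struct devdata { int *int_counter; int *rcv_limit; };

    static void clean_devdata(struct devdata *dd)
    {
            free(dd->int_counter);
            dd->int_counter = NULL;   /* safe if clean_devdata() reruns */
            free(dd->rcv_limit);
            dd->rcv_limit = NULL;
    }

    int main(void)
    {
            struct devdata dd = {
                    .int_counter = malloc(sizeof(int)),
                    .rcv_limit   = malloc(sizeof(int)),
            };

            clean_devdata(&dd);   /* e.g. error path */
            clean_devdata(&dd);   /* e.g. release path: now a no-op */
            return 0;
    }
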
 
index 83d66e862207c6f7a31ab5e7eac48a441b82e115..c1c982908b4bb2ad000fc1aa507be0036f78789d 100644 (file)
@@ -163,9 +163,6 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
        resource_size_t addr;
        int ret = 0;
 
-       dd->pcidev = pdev;
-       pci_set_drvdata(pdev, dd);
-
        addr = pci_resource_start(pdev, 0);
        len = pci_resource_len(pdev, 0);
 
index d486355880cb0da37e23755e8ed49197fed90fc8..cbf7faa5038ca9e7e271363ce80f9174b97da907 100644 (file)
@@ -199,6 +199,7 @@ void free_platform_config(struct hfi1_devdata *dd)
 {
        /* Release memory allocated for eprom or fallback file read. */
        kfree(dd->platform_config.data);
+       dd->platform_config.data = NULL;
 }
 
 void get_port_type(struct hfi1_pportdata *ppd)
index 1869f639c3aec7283a2e502b126d5f31573386bb..b5966991d64744e710dbb7e38574b91c676bacf8 100644 (file)
@@ -204,6 +204,8 @@ static void clean_i2c_bus(struct hfi1_i2c_bus *bus)
 
 void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
 {
+       if (!ad)
+               return;
        clean_i2c_bus(ad->i2c_bus0);
        ad->i2c_bus0 = NULL;
        clean_i2c_bus(ad->i2c_bus1);
index 3daa94bdae3a9adebc7908577db4bd7290b8e048..c0071ca4147ae818eaea5246949c1db18d08b2c6 100644 (file)
@@ -733,6 +733,20 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
        ohdr->bth[2] = cpu_to_be32(bth2);
 }
 
+/**
+ * hfi1_make_ruc_header_16B - build a 16B header
+ * @qp: the queue pair
+ * @ohdr: a pointer to the destination header memory
+ * @bth0: bth0 passed in from the RC/UC builder
+ * @bth2: bth2 passed in from the RC/UC builder
+ * @middle: non-zero indicates ahg "could" be used
+ * @ps: the current packet state
+ *
+ * This routine may disarm ahg under these situations:
+ * - packet needs a GRH
+ * - BECN needed
+ * - migration state not IB_MIG_MIGRATED
+ */
 static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
                                            struct ib_other_headers *ohdr,
                                            u32 bth0, u32 bth2, int middle,
@@ -777,6 +791,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
        else
                middle = 0;
 
+       if (qp->s_flags & RVT_S_ECN) {
+               qp->s_flags &= ~RVT_S_ECN;
+               /* we recently received a FECN, so return a BECN */
+               becn = true;
+               middle = 0;
+       }
        if (middle)
                build_ahg(qp, bth2);
        else
@@ -784,11 +804,6 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 
        bth0 |= pkey;
        bth0 |= extra_bytes << 20;
-       if (qp->s_flags & RVT_S_ECN) {
-               qp->s_flags &= ~RVT_S_ECN;
-               /* we recently received a FECN, so return a BECN */
-               becn = true;
-       }
        hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
 
        if (!ppd->lid)
@@ -806,6 +821,20 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
                          pkey, becn, 0, l4, priv->s_sc);
 }
 
+/**
+ * hfi1_make_ruc_header_9B - build a 9B header
+ * @qp: the queue pair
+ * @ohdr: a pointer to the destination header memory
+ * @bth0: bth0 passed in from the RC/UC builder
+ * @bth2: bth2 passed in from the RC/UC builder
+ * @middle: non-zero indicates ahg "could" be used
+ * @ps: the current packet state
+ *
+ * This routine may disarm ahg under these situations:
+ * - packet needs a GRH
+ * - BECN needed
+ * - migration state not IB_MIG_MIGRATED
+ */
 static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
                                           struct ib_other_headers *ohdr,
                                           u32 bth0, u32 bth2, int middle,
@@ -839,6 +868,12 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
        else
                middle = 0;
 
+       if (qp->s_flags & RVT_S_ECN) {
+               qp->s_flags &= ~RVT_S_ECN;
+               /* we recently received a FECN, so return a BECN */
+               bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
+               middle = 0;
+       }
        if (middle)
                build_ahg(qp, bth2);
        else
@@ -846,11 +881,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 
        bth0 |= pkey;
        bth0 |= extra_bytes << 20;
-       if (qp->s_flags & RVT_S_ECN) {
-               qp->s_flags &= ~RVT_S_ECN;
-               /* we recently received a FECN, so return a BECN */
-               bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
-       }
        hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
        hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
                         lrh0,
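
Both header builders now treat RVT_S_ECN as a test-and-clear flag and force
ahg off for that packet, since returning a BECN changes bth1. A stand-alone
sketch of that control flow, with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    #define S_ECN 0x1

    static unsigned int s_flags = S_ECN;   /* a FECN was received earlier */

    static void make_header(bool middle)
    {
            bool becn = false;

            if (s_flags & S_ECN) {
                    s_flags &= ~S_ECN;   /* test-and-clear */
                    becn = true;         /* return a BECN to the sender */
                    middle = false;      /* BECN changes bth1: no ahg reuse */
            }
            printf("becn=%d use_ahg=%d\n", becn, middle);
    }

    int main(void)
    {
            make_header(true);   /* becn=1 use_ahg=0 */
            make_header(true);   /* becn=0 use_ahg=1 */
            return 0;
    }
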
index bcf3b0bebac8b69cb0fd77dd42c08fe86df2526d..69c17a5ef03871b73c0810fd3945f2111ac56e4f 100644 (file)
@@ -628,7 +628,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
 }
 
 void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
-                   u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+                   u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
                    u8 sc5, const struct ib_grh *old_grh)
 {
        u64 pbc, pbc_flags = 0;
@@ -687,7 +687,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
 }
 
 void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
-               u32 pkey, u32 slid, u32 dlid, u8 sc5,
+               u16 pkey, u32 slid, u32 dlid, u8 sc5,
                const struct ib_grh *old_grh)
 {
        u64 pbc, pbc_flags = 0;
index 0eeabfbee192efed31c46d948ee6db264d1085fd..63b5b3edabcbad7b9ea6a002334a62433fef8a6a 100644 (file)
@@ -912,7 +912,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
                obj_per_chunk = buf_chunk_size / obj_size;
                num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
                bt_chunk_num = bt_chunk_size / 8;
-               if (table->type >= HEM_TYPE_MTT)
+               if (type >= HEM_TYPE_MTT)
                        num_bt_l0 = bt_chunk_num;
 
                table->hem = kcalloc(num_hem, sizeof(*table->hem),
@@ -920,7 +920,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
                if (!table->hem)
                        goto err_kcalloc_hem_buf;
 
-               if (check_whether_bt_num_3(table->type, hop_num)) {
+               if (check_whether_bt_num_3(type, hop_num)) {
                        unsigned long num_bt_l1;
 
                        num_bt_l1 = (num_hem + bt_chunk_num - 1) /
@@ -939,8 +939,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
                                goto err_kcalloc_l1_dma;
                }
 
-               if (check_whether_bt_num_2(table->type, hop_num) ||
-                       check_whether_bt_num_3(table->type, hop_num)) {
+               if (check_whether_bt_num_2(type, hop_num) ||
+                       check_whether_bt_num_3(type, hop_num)) {
                        table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0),
                                               GFP_KERNEL);
                        if (!table->bt_l0)
@@ -1039,14 +1039,14 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
 void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
 {
        hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
-       hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
        if (hr_dev->caps.trrl_entry_sz)
                hns_roce_cleanup_hem_table(hr_dev,
                                           &hr_dev->qp_table.trrl_table);
+       hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
        hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
        hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
-       hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
                hns_roce_cleanup_hem_table(hr_dev,
                                           &hr_dev->mr_table.mtt_cqe_table);
+       hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
 }
index 8b84ab7800d8cec4bf4a939f5290e83c176f2dc1..25916e8522eda4e061c63e2b652db1fa8fdf5e41 100644 (file)
@@ -71,6 +71,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        return -EINVAL;
                }
 
+               if (wr->opcode == IB_WR_RDMA_READ) {
+                       dev_err(hr_dev->dev, "RDMA READ does not support inline data!\n");
+                       return -EINVAL;
+               }
+
                for (i = 0; i < wr->num_sge; i++) {
                        memcpy(wqe, ((void *)wr->sg_list[i].addr),
                               wr->sg_list[i].length);
@@ -148,7 +153,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                     ibqp->qp_type != IB_QPT_GSI &&
                     ibqp->qp_type != IB_QPT_UD)) {
                dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
-               *bad_wr = NULL;
+               *bad_wr = wr;
                return -EOPNOTSUPP;
        }
 
@@ -182,7 +187,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
                                                                      wr->wr_id;
 
-               owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1;
+               owner_bit =
+                      ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
 
                /* Corresponding to the QP type, wqe process separately */
                if (ibqp->qp_type == IB_QPT_GSI) {
@@ -456,6 +462,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                } else {
                        dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
                        spin_unlock_irqrestore(&qp->sq.lock, flags);
+                       *bad_wr = wr;
                        return -EOPNOTSUPP;
                }
        }
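
The owner-bit fix above uses (head + nreq) so that a batch of posted WRs
that crosses the ring wrap point still gets the correct bit. A stand-alone
sketch with a 4-entry ring:

    #include <stdio.h>

    #define RING_SHIFT 2   /* ring of 4 entries */

    static unsigned int owner_bit(unsigned int head, unsigned int nreq)
    {
            /* pass (head + nreq), as the fix does, so batched requests
             * that wrap the ring flip the owner bit at the right spot */
            return (~((head + nreq) >> RING_SHIFT)) & 0x1;
    }

    int main(void)
    {
            for (unsigned int i = 0; i < 8; i++)
                    printf("wqe %u -> owner %u\n", i, owner_bit(0, i));
            return 0;   /* bit is 1 for entries 0..3, 0 for 4..7 */
    }
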
@@ -2592,10 +2599,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
        roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
                       V2_QPC_BYTE_4_SQPN_S, 0);
 
-       roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
-                      V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn);
-       roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
-                      V2_QPC_BYTE_56_DQPN_S, 0);
+       if (attr_mask & IB_QP_DEST_QPN) {
+               roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
+                              V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn);
+               roce_set_field(qpc_mask->byte_56_dqpn_err,
+                              V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
+       }
        roce_set_field(context->byte_168_irrl_idx,
                       V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
                       V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
@@ -2650,8 +2659,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                return -EINVAL;
        }
 
-       if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) ||
-           (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) {
+       if (attr_mask & IB_QP_ALT_PATH) {
                dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask);
                return -EINVAL;
        }
@@ -2800,10 +2808,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                               V2_QPC_BYTE_140_RR_MAX_S, 0);
        }
 
-       roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
-                      V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
-       roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
-                      V2_QPC_BYTE_56_DQPN_S, 0);
+       if (attr_mask & IB_QP_DEST_QPN) {
+               roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
+                              V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
+               roce_set_field(qpc_mask->byte_56_dqpn_err,
+                              V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
+       }
 
        /* Configure GID index */
        port_num = rdma_ah_get_port_num(&attr->ah_attr);
@@ -2845,7 +2855,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
                roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
                               V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
-       else
+       else if (attr_mask & IB_QP_PATH_MTU)
                roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
                               V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
 
@@ -2922,11 +2932,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
                return -EINVAL;
        }
 
-       /* If exist optional param, return error */
-       if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) ||
-           (attr_mask & IB_QP_QKEY) || (attr_mask & IB_QP_PATH_MIG_STATE) ||
-           (attr_mask & IB_QP_CUR_STATE) ||
-           (attr_mask & IB_QP_MIN_RNR_TIMER)) {
+       /* Alternate path and path migration are not supported */
+       if ((attr_mask & IB_QP_ALT_PATH) ||
+           (attr_mask & IB_QP_PATH_MIG_STATE)) {
                dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
                return -EINVAL;
        }
@@ -3161,7 +3169,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                   (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
                   (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
                   (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) ||
-                  (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR)) {
+                  (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) ||
+                  (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
                /* Nothing */
                ;
        } else {
@@ -4478,7 +4487,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
        ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
                                eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
        if (ret) {
-               dev_err(dev, "[mailbox cmd] creat eqc failed.\n");
+               dev_err(dev, "[mailbox cmd] create eqc failed.\n");
                goto err_cmd_mbox;
        }
 
index e289a924e7890a6b955522d818d4660584920d2f..d4aad34c21e2ca8a9bc36db6ba1f682ac1f1972f 100644 (file)
@@ -620,7 +620,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                                        to_hr_ucontext(ib_pd->uobject->context),
                                        ucmd.db_addr, &hr_qp->rdb);
                        if (ret) {
-                               dev_err(dev, "rp record doorbell map failed!\n");
+                               dev_err(dev, "rq record doorbell map failed!\n");
                                goto err_mtt;
                        }
                }
index 17f4f151a97f1beb2accecc8024fe08384ab602d..61d8b06375bb8520c7bed6a60a76b833cdb713e8 100644 (file)
@@ -346,7 +346,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
        /* Add to the first block the misalignment that it suffers from. */
        total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
        last_block_end = current_block_start + current_block_len;
-       last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
+       last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
        total_len += (last_block_aligned_end - last_block_end);
 
        if (total_len & ((1ULL << block_shift) - 1ULL))
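
The 1ULL fix above keeps the round-up in 64 bits; with a 32-bit constant,
block_shift >= 32 would shift out of range. A stand-alone illustration with
invented values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int block_shift = 33;
            uint64_t last_block_end = 0x300000000ULL;
            uint64_t align = 1ULL << block_shift;   /* 64-bit, as fixed */
            uint64_t aligned_end = (last_block_end + align - 1) &
                                   ~(align - 1);

            printf("aligned end = 0x%" PRIx64 "\n", aligned_end);
            return 0;   /* prints 0x400000000 */
    }
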
index 50af8915e7ec799e87297c1b678a4ae9eed4f262..199648adac749723bce64e5f920151890ff9a608 100644 (file)
@@ -673,7 +673,8 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
                                          MLX4_IB_RX_HASH_SRC_PORT_TCP  |
                                          MLX4_IB_RX_HASH_DST_PORT_TCP  |
                                          MLX4_IB_RX_HASH_SRC_PORT_UDP  |
-                                         MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+                                         MLX4_IB_RX_HASH_DST_PORT_UDP  |
+                                         MLX4_IB_RX_HASH_INNER)) {
                pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
                         ucmd->rx_hash_fields_mask);
                return (-EOPNOTSUPP);
index bce263b928211c3e2126b04f6ef00fbc41b33d03..fb4d77be019b77797b5bf281dfdd63a447fd065f 100644 (file)
@@ -1,6 +1,7 @@
 config MLX5_INFINIBAND
        tristate "Mellanox Connect-IB HCA support"
        depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+       depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n
        ---help---
          This driver provides low-level InfiniBand support for
          Mellanox Connect-IB PCI Express host channel adapters (HCAs).
index 77d257ec899be9b5ec23b7489850161a14d52d1c..6d52ea03574e591552de460be15cf22720395a5a 100644 (file)
@@ -849,7 +849,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
        return 0;
 
 err_cqb:
-       kfree(*cqb);
+       kvfree(*cqb);
 
 err_db:
        mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
index daa919e5a4423adf5b658dc8668526f93e54d861..b4d8ff8ab807a445ad93cc532ef1f4fe5d4242fb 100644 (file)
@@ -52,7 +52,6 @@
 #include <linux/mlx5/port.h>
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
-#include <linux/mlx5/fs_helpers.h>
 #include <linux/list.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
@@ -180,7 +179,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
                        if (rep_ndev == ndev)
                                roce->netdev = (event == NETDEV_UNREGISTER) ?
                                        NULL : ndev;
-               } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+               } else if (ndev->dev.parent == &mdev->pdev->dev) {
                        roce->netdev = (event == NETDEV_UNREGISTER) ?
                                NULL : ndev;
                }
@@ -4757,7 +4756,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
 
-       return mlx5_get_vector_affinity(dev->mdev, comp_vector);
+       return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
 }
 
 /* The mlx5_ib_multiport_mutex should be held when calling this function */
@@ -5427,9 +5426,7 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
 static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
 {
        dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
-       if (!dev->mdev->priv.uar)
-               return -ENOMEM;
-       return 0;
+       return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
 }
 
 static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
index 1520a2f20f980a9563c402cc8b876d89e5890c4b..90a9c461cedca5db0b7ca924087c59a8e3d58b4b 100644 (file)
@@ -866,25 +866,28 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
                       int *order)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct ib_umem *u;
        int err;
 
-       *umem = ib_umem_get(pd->uobject->context, start, length,
-                           access_flags, 0);
-       err = PTR_ERR_OR_ZERO(*umem);
+       *umem = NULL;
+
+       u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
+       err = PTR_ERR_OR_ZERO(u);
        if (err) {
-               *umem = NULL;
-               mlx5_ib_err(dev, "umem get failed (%d)\n", err);
+               mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
                return err;
        }
 
-       mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+       mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
                           page_shift, ncont, order);
        if (!*npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
-               ib_umem_release(*umem);
+               ib_umem_release(u);
                return -EINVAL;
        }
 
+       *umem = u;
+
        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
                    *npages, *ncont, *order, *page_shift);
 
@@ -1458,13 +1461,12 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
        int access_flags = flags & IB_MR_REREG_ACCESS ?
                            new_access_flags :
                            mr->access_flags;
-       u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
-       u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
        int page_shift = 0;
        int upd_flags = 0;
        int npages = 0;
        int ncont = 0;
        int order = 0;
+       u64 addr, len;
        int err;
 
        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
@@ -1472,6 +1474,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 
        atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
 
+       if (!mr->umem)
+               return -EINVAL;
+
+       if (flags & IB_MR_REREG_TRANS) {
+               addr = virt_addr;
+               len = length;
+       } else {
+               addr = mr->umem->address;
+               len = mr->umem->length;
+       }
+
        if (flags != IB_MR_REREG_PD) {
                /*
                 * Replace umem. This needs to be done whether or not UMR is
@@ -1479,6 +1492,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                 */
                flags |= IB_MR_REREG_TRANS;
                ib_umem_release(mr->umem);
+               mr->umem = NULL;
                err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
                                  &npages, &page_shift, &ncont, &order);
                if (err)
index 7ed4b70f6447554325c66ec488c434dd6b732962..87b7c1be2a117b0a0d7840ce13d36dabb5c4a13c 100644 (file)
@@ -259,7 +259,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
        } else {
                if (ucmd) {
                        qp->rq.wqe_cnt = ucmd->rq_wqe_count;
+                       if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
+                               return -EINVAL;
                        qp->rq.wqe_shift = ucmd->rq_wqe_shift;
+                       if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig)
+                               return -EINVAL;
                        qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
                        qp->rq.max_post = qp->rq.wqe_cnt;
                } else {
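
The two checks added above bound a user-supplied shift before it is used: it
must fit the field's width, and the resulting stride must be large enough
for the signature segment. A stand-alone sketch with invented limits:

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>

    #define SEG_SIZE 16u   /* ~ sizeof(struct mlx5_wqe_data_seg) */

    static int check_rq_shift(unsigned char shift, unsigned int wq_sig)
    {
            if (shift > CHAR_BIT * sizeof(shift))    /* out of range */
                    return -EINVAL;
            if ((1u << shift) / SEG_SIZE < wq_sig)   /* stride too small */
                    return -EINVAL;
            return 0;
    }

    int main(void)
    {
            printf("%d %d\n", check_rq_shift(6, 1),   /* 0: 64B stride ok */
                              check_rq_shift(2, 1));  /* -EINVAL: too small */
            return 0;
    }
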
@@ -2451,18 +2455,18 @@ enum {
 
 static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
 {
-       if (rate == IB_RATE_PORT_CURRENT) {
+       if (rate == IB_RATE_PORT_CURRENT)
                return 0;
-       } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
+
+       if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
                return -EINVAL;
-       } else {
-               while (rate != IB_RATE_2_5_GBPS &&
-                      !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
-                        MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
-                       --rate;
-       }
 
-       return rate + MLX5_STAT_RATE_OFFSET;
+       while (rate != IB_RATE_PORT_CURRENT &&
+              !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
+                MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
+               --rate;
+
+       return rate ? rate + MLX5_STAT_RATE_OFFSET : rate;
 }
 
 static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
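
The rewritten ib_rate_to_mlx5() walks the requested rate downward until it
finds one the device supports, and passes rate 0 (port-current) through
untouched. A stand-alone sketch against an invented capability mask:

    #include <stdio.h>

    #define STAT_RATE_OFFSET 5
    #define SUPPORTED_MASK   0x00f0u   /* hypothetical HW capability bits */

    static int rate_to_hw(int rate)
    {
            if (rate == 0)   /* ~ IB_RATE_PORT_CURRENT: no fixup */
                    return 0;
            while (rate &&
                   !((1u << (rate + STAT_RATE_OFFSET)) & SUPPORTED_MASK))
                    --rate;  /* fall back to the next slower rate */
            return rate ? rate + STAT_RATE_OFFSET : 0;
    }

    int main(void)
    {
            printf("%d\n", rate_to_hw(4));   /* degrades until supported */
            return 0;
    }
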
index 0a75164cedeafbfe91f54ffb2a360c9b9047224b..007d5e8a0121eb6485a9666d1e98ee3581b34c0f 100644 (file)
@@ -461,7 +461,7 @@ static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
 /**
  * nes_netdev_start_xmit
  */
-static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
        struct nes_vnic *nesvnic = netdev_priv(netdev);
        struct nes_device *nesdev = nesvnic->nesdev;
index 61927c165b598af92e572bbc9ce9bbad2b6ecb32..4cf11063e0b597bee02532a52ea648d0a26c2eac 100644 (file)
@@ -390,7 +390,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
                .name   = "IB_OPCODE_RC_SEND_ONLY_INV",
                .mask   = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
                                | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
-                               | RXE_END_MASK,
+                               | RXE_END_MASK  | RXE_START_MASK,
                .length = RXE_BTH_BYTES + RXE_IETH_BYTES,
                .offset = {
                        [RXE_BTH]       = 0,
index 7bdaf71b82213bea1cf66f606d5e517706d2647c..785199990457068f614de7a71678845425ee34d3 100644 (file)
@@ -728,7 +728,6 @@ int rxe_requester(void *arg)
                rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
 
                if (ret == -EAGAIN) {
-                       kfree_skb(skb);
                        rxe_run_task(&qp->req.task, 1);
                        goto exit;
                }
index a65c9969f7fcfffd808aea97906978ec78477a50..955ff3b6da9c6ffcfea6f94b0b32a6a6ebe60737 100644 (file)
@@ -742,7 +742,6 @@ static enum resp_states read_reply(struct rxe_qp *qp,
        err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
        if (err) {
                pr_err("Failed sending RDMA reply.\n");
-               kfree_skb(skb);
                return RESPST_ERR_RNR;
        }
 
@@ -954,10 +953,8 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
        }
 
        err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
-       if (err) {
+       if (err)
                pr_err_ratelimited("Failed sending ack\n");
-               kfree_skb(skb);
-       }
 
 err1:
        return err;
@@ -1141,7 +1138,6 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
                        if (rc) {
                                pr_err("Failed resending result. This flow is not handled - skb ignored\n");
                                rxe_drop_ref(qp);
-                               kfree_skb(skb_copy);
                                rc = RESPST_CLEANUP;
                                goto out;
                        }
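
/*
 * Note on the kfree_skb() removals in this file and in rxe_req.c: the
 * pattern suggests the transmit path (rxe_xmit_packet() and friends)
 * now consumes the skb even on failure, so freeing it again in these
 * callers would be a double free.
 */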
index 161ba8c76285cb41a18f9af304860498e0af89e0..cf291f90b58fdc630af5153beac0709a053db276 100644 (file)
@@ -1094,7 +1094,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
        spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rdma_netdev *rn = netdev_priv(dev);
index 4be3aef40bd2ebea81f337c93d6a4752d4c35f9c..267da8215e08fe71b9d4920e1accf851d7723538 100644 (file)
@@ -443,17 +443,16 @@ static u8 opa_vnic_get_rc(struct __opa_veswport_info *info,
 }
 
 /* opa_vnic_calc_entropy - calculate the packet entropy */
-u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+u8 opa_vnic_calc_entropy(struct sk_buff *skb)
 {
-       u16 hash16;
-
-       /*
-        * Get flow based 16-bit hash and then XOR the upper and lower bytes
-        * to get the entropy.
-        * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
-        */
-       hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
-       return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+       u32 hash = skb_get_hash(skb);
+
+       /* store XOR of all bytes in lower 8 bits */
+       hash ^= hash >> 8;
+       hash ^= hash >> 16;
+
+       /* return lower 8 bits as entropy */
+       return (u8)(hash & 0xFF);
 }
 
 /* opa_vnic_get_def_port - get default port based on entropy */
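
/*
 * Note: the two shift-and-XOR steps above fold all four bytes of the
 * 32-bit flow hash into the low byte, e.g. hash 0xAABBCCDD yields
 * 0xAA ^ 0xBB ^ 0xCC ^ 0xDD. Switching from __skb_tx_hash() to
 * skb_get_hash() also drops the dependency on the adapter's netdev,
 * which is why the adapter argument can be removed everywhere.
 */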
@@ -490,7 +489,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
 
        hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
 
-       entropy = opa_vnic_calc_entropy(adapter, skb);
+       entropy = opa_vnic_calc_entropy(skb);
        def_port = opa_vnic_get_def_port(adapter, entropy);
        len = opa_vnic_wire_length(skb);
        dlid = opa_vnic_get_dlid(adapter, skb, def_port);
index afd95f4322620789415ad15194259945093ae452..43ac61ffef4a9e6aa60dfad55538afb878fb1e86 100644 (file)
@@ -299,7 +299,7 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
 void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
 void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
 u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
-u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct sk_buff *skb);
 void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
index ce57e0f10289c6249b6a7f48cca42b5d0a6f482a..0c8aec62a42539fc90d67000361653fbf846ae3b 100644 (file)
@@ -104,7 +104,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
 
        /* pass entropy and vl as metadata in skb */
        mdata = skb_push(skb, sizeof(*mdata));
-       mdata->entropy =  opa_vnic_calc_entropy(adapter, skb);
+       mdata->entropy = opa_vnic_calc_entropy(skb);
        mdata->vl = opa_vnic_get_vl(adapter, skb);
        rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
                                               accel_priv, fallback);
index c74ee9633041d7a566dc1c2d9febf676c5bd744d..99db8fe5173af0e96551082031889fa9a7419933 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_SRP
        tristate "InfiniBand SCSI RDMA Protocol"
-       depends on SCSI
+       depends on SCSI && INFINIBAND_ADDR_TRANS
        select SCSI_SRP_ATTRS
        ---help---
          Support for the SCSI RDMA Protocol over InfiniBand.  This
index 31ee83d528d9b6d0bf5716b8d940f1c1b63cbf3a..fb8b7182f05ebd7413058d54e8be7fca974dcc44 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_SRPT
        tristate "InfiniBand SCSI RDMA Protocol target support"
-       depends on INFINIBAND && TARGET_CORE
+       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE
        ---help---
 
          Support for the SCSI RDMA Protocol (SRP) Target driver. The
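
# Note: both Kconfig hunks above add a dependency on
# INFINIBAND_ADDR_TRANS (the RDMA connection manager), presumably so
# that SRP/SRPT cannot be enabled in configurations where the rdma_cm
# symbols they link against are not built.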
index 46115a39209821830c1a3789d1cb93d67ff35746..c81c79d01d93078fb2a4357e6f0c51aea6a2b12b 100644 (file)
@@ -31,6 +31,7 @@
 enum evdev_clock_type {
        EV_CLK_REAL = 0,
        EV_CLK_MONO,
+       EV_CLK_BOOT,
        EV_CLK_MAX
 };
 
@@ -197,10 +198,12 @@ static int evdev_set_clk_type(struct evdev_client *client, unsigned int clkid)
        case CLOCK_REALTIME:
                clk_type = EV_CLK_REAL;
                break;
-       case CLOCK_BOOTTIME:
        case CLOCK_MONOTONIC:
                clk_type = EV_CLK_MONO;
                break;
+       case CLOCK_BOOTTIME:
+               clk_type = EV_CLK_BOOT;
+               break;
        default:
                return -EINVAL;
        }
@@ -311,6 +314,8 @@ static void evdev_events(struct input_handle *handle,
 
        ev_time[EV_CLK_MONO] = ktime_get();
        ev_time[EV_CLK_REAL] = ktime_mono_to_real(ev_time[EV_CLK_MONO]);
+       ev_time[EV_CLK_BOOT] = ktime_mono_to_any(ev_time[EV_CLK_MONO],
+                                                TK_OFFS_BOOT);
 
        rcu_read_lock();
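
/*
 * Note: EV_CLK_BOOT gives CLOCK_BOOTTIME its own timestamp base instead
 * of aliasing it to the monotonic clock. ktime_mono_to_any(mono,
 * TK_OFFS_BOOT) applies the boot-time offset, so boottime stamps keep
 * advancing across suspend while plain monotonic stamps do not.
 */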
 
index 766bf26601163c37aebf265160b4d3d0ce09617f..5f04b2d946350d2ef29d579d6e002da061d93ce0 100644 (file)
@@ -88,6 +88,7 @@ static int input_leds_connect(struct input_handler *handler,
                              const struct input_device_id *id)
 {
        struct input_leds *leds;
+       struct input_led *led;
        unsigned int num_leds;
        unsigned int led_code;
        int led_no;
@@ -119,14 +120,13 @@ static int input_leds_connect(struct input_handler *handler,
 
        led_no = 0;
        for_each_set_bit(led_code, dev->ledbit, LED_CNT) {
-               struct input_led *led = &leds->leds[led_no];
+               if (!input_led_info[led_code].name)
+                       continue;
 
+               led = &leds->leds[led_no];
                led->handle = &leds->handle;
                led->code = led_code;
 
-               if (!input_led_info[led_code].name)
-                       continue;
-
                led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s",
                                           dev_name(&dev->dev),
                                           input_led_info[led_code].name);
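
/*
 * Note: checking input_led_info[led_code].name before touching
 * leds->leds[led_no] matters if the leds array was sized counting only
 * named LEDs; the old order initialized a slot for unnamed codes too,
 * which could write past the end of the allocation.
 */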
index 0a67f235ba88a7b6e25877a43b35488a31d8fdf5..38f9501acdf04c6127a45667e831c807e52be09b 100644 (file)
@@ -583,7 +583,7 @@ static void alps_process_trackstick_packet_v3(struct psmouse *psmouse)
 
        x = (s8)(((packet[0] & 0x20) << 2) | (packet[1] & 0x7f));
        y = (s8)(((packet[0] & 0x10) << 3) | (packet[2] & 0x7f));
-       z = packet[4] & 0x7c;
+       z = packet[4] & 0x7f;
 
        /*
         * The x and y values tend to be quite large, and when used
index 76edbf2c1bced8ac02fd58bd1ea2dc719d4f83b7..082defc329a8e2cb8e4f6744d6fb7cba4c51fe5e 100644 (file)
@@ -147,8 +147,11 @@ static int rmi_spi_xfer(struct rmi_spi_xport *rmi_spi,
        if (len > RMI_SPI_XFER_SIZE_LIMIT)
                return -EINVAL;
 
-       if (rmi_spi->xfer_buf_size < len)
-               rmi_spi_manage_pools(rmi_spi, len);
+       if (rmi_spi->xfer_buf_size < len) {
+               ret = rmi_spi_manage_pools(rmi_spi, len);
+               if (ret < 0)
+                       return ret;
+       }
 
        if (addr == 0)
                /*
index 4f15496fec8b56ebaa75dd22051b0ce63d9a6827..3e613afa10b4ba967a7def75f631997c0e0c6ae4 100644 (file)
@@ -362,7 +362,7 @@ config TOUCHSCREEN_HIDEEP
 
          If unsure, say N.
 
-         To compile this driver as a moudle, choose M here : the
+         To compile this driver as a module, choose M here : the
          module will be called hideep_ts.
 
 config TOUCHSCREEN_ILI210X
index 5d9699fe1b55aa007171d0f99efbbb5b0bbbd35b..09194721aed2dcc23b65a1fae48fbc17766f8671 100644 (file)
@@ -280,7 +280,8 @@ struct mxt_data {
        struct input_dev *input_dev;
        char phys[64];          /* device physical location */
        struct mxt_object *object_table;
-       struct mxt_info info;
+       struct mxt_info *info;
+       void *raw_info_block;
        unsigned int irq;
        unsigned int max_x;
        unsigned int max_y;
@@ -460,12 +461,13 @@ static int mxt_lookup_bootloader_address(struct mxt_data *data, bool retry)
 {
        u8 appmode = data->client->addr;
        u8 bootloader;
+       u8 family_id = data->info ? data->info->family_id : 0;
 
        switch (appmode) {
        case 0x4a:
        case 0x4b:
                /* Chips after 1664S use different scheme */
-               if (retry || data->info.family_id >= 0xa2) {
+               if (retry || family_id >= 0xa2) {
                        bootloader = appmode - 0x24;
                        break;
                }
@@ -692,7 +694,7 @@ mxt_get_object(struct mxt_data *data, u8 type)
        struct mxt_object *object;
        int i;
 
-       for (i = 0; i < data->info.object_num; i++) {
+       for (i = 0; i < data->info->object_num; i++) {
                object = data->object_table + i;
                if (object->type == type)
                        return object;
@@ -1462,12 +1464,12 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg)
                data_pos += offset;
        }
 
-       if (cfg_info.family_id != data->info.family_id) {
+       if (cfg_info.family_id != data->info->family_id) {
                dev_err(dev, "Family ID mismatch!\n");
                return -EINVAL;
        }
 
-       if (cfg_info.variant_id != data->info.variant_id) {
+       if (cfg_info.variant_id != data->info->variant_id) {
                dev_err(dev, "Variant ID mismatch!\n");
                return -EINVAL;
        }
@@ -1512,7 +1514,7 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg)
 
        /* Malloc memory to store configuration */
        cfg_start_ofs = MXT_OBJECT_START +
-                       data->info.object_num * sizeof(struct mxt_object) +
+                       data->info->object_num * sizeof(struct mxt_object) +
                        MXT_INFO_CHECKSUM_SIZE;
        config_mem_size = data->mem_size - cfg_start_ofs;
        config_mem = kzalloc(config_mem_size, GFP_KERNEL);
@@ -1563,20 +1565,6 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg)
        return ret;
 }
 
-static int mxt_get_info(struct mxt_data *data)
-{
-       struct i2c_client *client = data->client;
-       struct mxt_info *info = &data->info;
-       int error;
-
-       /* Read 7-byte info block starting at address 0 */
-       error = __mxt_read_reg(client, 0, sizeof(*info), info);
-       if (error)
-               return error;
-
-       return 0;
-}
-
 static void mxt_free_input_device(struct mxt_data *data)
 {
        if (data->input_dev) {
@@ -1591,9 +1579,10 @@ static void mxt_free_object_table(struct mxt_data *data)
        video_unregister_device(&data->dbg.vdev);
        v4l2_device_unregister(&data->dbg.v4l2);
 #endif
-
-       kfree(data->object_table);
        data->object_table = NULL;
+       data->info = NULL;
+       kfree(data->raw_info_block);
+       data->raw_info_block = NULL;
        kfree(data->msg_buf);
        data->msg_buf = NULL;
        data->T5_address = 0;
@@ -1609,34 +1598,18 @@ static void mxt_free_object_table(struct mxt_data *data)
        data->max_reportid = 0;
 }
 
-static int mxt_get_object_table(struct mxt_data *data)
+static int mxt_parse_object_table(struct mxt_data *data,
+                                 struct mxt_object *object_table)
 {
        struct i2c_client *client = data->client;
-       size_t table_size;
-       struct mxt_object *object_table;
-       int error;
        int i;
        u8 reportid;
        u16 end_address;
 
-       table_size = data->info.object_num * sizeof(struct mxt_object);
-       object_table = kzalloc(table_size, GFP_KERNEL);
-       if (!object_table) {
-               dev_err(&data->client->dev, "Failed to allocate memory\n");
-               return -ENOMEM;
-       }
-
-       error = __mxt_read_reg(client, MXT_OBJECT_START, table_size,
-                       object_table);
-       if (error) {
-               kfree(object_table);
-               return error;
-       }
-
        /* Valid Report IDs start counting from 1 */
        reportid = 1;
        data->mem_size = 0;
-       for (i = 0; i < data->info.object_num; i++) {
+       for (i = 0; i < data->info->object_num; i++) {
                struct mxt_object *object = object_table + i;
                u8 min_id, max_id;
 
@@ -1660,8 +1633,8 @@ static int mxt_get_object_table(struct mxt_data *data)
 
                switch (object->type) {
                case MXT_GEN_MESSAGE_T5:
-                       if (data->info.family_id == 0x80 &&
-                           data->info.version < 0x20) {
+                       if (data->info->family_id == 0x80 &&
+                           data->info->version < 0x20) {
                                /*
                                 * On mXT224 firmware versions prior to V2.0
                                 * read and discard unused CRC byte otherwise
@@ -1716,24 +1689,102 @@ static int mxt_get_object_table(struct mxt_data *data)
        /* If T44 exists, T5 position has to be directly after */
        if (data->T44_address && (data->T5_address != data->T44_address + 1)) {
                dev_err(&client->dev, "Invalid T44 position\n");
-               error = -EINVAL;
-               goto free_object_table;
+               return -EINVAL;
        }
 
        data->msg_buf = kcalloc(data->max_reportid,
                                data->T5_msg_size, GFP_KERNEL);
-       if (!data->msg_buf) {
-               dev_err(&client->dev, "Failed to allocate message buffer\n");
+       if (!data->msg_buf)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int mxt_read_info_block(struct mxt_data *data)
+{
+       struct i2c_client *client = data->client;
+       int error;
+       size_t size;
+       void *id_buf, *buf;
+       uint8_t num_objects;
+       u32 calculated_crc;
+       u8 *crc_ptr;
+
+       /* If info block already allocated, free it */
+       if (data->raw_info_block)
+               mxt_free_object_table(data);
+
+       /* Read 7-byte ID information block starting at address 0 */
+       size = sizeof(struct mxt_info);
+       id_buf = kzalloc(size, GFP_KERNEL);
+       if (!id_buf)
+               return -ENOMEM;
+
+       error = __mxt_read_reg(client, 0, size, id_buf);
+       if (error)
+               goto err_free_mem;
+
+       /* Resize buffer to give space for rest of info block */
+       num_objects = ((struct mxt_info *)id_buf)->object_num;
+       size += (num_objects * sizeof(struct mxt_object))
+               + MXT_INFO_CHECKSUM_SIZE;
+
+       buf = krealloc(id_buf, size, GFP_KERNEL);
+       if (!buf) {
                error = -ENOMEM;
-               goto free_object_table;
+               goto err_free_mem;
+       }
+       id_buf = buf;
+
+       /* Read rest of info block */
+       error = __mxt_read_reg(client, MXT_OBJECT_START,
+                              size - MXT_OBJECT_START,
+                              id_buf + MXT_OBJECT_START);
+       if (error)
+               goto err_free_mem;
+
+       /* Extract & calculate checksum */
+       crc_ptr = id_buf + size - MXT_INFO_CHECKSUM_SIZE;
+       data->info_crc = crc_ptr[0] | (crc_ptr[1] << 8) | (crc_ptr[2] << 16);
+
+       calculated_crc = mxt_calculate_crc(id_buf, 0,
+                                          size - MXT_INFO_CHECKSUM_SIZE);
+
+       /*
+        * CRC mismatch can be caused by data corruption due to I2C comms
+        * issue, or the device not using the Object Based Protocol (e.g. i2c-hid)
+        */
+       if ((data->info_crc == 0) || (data->info_crc != calculated_crc)) {
+               dev_err(&client->dev,
+                       "Info Block CRC error calculated=0x%06X read=0x%06X\n",
+                       calculated_crc, data->info_crc);
+               error = -EIO;
+               goto err_free_mem;
+       }
+
+       data->raw_info_block = id_buf;
+       data->info = (struct mxt_info *)id_buf;
+
+       dev_info(&client->dev,
+                "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
+                data->info->family_id, data->info->variant_id,
+                data->info->version >> 4, data->info->version & 0xf,
+                data->info->build, data->info->object_num);
+
+       /* Parse object table information */
+       error = mxt_parse_object_table(data, id_buf + MXT_OBJECT_START);
+       if (error) {
+               dev_err(&client->dev, "Error %d parsing object table\n", error);
+               mxt_free_object_table(data);
+               goto err_free_mem;
        }
 
-       data->object_table = object_table;
+       data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START);
 
        return 0;
 
-free_object_table:
-       mxt_free_object_table(data);
+err_free_mem:
+       kfree(id_buf);
        return error;
 }
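
/*
 * Note: the info block is now read as one raw buffer laid out as a
 * 7-byte ID header, object_num object-table entries, and a 3-byte
 * little-endian CRC (assembled above from crc_ptr[0..2]). data->info
 * and data->object_table alias into raw_info_block, which is why
 * mxt_free_object_table() frees only the raw buffer and NULLs the
 * derived pointers.
 */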
 
@@ -2046,7 +2097,7 @@ static int mxt_initialize(struct mxt_data *data)
        int error;
 
        while (1) {
-               error = mxt_get_info(data);
+               error = mxt_read_info_block(data);
                if (!error)
                        break;
 
@@ -2077,16 +2128,9 @@ static int mxt_initialize(struct mxt_data *data)
                msleep(MXT_FW_RESET_TIME);
        }
 
-       /* Get object table information */
-       error = mxt_get_object_table(data);
-       if (error) {
-               dev_err(&client->dev, "Error %d reading object table\n", error);
-               return error;
-       }
-
        error = mxt_acquire_irq(data);
        if (error)
-               goto err_free_object_table;
+               return error;
 
        error = request_firmware_nowait(THIS_MODULE, true, MXT_CFG_NAME,
                                        &client->dev, GFP_KERNEL, data,
@@ -2094,14 +2138,10 @@ static int mxt_initialize(struct mxt_data *data)
        if (error) {
                dev_err(&client->dev, "Failed to invoke firmware loader: %d\n",
                        error);
-               goto err_free_object_table;
+               return error;
        }
 
        return 0;
-
-err_free_object_table:
-       mxt_free_object_table(data);
-       return error;
 }
 
 static int mxt_set_t7_power_cfg(struct mxt_data *data, u8 sleep)
@@ -2162,7 +2202,7 @@ static int mxt_init_t7_power_cfg(struct mxt_data *data)
 static u16 mxt_get_debug_value(struct mxt_data *data, unsigned int x,
                               unsigned int y)
 {
-       struct mxt_info *info = &data->info;
+       struct mxt_info *info = data->info;
        struct mxt_dbg *dbg = &data->dbg;
        unsigned int ofs, page;
        unsigned int col = 0;
@@ -2490,7 +2530,7 @@ static const struct video_device mxt_video_device = {
 
 static void mxt_debug_init(struct mxt_data *data)
 {
-       struct mxt_info *info = &data->info;
+       struct mxt_info *info = data->info;
        struct mxt_dbg *dbg = &data->dbg;
        struct mxt_object *object;
        int error;
@@ -2576,7 +2616,6 @@ static int mxt_configure_objects(struct mxt_data *data,
                                 const struct firmware *cfg)
 {
        struct device *dev = &data->client->dev;
-       struct mxt_info *info = &data->info;
        int error;
 
        error = mxt_init_t7_power_cfg(data);
@@ -2601,11 +2640,6 @@ static int mxt_configure_objects(struct mxt_data *data,
 
        mxt_debug_init(data);
 
-       dev_info(dev,
-                "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
-                info->family_id, info->variant_id, info->version >> 4,
-                info->version & 0xf, info->build, info->object_num);
-
        return 0;
 }
 
@@ -2614,7 +2648,7 @@ static ssize_t mxt_fw_version_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
        struct mxt_data *data = dev_get_drvdata(dev);
-       struct mxt_info *info = &data->info;
+       struct mxt_info *info = data->info;
        return scnprintf(buf, PAGE_SIZE, "%u.%u.%02X\n",
                         info->version >> 4, info->version & 0xf, info->build);
 }
@@ -2624,7 +2658,7 @@ static ssize_t mxt_hw_version_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
        struct mxt_data *data = dev_get_drvdata(dev);
-       struct mxt_info *info = &data->info;
+       struct mxt_info *info = data->info;
        return scnprintf(buf, PAGE_SIZE, "%u.%u\n",
                         info->family_id, info->variant_id);
 }
@@ -2663,7 +2697,7 @@ static ssize_t mxt_object_show(struct device *dev,
                return -ENOMEM;
 
        error = 0;
-       for (i = 0; i < data->info.object_num; i++) {
+       for (i = 0; i < data->info->object_num; i++) {
                object = data->object_table + i;
 
                if (!mxt_object_readable(object->type))
@@ -3034,6 +3068,15 @@ static const struct dmi_system_id mxt_dmi_table[] = {
                },
                .driver_data = samus_platform_data,
        },
+       {
+               /* Samsung Chromebook Pro */
+               .ident = "Samsung Chromebook Pro",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Caroline"),
+               },
+               .driver_data = samus_platform_data,
+       },
        {
                /* Other Google Chromebooks */
                .ident = "Chromebook",
@@ -3254,6 +3297,11 @@ static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume);
 
 static const struct of_device_id mxt_of_match[] = {
        { .compatible = "atmel,maxtouch", },
+       /* Compatibles listed below are deprecated */
+       { .compatible = "atmel,qt602240_ts", },
+       { .compatible = "atmel,atmel_mxt_ts", },
+       { .compatible = "atmel,atmel_mxt_tp", },
+       { .compatible = "atmel,mXT224", },
        {},
 };
 MODULE_DEVICE_TABLE(of, mxt_of_match);
index 2a99f0f14795549eacb6d88b87e30cc0ca78272e..8fb8c737fffefa18ac3ae716474a40002a36ff05 100644 (file)
@@ -83,7 +83,6 @@
 
 static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
 static DEFINE_SPINLOCK(pd_bitmap_lock);
-static DEFINE_SPINLOCK(iommu_table_lock);
 
 /* List of all available dev_data structures */
 static LLIST_HEAD(dev_data_list);
@@ -3562,6 +3561,7 @@ EXPORT_SYMBOL(amd_iommu_device_info);
  *****************************************************************************/
 
 static struct irq_chip amd_ir_chip;
+static DEFINE_SPINLOCK(iommu_table_lock);
 
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
index f05f3cf9075675a4e466bda1d0621376fb5fea44..ddcbbdb5d65806dd7bc1e98ac95eed37c5752ca4 100644 (file)
@@ -167,40 +167,16 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
  * @list: Reserved region list from iommu_get_resv_regions()
  *
  * IOMMU drivers can use this to implement their .get_resv_regions callback
- * for general non-IOMMU-specific reservations. Currently, this covers host
- * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI
- * based ARM platforms that may require HW MSI reservation.
+ * for general non-IOMMU-specific reservations. Currently, this covers GICv3
+ * ITS region reservation on ACPI based ARM platforms that may require HW MSI
+ * reservation.
  */
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
-       struct pci_host_bridge *bridge;
-       struct resource_entry *window;
-
-       if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) &&
-               iort_iommu_msi_get_resv_regions(dev, list) < 0)
-               return;
-
-       if (!dev_is_pci(dev))
-               return;
-
-       bridge = pci_find_host_bridge(to_pci_dev(dev)->bus);
-       resource_list_for_each_entry(window, &bridge->windows) {
-               struct iommu_resv_region *region;
-               phys_addr_t start;
-               size_t length;
-
-               if (resource_type(window->res) != IORESOURCE_MEM)
-                       continue;
 
-               start = window->res->start - window->offset;
-               length = window->res->end - window->res->start + 1;
-               region = iommu_alloc_resv_region(start, length, 0,
-                               IOMMU_RESV_RESERVED);
-               if (!region)
-                       return;
+       if (!is_of_node(dev->iommu_fwspec->iommu_fwnode))
+               iort_iommu_msi_get_resv_regions(dev, list);
 
-               list_add_tail(&region->list, list);
-       }
 }
 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
 
@@ -229,6 +205,23 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
        return 0;
 }
 
+static void iova_reserve_pci_windows(struct pci_dev *dev,
+               struct iova_domain *iovad)
+{
+       struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+       struct resource_entry *window;
+       unsigned long lo, hi;
+
+       resource_list_for_each_entry(window, &bridge->windows) {
+               if (resource_type(window->res) != IORESOURCE_MEM)
+                       continue;
+
+               lo = iova_pfn(iovad, window->res->start - window->offset);
+               hi = iova_pfn(iovad, window->res->end - window->offset);
+               reserve_iova(iovad, lo, hi);
+       }
+}
+
 static int iova_reserve_iommu_regions(struct device *dev,
                struct iommu_domain *domain)
 {
@@ -238,6 +231,9 @@ static int iova_reserve_iommu_regions(struct device *dev,
        LIST_HEAD(resv_regions);
        int ret = 0;
 
+       if (dev_is_pci(dev))
+               iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+
        iommu_get_resv_regions(dev, &resv_regions);
        list_for_each_entry(region, &resv_regions, list) {
                unsigned long lo, hi;
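
/*
 * Note: PCI host-bridge windows are now carved directly out of the
 * IOVA allocator via reserve_iova() instead of being published as
 * reserved regions, so no iommu_resv_region needs to be allocated per
 * window. The hi bound uses window->res->end - window->offset, so the
 * reservation covers the window's last page inclusively.
 */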
index accf58388bdb4892369f0bac43f928667e937035..460bed4fc5b159ecd12d99a8ea770b0d90369b8a 100644 (file)
@@ -1345,7 +1345,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
        struct qi_desc desc;
 
        if (mask) {
-               BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
+               WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
                addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
                desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
        } else
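
/*
 * Note: with a plain "1 <<" the shift is evaluated as int, so
 * VTD_PAGE_SHIFT + mask >= 31 overflows; 1ULL keeps the computation in
 * 64 bits. Downgrading BUG_ON to WARN_ON_ONCE also turns a misaligned
 * address into a loud warning rather than a kernel panic.
 */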
index 66f69af2c2191f6a247a82acc227ed64b85c6bff..3062a154a9fbf31cfa296ad6b50c2cd3471ad400 100644 (file)
@@ -1136,7 +1136,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
        irte->dest_id = IRTE_DEST(cfg->dest_apicid);
 
        /* Update the hardware only if the interrupt is in remapped mode. */
-       if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
+       if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
                modify_irte(&ir_data->irq_2_iommu, irte);
 }
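
/*
 * Note: the old "!force" test meant a forced reconfigure was exactly
 * the case that skipped programming the IRTE; the corrected condition
 * writes the hardware when the update is forced or the interrupt is in
 * remapped mode, matching the comment above it.
 */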
 
index 5fc8656c60f968b30148eaabbeefa4e63a2f40e2..0468acfa131fe4d1904260561c8f3052673ae0a6 100644 (file)
@@ -1098,7 +1098,7 @@ static int rk_iommu_of_xlate(struct device *dev,
        data->iommu = platform_get_drvdata(iommu_dev);
        dev->archdata.iommu = data;
 
-       of_dev_put(iommu_dev);
+       platform_device_put(iommu_dev);
 
        return 0;
 }
@@ -1175,8 +1175,15 @@ static int rk_iommu_probe(struct platform_device *pdev)
        for (i = 0; i < iommu->num_clocks; ++i)
                iommu->clocks[i].id = rk_iommu_clocks[i];
 
+       /*
+        * iommu clocks should be present for all new devices and devicetrees
+        * but there are older devicetrees without clocks out in the wild.
+        * So treat clocks as optional for the time being.
+        */
        err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks);
-       if (err)
+       if (err == -ENOENT)
+               iommu->num_clocks = 0;
+       else if (err)
                return err;
 
        err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
index f31265937439608314bf55d70665b55a299a4666..7f0c0be322e08a848bd42cc906c706315816f2c4 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -68,7 +68,7 @@ static void combiner_handle_irq(struct irq_desc *desc)
 
                bit = readl_relaxed(combiner->regs[reg].addr);
                status = bit & combiner->regs[reg].enabled;
-               if (!status)
+               if (bit && !status)
                        pr_warn_ratelimited("Unexpected IRQ on CPU%d: (%08x %08lx %p)\n",
                                            smp_processor_id(), bit,
                                            combiner->regs[reg].enabled,
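
/*
 * Note: "bit && !status" warns only when a raw interrupt bit is set but
 * not enabled; a register that reads back zero simply has nothing
 * pending and is no longer reported as an unexpected IRQ.
 */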
index 004cc3cc6123b0b3a9f9ea51e95c37f109315e41..7fa2631b422c89a160a6e49165bcf639d7d235e2 100644 (file)
@@ -290,7 +290,7 @@ do {                                                                        \
                if (kthread_should_stop() ||                            \
                    test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) {  \
                        set_current_state(TASK_RUNNING);                \
-                       return 0;                                       \
+                       goto out;                                       \
                }                                                       \
                                                                        \
                schedule();                                             \
@@ -378,6 +378,9 @@ static int bch_allocator_thread(void *arg)
                        bch_prio_write(ca);
                }
        }
+out:
+       wait_for_kthread_stop();
+       return 0;
 }
 
 /* Allocation */
index d338b7086013fc1faf2b55d9a67f9a81e5e66bbe..3a0cfb237af9e682ddfd466aad2070807e1f8166 100644 (file)
@@ -392,6 +392,8 @@ struct cached_dev {
 #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
        atomic_t                io_errors;
        unsigned                error_limit;
+
+       char                    backing_dev_name[BDEVNAME_SIZE];
 };
 
 enum alloc_reserve {
@@ -464,6 +466,8 @@ struct cache {
        atomic_long_t           meta_sectors_written;
        atomic_long_t           btree_sectors_written;
        atomic_long_t           sectors_written;
+
+       char                    cache_dev_name[BDEVNAME_SIZE];
 };
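
/*
 * Note: backing_dev_name and cache_dev_name are filled in once at
 * registration time (see the bdevname() calls added to register_bdev()
 * and register_cache() below), so later message and error paths can
 * print the name without a BDEVNAME_SIZE stack buffer and without
 * touching a bdev that may already be gone.
 */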
 
 struct gc_stat {
index 028f7b386e014b6bae1bf1dc5d8b9f687faa0746..d030ce3025a6a6f365cb55c740e436ba43fd52bf 100644 (file)
@@ -106,7 +106,6 @@ void bch_btree_verify(struct btree *b)
 
 void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 {
-       char name[BDEVNAME_SIZE];
        struct bio *check;
        struct bio_vec bv, cbv;
        struct bvec_iter iter, citer = { 0 };
@@ -134,7 +133,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
                                        bv.bv_len),
                                 dc->disk.c,
                                 "verify failed at dev %s sector %llu",
-                                bdevname(dc->bdev, name),
+                                dc->backing_dev_name,
                                 (uint64_t) bio->bi_iter.bi_sector);
 
                kunmap_atomic(p1);
@@ -251,7 +250,9 @@ void bch_debug_exit(void)
 
 int __init bch_debug_init(struct kobject *kobj)
 {
-       bcache_debug = debugfs_create_dir("bcache", NULL);
+       if (!IS_ENABLED(CONFIG_DEBUG_FS))
+               return 0;
 
+       bcache_debug = debugfs_create_dir("bcache", NULL);
        return IS_ERR_OR_NULL(bcache_debug);
 }
index 7fac97ae036ec76095124d395d870d13f078b0be..2ddf8515e6a533112356228a974bb356350b1feb 100644 (file)
@@ -52,7 +52,6 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 /* IO errors */
 void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
 {
-       char buf[BDEVNAME_SIZE];
        unsigned errors;
 
        WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
@@ -60,7 +59,7 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
        errors = atomic_add_return(1, &dc->io_errors);
        if (errors < dc->error_limit)
                pr_err("%s: IO error on backing device, unrecoverable",
-                       bio_devname(bio, buf));
+                       dc->backing_dev_name);
        else
                bch_cached_dev_error(dc);
 }
@@ -105,19 +104,18 @@ void bch_count_io_errors(struct cache *ca,
        }
 
        if (error) {
-               char buf[BDEVNAME_SIZE];
                unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
                                                    &ca->io_errors);
                errors >>= IO_ERROR_SHIFT;
 
                if (errors < ca->set->error_limit)
                        pr_err("%s: IO error on %s%s",
-                              bdevname(ca->bdev, buf), m,
+                              ca->cache_dev_name, m,
                               is_read ? ", recovering." : ".");
                else
                        bch_cache_set_error(ca->set,
                                            "%s: too many IO errors %s",
-                                           bdevname(ca->bdev, buf), m);
+                                           ca->cache_dev_name, m);
        }
 }
 
index a65e3365eeb970b00da4111b71283f91e1a106ba..8e3e8655ed6388308064c40935f7df05fe9f220f 100644 (file)
@@ -649,11 +649,8 @@ static void backing_request_endio(struct bio *bio)
                 */
                if (unlikely(s->iop.writeback &&
                             bio->bi_opf & REQ_PREFLUSH)) {
-                       char buf[BDEVNAME_SIZE];
-
-                       bio_devname(bio, buf);
                        pr_err("Can't flush %s: returned bi_status %i",
-                               buf, bio->bi_status);
+                               dc->backing_dev_name, bio->bi_status);
                } else {
                        /* set to orig_bio->bi_status in bio_complete() */
                        s->iop.status = bio->bi_status;
index d90d9e59ca00999c805266c81eceeeeba6ac79e0..3dea06b41d431c021af60320b9f71f9b4e706990 100644 (file)
@@ -936,7 +936,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
 static void cached_dev_detach_finish(struct work_struct *w)
 {
        struct cached_dev *dc = container_of(w, struct cached_dev, detach);
-       char buf[BDEVNAME_SIZE];
        struct closure cl;
        closure_init_stack(&cl);
 
@@ -967,7 +966,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
 
        mutex_unlock(&bch_register_lock);
 
-       pr_info("Caching disabled for %s", bdevname(dc->bdev, buf));
+       pr_info("Caching disabled for %s", dc->backing_dev_name);
 
        /* Drop ref we took in cached_dev_detach() */
        closure_put(&dc->disk.cl);
@@ -999,29 +998,28 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 {
        uint32_t rtime = cpu_to_le32(get_seconds());
        struct uuid_entry *u;
-       char buf[BDEVNAME_SIZE];
        struct cached_dev *exist_dc, *t;
 
-       bdevname(dc->bdev, buf);
-
        if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
            (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
                return -ENOENT;
 
        if (dc->disk.c) {
-               pr_err("Can't attach %s: already attached", buf);
+               pr_err("Can't attach %s: already attached",
+                      dc->backing_dev_name);
                return -EINVAL;
        }
 
        if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
-               pr_err("Can't attach %s: shutting down", buf);
+               pr_err("Can't attach %s: shutting down",
+                      dc->backing_dev_name);
                return -EINVAL;
        }
 
        if (dc->sb.block_size < c->sb.block_size) {
                /* Will die */
                pr_err("Couldn't attach %s: block size less than set's block size",
-                      buf);
+                      dc->backing_dev_name);
                return -EINVAL;
        }
 
@@ -1029,7 +1027,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
        list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
                if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
                        pr_err("Tried to attach %s but duplicate UUID already attached",
-                               buf);
+                               dc->backing_dev_name);
 
                        return -EINVAL;
                }
@@ -1047,13 +1045,15 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 
        if (!u) {
                if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
-                       pr_err("Couldn't find uuid for %s in set", buf);
+                       pr_err("Couldn't find uuid for %s in set",
+                              dc->backing_dev_name);
                        return -ENOENT;
                }
 
                u = uuid_find_empty(c);
                if (!u) {
-                       pr_err("Not caching %s, no room for UUID", buf);
+                       pr_err("Not caching %s, no room for UUID",
+                              dc->backing_dev_name);
                        return -EINVAL;
                }
        }
@@ -1112,7 +1112,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
        up_write(&dc->writeback_lock);
 
        pr_info("Caching %s as %s on set %pU",
-               bdevname(dc->bdev, buf), dc->disk.disk->disk_name,
+               dc->backing_dev_name,
+               dc->disk.disk->disk_name,
                dc->disk.c->sb.set_uuid);
        return 0;
 }
@@ -1225,10 +1226,10 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
                                 struct block_device *bdev,
                                 struct cached_dev *dc)
 {
-       char name[BDEVNAME_SIZE];
        const char *err = "cannot allocate memory";
        struct cache_set *c;
 
+       bdevname(bdev, dc->backing_dev_name);
        memcpy(&dc->sb, sb, sizeof(struct cache_sb));
        dc->bdev = bdev;
        dc->bdev->bd_holder = dc;
@@ -1237,6 +1238,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
        bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
        get_page(sb_page);
 
+
        if (cached_dev_init(dc, sb->block_size << 9))
                goto err;
 
@@ -1247,7 +1249,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
        if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
                goto err;
 
-       pr_info("registered backing device %s", bdevname(bdev, name));
+       pr_info("registered backing device %s", dc->backing_dev_name);
 
        list_add(&dc->list, &uncached_devices);
        list_for_each_entry(c, &bch_cache_sets, list)
@@ -1259,7 +1261,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
        return;
 err:
-       pr_notice("error %s: %s", bdevname(bdev, name), err);
+       pr_notice("error %s: %s", dc->backing_dev_name, err);
        bcache_device_stop(&dc->disk);
 }
 
@@ -1367,7 +1369,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
 
 bool bch_cached_dev_error(struct cached_dev *dc)
 {
-       char name[BDEVNAME_SIZE];
+       struct cache_set *c;
 
        if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
                return false;
@@ -1377,7 +1379,22 @@ bool bch_cached_dev_error(struct cached_dev *dc)
        smp_mb();
 
        pr_err("stop %s: too many IO errors on backing device %s\n",
-               dc->disk.disk->disk_name, bdevname(dc->bdev, name));
+               dc->disk.disk->disk_name, dc->backing_dev_name);
+
+       /*
+        * If the cached device is still attached to a cache set,
+        * even though dc->io_disable is true and no more I/O
+        * requests are accepted, internal cache-device I/O
+        * (writeback scan or garbage collection) may still prevent
+        * the bcache device from being stopped. So CACHE_SET_IO_DISABLE
+        * should be set in c->flags too, so that internal I/O to the
+        * cache device is rejected and stopped immediately.
+        * If c is NULL, the bcache device is not attached to any
+        * cache set and there is no CACHE_SET_IO_DISABLE bit to set.
+        */
+       c = dc->disk.c;
+       if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
+               pr_info("CACHE_SET_IO_DISABLE already set");
 
        bcache_device_stop(&dc->disk);
        return true;
@@ -1395,7 +1412,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
                return false;
 
        if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-               pr_warn("CACHE_SET_IO_DISABLE already set");
+               pr_info("CACHE_SET_IO_DISABLE already set");
 
        /* XXX: we can be called from atomic context
        acquire_console_sem();
@@ -1539,6 +1556,20 @@ static void conditional_stop_bcache_device(struct cache_set *c,
                 */
                pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
                        d->disk->disk_name);
+                       /*
+                        * There might be a small time gap in which the
+                        * cache set is released but the bcache device is
+                        * not. Inside this gap, regular I/O requests go
+                        * directly to the backing device, since no cache
+                        * set is attached. This may also introduce
+                        * inconsistent data in writeback mode while the
+                        * cache is dirty.
+                        * Therefore, before calling bcache_device_stop()
+                        * due to a broken cache device, dc->io_disable
+                        * should be explicitly set to true.
+                        */
+                       dc->io_disable = true;
+                       /* make io_disable visible to others before stopping */
+                       smp_mb();
                        bcache_device_stop(d);
        } else {
                /*
@@ -2003,12 +2034,10 @@ static int cache_alloc(struct cache *ca)
 static int register_cache(struct cache_sb *sb, struct page *sb_page,
                                struct block_device *bdev, struct cache *ca)
 {
-       char name[BDEVNAME_SIZE];
        const char *err = NULL; /* must be set for any error case */
        int ret = 0;
 
-       bdevname(bdev, name);
-
+       bdevname(bdev, ca->cache_dev_name);
        memcpy(&ca->sb, sb, sizeof(struct cache_sb));
        ca->bdev = bdev;
        ca->bdev->bd_holder = ca;
@@ -2045,14 +2074,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
                goto out;
        }
 
-       pr_info("registered cache device %s", name);
+       pr_info("registered cache device %s", ca->cache_dev_name);
 
 out:
        kobject_put(&ca->kobj);
 
 err:
        if (err)
-               pr_notice("error %s: %s", name, err);
+               pr_notice("error %s: %s", ca->cache_dev_name, err);
 
        return ret;
 }
index 4a9547cdcdc538fe979a1d3287930b148b711c17..ad45ebe1a74b46348b7967b090e6077389b56f86 100644 (file)
@@ -244,8 +244,10 @@ static void dirty_endio(struct bio *bio)
        struct keybuf_key *w = bio->bi_private;
        struct dirty_io *io = w->private;
 
-       if (bio->bi_status)
+       if (bio->bi_status) {
                SET_KEY_DIRTY(&w->key, false);
+               bch_count_backing_io_errors(io->dc, bio);
+       }
 
        closure_put(&io->cl);
 }
index 12aa9ca21d8c656166d819011adb00e4e7a5e4e0..dc385b70e4c336fc2322c9538f86538b000a8556 100644 (file)
@@ -1681,8 +1681,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 
        if (block_size <= KMALLOC_MAX_SIZE &&
            (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
-               snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
-               c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
+               unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE);
+               snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size);
+               c->slab_cache = kmem_cache_create(slab_name, block_size, align,
                                                  SLAB_RECLAIM_ACCOUNT, NULL);
                if (!c->slab_cache) {
                        r = -ENOMEM;
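
/*
 * Note: the slab alignment is now the largest power of two dividing
 * block_size, capped at PAGE_SIZE, instead of ARCH_KMALLOC_MINALIGN.
 * E.g. block_size = 512 gives 1U << __ffs(512) = 512, so sub-page
 * buffers come back naturally aligned to their own size.
 */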
index 1d0af0a21fc7e8253ebdba8e47e581e7b112ef3b..84814e819e4c35130ad6444feb88082c37c35768 100644 (file)
@@ -166,7 +166,7 @@ static bool max_work_reached(struct background_tracker *b)
                atomic_read(&b->pending_demotes) >= b->max_work;
 }
 
-struct bt_work *alloc_work(struct background_tracker *b)
+static struct bt_work *alloc_work(struct background_tracker *b)
 {
        if (max_work_reached(b))
                return NULL;
index 77d9fe58dae22118e872571da2a0542a7fe44b08..514fb4aec5d162b25caa7b0d30f200f66ad8a0c3 100644 (file)
@@ -2440,7 +2440,7 @@ static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, str
        unsigned i;
        for (i = 0; i < ic->journal_sections; i++)
                kvfree(sl[i]);
-       kfree(sl);
+       kvfree(sl);
 }
 
 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl)
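
/*
 * Note: the sl array is presumably allocated with kvmalloc() (its
 * elements are already released with kvfree() above), so it may be
 * vmalloc-backed and must be freed with kvfree(), not kfree().
 */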
index 580c49cc8079f7b7e82b915a552a0b2ef7030947..5903e492bb34a307deee617726d91464d7e3d462 100644 (file)
@@ -23,6 +23,8 @@
 
 #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */
 
+#define MAX_NR_MIRRORS (DM_KCOPYD_MAX_REGIONS + 1)
+
 #define DM_RAID1_HANDLE_ERRORS 0x01
 #define DM_RAID1_KEEP_LOG      0x02
 #define errors_handled(p)      ((p)->features & DM_RAID1_HANDLE_ERRORS)
@@ -255,7 +257,7 @@ static int mirror_flush(struct dm_target *ti)
        unsigned long error_bits;
 
        unsigned int i;
-       struct dm_io_region io[ms->nr_mirrors];
+       struct dm_io_region io[MAX_NR_MIRRORS];
        struct mirror *m;
        struct dm_io_request io_req = {
                .bi_op = REQ_OP_WRITE,
@@ -651,7 +653,7 @@ static void write_callback(unsigned long error, void *context)
 static void do_write(struct mirror_set *ms, struct bio *bio)
 {
        unsigned int i;
-       struct dm_io_region io[ms->nr_mirrors], *dest = io;
+       struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
        struct mirror *m;
        struct dm_io_request io_req = {
                .bi_op = REQ_OP_WRITE,
@@ -1083,7 +1085,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        argc -= args_used;
 
        if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 ||
-           nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
+           nr_mirrors < 2 || nr_mirrors > MAX_NR_MIRRORS) {
                ti->error = "Invalid number of mirrors";
                dm_dirty_log_destroy(dl);
                return -EINVAL;
@@ -1404,7 +1406,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
        int num_feature_args = 0;
        struct mirror_set *ms = (struct mirror_set *) ti->private;
        struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
-       char buffer[ms->nr_mirrors + 1];
+       char buffer[MAX_NR_MIRRORS + 1];
 
        switch (type) {
        case STATUSTYPE_INFO:
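
/*
 * Note: these hunks replace variable-length arrays sized by
 * ms->nr_mirrors with fixed MAX_NR_MIRRORS arrays; mirror_ctr() already
 * rejects nr_mirrors above DM_KCOPYD_MAX_REGIONS + 1, so the fixed
 * bound is safe and the on-stack VLAs go away.
 */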
index 4ea404dbcf0b936b3cc3c42dc76158813ecb88ec..0a7b0107ca78d8ed967e546f9111b0a57e987421 100644 (file)
@@ -1020,7 +1020,8 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
 static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
-               sector_t sector, int *srcu_idx)
+                                               sector_t sector, int *srcu_idx)
+       __acquires(md->io_barrier)
 {
        struct dm_table *map;
        struct dm_target *ti;
@@ -1037,7 +1038,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
 }
 
 static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
-               long nr_pages, void **kaddr, pfn_t *pfn)
+                                long nr_pages, void **kaddr, pfn_t *pfn)
 {
        struct mapped_device *md = dax_get_private(dax_dev);
        sector_t sector = pgoff * PAGE_SECTORS;
@@ -1065,7 +1066,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 }
 
 static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
-               void *addr, size_t bytes, struct iov_iter *i)
+                                   void *addr, size_t bytes, struct iov_iter *i)
 {
        struct mapped_device *md = dax_get_private(dax_dev);
        sector_t sector = pgoff * PAGE_SECTORS;
index e216cd7684094fd80fe64e2f1dd588e7b643f8f5..b07114b5efb27b9207ddd9395924cb83700e32a1 100644 (file)
@@ -20,7 +20,7 @@
 //
 // VBI support (2004) and cleanups (2005) by Hans Verkuil <hverkuil@xs4all.nl>
 //
-// Copyright (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
 //     SAA7111, SAA7113 and SAA7118 support
 
 #include "saa711x_regs.h"
index a50d480e101a80a5ec3303ba76aa314aab5462b9..44fabe08234d2b1ea58d24c3e93a087caa3c6ab1 100644 (file)
@@ -2,7 +2,7 @@
  * SPDX-License-Identifier: GPL-2.0+
  * saa711x - Philips SAA711x video decoder register specifications
  *
- * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #define R_00_CHIP_VERSION                             0x00
index 1c5c61d829d61024f61e95718671e1d2488daa80..9b4f21237810fd4fbacdb07a26773ee8eef1df09 100644 (file)
@@ -8,7 +8,7 @@
  * Muting and tone control by Jonathan Isom <jisom@ematic.com>
  *
  * Copyright (c) 2000 Eric Sandeen <eric_sandeen@bigfoot.com>
- * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  * This code is placed under the terms of the GNU General Public License
  * Based on tda9855.c by Steve VanDeBogart (vandebo@uclink.berkeley.edu)
  * Which was based on tda8425.c by Greg Alexander (c) 1998
index 2476d812f669476a286672de807009df8f4980fb..1734ed4ede33cf66114cf9889405ddfb2335064d 100644 (file)
@@ -2,7 +2,7 @@
 //
 // tvp5150 - Texas Instruments TVP5150A/AM1 and TVP5151 video decoder driver
 //
-// Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@kernel.org>
 
 #include <dt-bindings/media/tvp5150.h>
 #include <linux/i2c.h>
index c43b7b844021c06a819d224ecb285eb8fe9f6e33..d3a764cae1a04dcb514026bbf7b5dafbc12887e1 100644 (file)
@@ -3,7 +3,7 @@
  *
  * tvp5150 - Texas Instruments TVP5150A/AM1 video decoder registers
  *
- * Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #define TVP5150_VD_IN_SRC_SEL_1      0x00 /* Video input source selection #1 */
index a26c1a3f7183c6a89b96b2c6c58ca8d14739a422..4599b7e28a8d342a3f188ef5ae621542773cb9b9 100644 (file)
@@ -5,7 +5,7 @@
  * Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com>
  *
  * This code is partially based upon the TVP5150 driver
- * written by Mauro Carvalho Chehab (mchehab@infradead.org),
+ * written by Mauro Carvalho Chehab <mchehab@kernel.org>,
  * the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com>
  * and the TVP7002 driver in the TI LSP 2.10.00.14. Revisions by
  * Muralidharan Karicheri and Snehaprabha Narnakaje (TI).
index 3c8c8b0a6a4c7194cb2b87ca3cea8c908464382e..7f56ba689dfe58cc13fa9de94845b5ac09625fdb 100644 (file)
@@ -5,7 +5,7 @@
  * Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com>
  *
  * This code is partially based upon the TVP5150 driver
- * written by Mauro Carvalho Chehab (mchehab@infradead.org),
+ * written by Mauro Carvalho Chehab <mchehab@kernel.org>,
  * the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com>
  * and the TVP7002 driver in the TI LSP 2.10.00.14
  *
index 67ac51eff15c354e82ac5fad60d88166b6a788a8..6b87a721dc4994c956b2b70686554150dda2d268 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (C) 2010 Nokia Corporation
  *
  * Based on drivers/media/video/v4l2_dev.c code authored by
- *     Mauro Carvalho Chehab <mchehab@infradead.org> (version 2)
+ *     Mauro Carvalho Chehab <mchehab@kernel.org> (version 2)
  *     Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1)
  *
  * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
index 9f1f9169fb5b4e2f7716e1940547350872dc39fa..346fc7f5883950bff83af96a26e3a2d1509e3e84 100644 (file)
@@ -1,7 +1,7 @@
 /*
 * Handlers for board audio hooks, split from bttv-cards
  *
- * Copyright (c) 2006 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  * This code is placed under the terms of the GNU General Public License
  */
 
index 159d07adeff857d5c6b01fbf96f95f5e08252e45..be16a537a03ac7851ae115c4cc3025f0a6fc66a8 100644 (file)
@@ -1,7 +1,7 @@
 /*
 * Handlers for board audio hooks, split from bttv-cards
  *
- * Copyright (c) 2006 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  * This code is placed under the terms of the GNU General Public License
  */
 
index 1902732f90e1c4f1c403078f7840db434176de88..2616243b2c491fa2765d2d5c7706a67ce64df1a4 100644 (file)
@@ -2447,7 +2447,7 @@ struct tvcard bttv_tvcards[] = {
        },
                /* ---- card 0x88---------------------------------- */
        [BTTV_BOARD_ACORP_Y878F] = {
-               /* Mauro Carvalho Chehab <mchehab@infradead.org> */
+               /* Mauro Carvalho Chehab <mchehab@kernel.org> */
                .name           = "Acorp Y878F",
                .video_inputs   = 3,
                /* .audio_inputs= 1, */
@@ -2688,7 +2688,7 @@ struct tvcard bttv_tvcards[] = {
        },
        [BTTV_BOARD_ENLTV_FM_2] = {
                /* Encore TV Tuner Pro ENL TV-FM-2
-                  Mauro Carvalho Chehab <mchehab@infradead.org */
+                  Mauro Carvalho Chehab <mchehab@kernel.org> */
                .name           = "Encore ENL TV-FM-2",
                .video_inputs   = 3,
                /* .audio_inputs= 1, */
index 707f57a9f9404295210b41e2533dac30d4ee86b4..de3f44b8dec68b8ebf9f22bd290dfa09813d0edc 100644 (file)
@@ -13,7 +13,7 @@
     (c) 2005-2006 Nickolay V. Shmyrev <nshmyrev@yandex.ru>
 
     Fixes to be fully V4L2 compliant by
-    (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+    (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
 
     Cropping and overscan support
     Copyright (C) 2005, 2006 Michael H. Schimek <mschimek@gmx.at>
index eccd1e3d717a2591356356f05697db5b88778e0d..c76823eb399dca1e1a0ceacf15ad60c10d27613a 100644 (file)
@@ -8,7 +8,7 @@
                           & Marcus Metzler (mocm@thp.uni-koeln.de)
     (c) 1999-2003 Gerd Knorr <kraxel@bytesex.org>
 
-    (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
+    (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
        - Multituner support and i2c address binding
 
     This program is free software; you can redistribute it and/or modify
index be49589a61d22f911b2684e6de7274c4dfd03dbf..395ff9bba759f94e637cb37e46d00e922570d811 100644 (file)
@@ -13,7 +13,7 @@
  *  Copyright (C) 2008 <srinivasa.deevi at conexant dot com>
  *  Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
  *                    Markus Rechberger <mrechberger@gmail.com>
- *                    Mauro Carvalho Chehab <mchehab@infradead.org>
+ *                    Mauro Carvalho Chehab <mchehab@kernel.org>
  *                    Sascha Sommer <saschasommer@freenet.de>
  *  Copyright (C) 2004, 2005 Chris Pascoe
  *  Copyright (C) 2003, 2004 Gerd Knorr
index ab09bb55cf45d4814ba3e5fcce5bb57ddf683699..8a28fda703a20889571c73fbf562fb582cb3606f 100644 (file)
@@ -4,7 +4,7 @@
  *
  *    (c) 2007 Trent Piepho <xyzzy@speakeasy.org>
  *    (c) 2005,2006 Ricardo Cerqueira <v4l@cerqueira.org>
- *    (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
+ *    (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
  *    Based on a dummy cx88 module by Gerd Knorr <kraxel@bytesex.org>
  *    Based on dummy.c by Jaroslav Kysela <perex@perex.cz>
  *
@@ -103,7 +103,7 @@ MODULE_PARM_DESC(index, "Index value for cx88x capture interface(s).");
 
 MODULE_DESCRIPTION("ALSA driver module for cx2388x based TV cards");
 MODULE_AUTHOR("Ricardo Cerqueira");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(CX88_VERSION);
 
index 0e0952e607952d593a54d2f601edcf35b307db84..7a4876cf9f088c32abdf3dff4cac8df4c21fb7b5 100644 (file)
@@ -5,7 +5,7 @@
  *    (c) 2004 Jelle Foks <jelle@foks.us>
  *    (c) 2004 Gerd Knorr <kraxel@bytesex.org>
  *
- *    (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ *    (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  *        - video_ioctl2 conversion
  *
  *  Includes parts from the ivtv driver <http://sourceforge.net/projects/ivtv/>
index 8bfa5b7ed91b566d065ce79adc70159c67122060..60988e95b637570043993d419e4fb56e6040fd80 100644 (file)
@@ -4,7 +4,7 @@
  *
  * (c) 2003 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
  *
- * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  *     - Multituner support
  *     - video_ioctl2 conversion
  *     - PAL/M fixes
index f7692775fb5ad5ccf680bb956714b83fc088e4e9..99f88a05a7c93ed0ed7a301dfa6fade8209b6773 100644 (file)
@@ -8,7 +8,7 @@
  * (c) 2002 Yurij Sysoev <yurij@naturesoft.net>
  * (c) 1999-2003 Gerd Knorr <kraxel@bytesex.org>
  *
- * (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
  *     - Multituner support and i2c address binding
  *
  * This program is free software; you can redistribute it and/or modify
index 9be682cdb644f4d67a88a8a45bbaebe1ba7211d7..7b113bad70d23b2857dfcf26dd3648d465abffa3 100644 (file)
@@ -5,7 +5,7 @@
  *
  * (c) 2003-04 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
  *
- * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
  *     - Multituner support
  *     - video_ioctl2 conversion
  *     - PAL/M fixes
index 5ef635e72e10ff5610e0df17b711eecb86543c98..4c52ac6d8bc50c8d765a7df1f5622a7afcf6498e 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright 1997 M. Kirkwood
  *
  * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  * Converted to new API by Alan Cox <alan@lxorguk.ukuu.org.uk>
  * Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org>
  *
index 9e12c60273593dcbd858ac9ee560e7051be9efbe..840b7d60462b23a36ffb47414fa227894ef42eb2 100644 (file)
@@ -2,7 +2,7 @@
  * radio-aztech.c - Aztech radio card driver
  *
  * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@xs4all.nl>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  * Adapted to support the Video for Linux API by
  * Russell Kroll <rkroll@exploits.org>.  Based on original tuner code by:
  *
index 3ff4c4e1435f0b5779885899b0a46a7a807aa60e..f051f8694ab9e4b419a8eb577cb245dbe439e56c 100644 (file)
@@ -15,7 +15,7 @@
  *    Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org>
  *
  * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  *
  * Note: this card seems to swap the left and right audio channels!
  *
index 95f06f3b35dc7b498fc437b5e6d25eebe425bb0b..e4e7587392469b840cd5c40c38926f7f741a009b 100644 (file)
@@ -27,7 +27,7 @@
  * BUGS:
  *   - card unmutes if you change frequency
  *
- * (c) 2006, 2007 by Mauro Carvalho Chehab <mchehab@infradead.org>:
+ * (c) 2006, 2007 by Mauro Carvalho Chehab <mchehab@kernel.org>:
  *     - Conversion to V4L2 API
  *      - Uses video_ioctl2 for parsing and to add debug support
  */
index abeaedd8d43747ef6434f331d9451058ae7ccdb6..5a1470eb753e652b60b024a11ed3a631ebe49ffe 100644 (file)
@@ -7,7 +7,7 @@
  * Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org>
  *
  * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  *
  * Fully tested with actual hardware and the v4l2-compliance tool.
  */
index fc4e63d36e4c75b0cb68a641d877d06b27c24eb8..4f9b97edd9eb9f7514aae2964933c77afe2d5318 100644 (file)
@@ -13,7 +13,7 @@
  *  No volume control - only mute/unmute - you have to use line volume
  *  control on SB-part of SF16-FMI/SF16-FMP/SF16-FMD
  *
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #include <linux/kernel.h>      /* __setup                      */
index 4f116ea294fb7e6b3e7486d6f1871c47f9eeb3f6..1af8f29cc7d140b1c83cf628e741294d7df25f94 100644 (file)
@@ -17,7 +17,7 @@
  *  Volume Control is done digitally
  *
  * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #include <linux/module.h>      /* Modules                      */
index 26a8c6002121bde9182e709efce646fd5a9547cb..a4bad322ffff93a4b7de50e5f079a4e913c81966 100644 (file)
@@ -12,7 +12,7 @@
  * Scott McGrath    (smcgrath@twilight.vtc.vsc.edu)
  * William McGrath  (wmcgrath@twilight.vtc.vsc.edu)
  *
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #include <stdarg.h>
index eb72a4d13758934493127b5866914ba573fed994..d0d67ad85b8ff77cd1d0d925a02d50f7b7eea70f 100644 (file)
@@ -25,7 +25,7 @@
  * The frequency change is necessary since the card never seems to be
  * completely silent.
  *
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #include <linux/module.h>      /* Modules                        */
index 026e88eef29ce0ef4a577e692d7512f4893ef9b9..6007cd09b328d346e4d6e989ba358cef74065197 100644 (file)
@@ -27,7 +27,7 @@
  * 2002-07-15 - Fix Stereo typo
  *
  * 2006-07-24 - Converted to V4L2 API
- *             by Mauro Carvalho Chehab <mchehab@infradead.org>
+ *             by Mauro Carvalho Chehab <mchehab@kernel.org>
  *
  * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
  *
index f6977df1a75ba2905dd402ea47481186aea54f38..d275d98d066a31b136d190cdcd16276135ed2ba4 100644 (file)
@@ -12,7 +12,7 @@
  *
  * On Avermedia M135A with IR model RM-JX, the same codes exist on both
  * Positivo (BR) and original IR, initial version and remote control codes
- * added by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * added by Mauro Carvalho Chehab <mchehab@kernel.org>
  *
  * Positivo also ships Avermedia M135A with model RM-K6, extra control
  * codes added by Herton Ronaldo Krzesinski <herton@mandriva.com.br>
index e4e78c1f4123139acc86fa217e4cfb834d6b684e..057c13b765ef325c35dc2679c61a913f2f841e23 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 
 /* Encore ENLTV-FM v5.3
-   Mauro Carvalho Chehab <mchehab@infradead.org>
+   Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 static struct rc_map_table encore_enltv_fm53[] = {
index c3d4437a6fdadcb1cd14ab3b6db2c2573f8d547a..cd0555924456662232241ba2bbabb69357ee1fae 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 
 /* Encore ENLTV2-FM  - silver plastic - "Wand Media" written at the bottom
-    Mauro Carvalho Chehab <mchehab@infradead.org> */
+    Mauro Carvalho Chehab <mchehab@kernel.org> */
 
 static struct rc_map_table encore_enltv2[] = {
        { 0x4c, KEY_POWER2 },
index f0f88df186065f66b67f5e068c3aa582fdc53eeb..a000513398421366cbccd8afb90fc8329b7597d1 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 
 /* Kaiomy TVnPC U2
-   Mauro Carvalho Chehab <mchehab@infradead.org>
+   Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 static struct rc_map_table kaiomy[] = {
index 453e04377de74ef0281864cd0035f3cfdeed70ae..db5edde3eeb18533a22ef154faa7e94384500060 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 
 /* Kworld Plus TV Analog Lite PCI IR
-   Mauro Carvalho Chehab <mchehab@infradead.org>
+   Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 static struct rc_map_table kworld_plus_tv_analog[] = {
index 791130f108ff7ebc311156aa031ef6d466c93986..e4e34f2ccf74f8353060ce4a3636f2f17bb781fd 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 
 /*
-   Mauro Carvalho Chehab <mchehab@infradead.org>
+   Mauro Carvalho Chehab <mchehab@kernel.org>
    present on PV MPEG 8000GT
  */
 
index 88b3e80c38ad97f6226478e44eabd923d37d7a58..d78a2bdb3e36d71d35e95fd6db25e245a35c2fa5 100644 (file)
@@ -2,7 +2,7 @@
 // For Philips TEA5761 FM Chip
 // I2C address is always 0x20 (0x10 in 7-bit mode).
 //
-// Copyright (c) 2005-2007 Mauro Carvalho Chehab (mchehab@infradead.org)
+// Copyright (c) 2005-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
 
 #include <linux/i2c.h>
 #include <linux/slab.h>
@@ -337,5 +337,5 @@ EXPORT_SYMBOL_GPL(tea5761_attach);
 EXPORT_SYMBOL_GPL(tea5761_autodetection);
 
 MODULE_DESCRIPTION("Philips TEA5761 FM tuner driver");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL v2");
index 2b2c064d7dc36714b593a29cf265c9d5a1e0749f..016d0d5ec50b83d4aed48a0e1a56efe222bbdb5b 100644 (file)
@@ -2,7 +2,7 @@
 // For Philips TEA5767 FM Chip used on some TV Cards like Prolink Pixelview
 // I2C address is always 0xC0.
 //
-// Copyright (c) 2005 Mauro Carvalho Chehab (mchehab@infradead.org)
+// Copyright (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
 //
 // tea5767 autodetection thanks to Torsten Seeboth and Atsushi Nakagawa
 // from their contributions on DScaler.
@@ -469,5 +469,5 @@ EXPORT_SYMBOL_GPL(tea5767_attach);
 EXPORT_SYMBOL_GPL(tea5767_autodetection);
 
 MODULE_DESCRIPTION("Philips TEA5767 FM tuner driver");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL v2");
index bb0437c36c03b3a86c24cb517c61e5c10e92aabc..50d017a4822a3b62923f17339db09ed3ad7dff57 100644 (file)
@@ -5,7 +5,7 @@
  * This file includes internal types to be used inside tuner-xc2028.
  * Shouldn't be included outside tuner-xc2028
  *
- * Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 /* xc3028 firmware types */
index fca85e08ebd7f35ea14e51bc02bdaffbb49e9da1..84744e1389829971e9597e8b9b39c55ea2523c9e 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // tuner-xc2028
 //
-// Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org)
+// Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org>
 //
 // Copyright (c) 2007 Michel Ludwig (michel.ludwig@gmail.com)
 //       - frontend interface
@@ -1518,7 +1518,7 @@ EXPORT_SYMBOL(xc2028_attach);
 
 MODULE_DESCRIPTION("Xceive xc2028/xc3028 tuner driver");
 MODULE_AUTHOR("Michel Ludwig <michel.ludwig@gmail.com>");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_FIRMWARE(XC2028_DEFAULT_FIRMWARE);
 MODULE_FIRMWARE(XC3028L_DEFAULT_FIRMWARE);
index 03fd6d4233a4b6205739755663070bf74f6ab100..7b58bc06e35caba4a9bd392aca7337affcd9ae1b 100644 (file)
@@ -2,7 +2,7 @@
  * SPDX-License-Identifier: GPL-2.0
  * tuner-xc2028
  *
- * Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #ifndef __TUNER_XC2028_H__
index 3c2694a16ed14a8d98477bbfc67883389a496d2a..d1e66b503f4d90b386890738ca6458c3b0945810 100644 (file)
@@ -2,7 +2,7 @@
 //
 // em28xx-camera.c - driver for Empia EM25xx/27xx/28xx USB video capture devices
 //
-// Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@kernel.org>
 // Copyright (C) 2013 Frank Schäfer <fschaefer.oss@googlemail.com>
 //
 // This program is free software; you can redistribute it and/or modify
index 6e0e67d2387633cf2f1b62aacf832f6416b55b9c..7c3203d7044b11945e181da19063074c026192ba 100644 (file)
@@ -5,7 +5,7 @@
 //
 // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
 //                   Markus Rechberger <mrechberger@gmail.com>
-//                   Mauro Carvalho Chehab <mchehab@infradead.org>
+//                   Mauro Carvalho Chehab <mchehab@kernel.org>
 //                   Sascha Sommer <saschasommer@freenet.de>
 // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
 //
index 36d341fb65dd298ca923c256d14757d1da88336e..f289953830900bf46813ff70468b1f0c250bc224 100644 (file)
@@ -4,7 +4,7 @@
 //
 // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
 //                   Markus Rechberger <mrechberger@gmail.com>
-//                   Mauro Carvalho Chehab <mchehab@infradead.org>
+//                   Mauro Carvalho Chehab <mchehab@kernel.org>
 //                   Sascha Sommer <saschasommer@freenet.de>
 // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
 //
@@ -32,7 +32,7 @@
 
 #define DRIVER_AUTHOR "Ludovico Cavedon <cavedon@sssup.it>, " \
                      "Markus Rechberger <mrechberger@gmail.com>, " \
-                     "Mauro Carvalho Chehab <mchehab@infradead.org>, " \
+                     "Mauro Carvalho Chehab <mchehab@kernel.org>, " \
                      "Sascha Sommer <saschasommer@freenet.de>"
 
 MODULE_AUTHOR(DRIVER_AUTHOR);
index a54cb8dc52c9a867c107b69aa7e2f2b11e0e988b..3f493e0b07163ac40c632989af1fc875aecb2ed5 100644 (file)
@@ -2,7 +2,7 @@
 //
 // DVB device driver for em28xx
 //
-// (c) 2008-2011 Mauro Carvalho Chehab <mchehab@infradead.org>
+// (c) 2008-2011 Mauro Carvalho Chehab <mchehab@kernel.org>
 //
 // (c) 2008 Devin Heitmueller <devin.heitmueller@gmail.com>
 //     - Fixes for the driver to properly work with HVR-950
@@ -63,7 +63,7 @@
 #include "tc90522.h"
 #include "qm1d1c0042.h"
 
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION(DRIVER_DESC " - digital TV interface");
 MODULE_VERSION(EM28XX_VERSION);
index 9151bccd859a5c54747aad9c97adc5a4dcf82077..6458682bc6e250b7513be2d6f4a457eaa2ed23f3 100644 (file)
@@ -4,7 +4,7 @@
 //
 // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
 //                   Markus Rechberger <mrechberger@gmail.com>
-//                   Mauro Carvalho Chehab <mchehab@infradead.org>
+//                   Mauro Carvalho Chehab <mchehab@kernel.org>
 //                   Sascha Sommer <saschasommer@freenet.de>
 // Copyright (C) 2013 Frank Schäfer <fschaefer.oss@googlemail.com>
 //
index 2dc1be00b8b883e25ffc178398226dc1a701c34e..f84a1208d5d3290a682f55d1792de7d74c804481 100644 (file)
@@ -4,7 +4,7 @@
 //
 // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
 //                   Markus Rechberger <mrechberger@gmail.com>
-//                   Mauro Carvalho Chehab <mchehab@infradead.org>
+//                   Mauro Carvalho Chehab <mchehab@kernel.org>
 //                   Sascha Sommer <saschasommer@freenet.de>
 //
 // This program is free software; you can redistribute it and/or modify
index d70ee13cc52e1e133812180a2d44278b7df2fc2c..68571bf36d28623cad6be5908f4f6075dd947df5 100644 (file)
@@ -5,7 +5,7 @@
 //
 // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
 //                   Markus Rechberger <mrechberger@gmail.com>
-//                   Mauro Carvalho Chehab <mchehab@infradead.org>
+//                   Mauro Carvalho Chehab <mchehab@kernel.org>
 //                   Sascha Sommer <saschasommer@freenet.de>
 // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
 //
@@ -44,7 +44,7 @@
 
 #define DRIVER_AUTHOR "Ludovico Cavedon <cavedon@sssup.it>, " \
                      "Markus Rechberger <mrechberger@gmail.com>, " \
-                     "Mauro Carvalho Chehab <mchehab@infradead.org>, " \
+                     "Mauro Carvalho Chehab <mchehab@kernel.org>, " \
                      "Sascha Sommer <saschasommer@freenet.de>"
 
 static unsigned int isoc_debug;
index 63c7c61247072b8466f956f7552bd00866c35849..b0378e77ddff6a1a98b38566b01bed131464aa38 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Copyright (C) 2005 Markus Rechberger <mrechberger@gmail.com>
  *                   Ludovico Cavedon <cavedon@sssup.it>
- *                   Mauro Carvalho Chehab <mchehab@infradead.org>
+ *                   Mauro Carvalho Chehab <mchehab@kernel.org>
  * Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
  *
  * Based on the em2800 driver from Sascha Sommer <saschasommer@freenet.de>
index a1bd94e8ce5289418a6f86e0412888ff65e8baf7..71fda38e85e08b450adeb3d53d5ddd99edbb4b93 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * zc030x registers
  *
- * Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@kernel.org>
  *
  * The register aliases used here came from this driver:
  *     http://zc0302.sourceforge.net/zc0302.php
index 70939e96b85632be8ac07dc065fc1ac3d5431eb1..23df50aa0a4af6da850d08cc591e6ace135ab223 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // tm6000-cards.c - driver for TM5600/TM6000/TM6010 USB video capture devices
 //
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
 
 #include <linux/init.h>
 #include <linux/module.h>
index 23a1332d98e624e90c2decc749fb91bdf966ff36..d3229aa45fcb2344555683b059d837438a582cc9 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // tm6000-core.c - driver for TM5600/TM6000/TM6010 USB video capture devices
 //
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
 //
 // Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
 //     - DVB-T support
index c9a62bbff27a2ae1e282d6e486e48ec0313a001b..659b63febf8525d378d7c3d5ff751960b9b3119a 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // tm6000-i2c.c - driver for TM5600/TM6000/TM6010 USB video capture devices
 //
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
 //
 // Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
 //     - Fix SMBus Read Byte command
index 21587fcf11e3254147e2d3fb45a5db81da76e098..d10424673db95e5030bc2f072eb971df9a4697ad 100644 (file)
@@ -2,7 +2,7 @@
  * SPDX-License-Identifier: GPL-2.0
  * tm6000-regs.h - driver for TM5600/TM6000/TM6010 USB video capture devices
  *
- * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 /*
index 5c615b0a7a468e9cd60bfc0aec60b32db8b21b4c..b275dbce3a1ba5cad58e3efcae308074b5f2822b 100644 (file)
@@ -2,7 +2,7 @@
  * SPDX-License-Identifier: GPL-2.0
  * tm6000-buf.c - driver for TM5600/TM6000/TM6010 USB video capture devices
  *
- * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
  */
 
 #include <linux/videodev2.h>
index b2399d4266da28141b6c4aac94df3fb4c19fa49d..aa85fe31c8353c2a0953808c0a276ff580804a61 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // tm6000-video.c - driver for TM5600/TM6000/TM6010 USB video capture devices
 //
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
 //
 // Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
 //     - Fixed module load/unload
index e1e45770e28d91b2e912e41afc1bd64fcea16558..0864ed7314eb24c506d7ff586db731e5d4ab5b09 100644 (file)
@@ -2,7 +2,7 @@
  * SPDX-License-Identifier: GPL-2.0
  * tm6000.h - driver for TM5600/TM6000/TM6010 USB video capture devices
  *
- * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
  *
  * Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
  *     - DVB-T support
index 1d0b2208e8fb67b85fcf170d4d24697858c13d2d..c080dcc75393762a6e67322327c513391ecdc0a4 100644 (file)
@@ -10,7 +10,7 @@
  *     2 of the License, or (at your option) any later version.
  *
  * Authors:    Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1)
- *              Mauro Carvalho Chehab <mchehab@infradead.org> (version 2)
+ *              Mauro Carvalho Chehab <mchehab@kernel.org> (version 2)
  *
  * Fixes:      20000516  Claudio Matsuoka <claudio@conectiva.com>
  *             - Added procfs support
@@ -1072,7 +1072,7 @@ static void __exit videodev_exit(void)
 subsys_initcall(videodev_init);
 module_exit(videodev_exit)
 
-MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_DESCRIPTION("Device registrar for Video4Linux drivers v2");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(VIDEO_MAJOR);
index f48c505550e0b313a0f9effb9ac8b1982d6d1588..de5d96dbe69e0cbf92d8b895bcb55d40552186cc 100644 (file)
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  *
  * Authors:    Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1)
- *              Mauro Carvalho Chehab <mchehab@infradead.org> (version 2)
+ *              Mauro Carvalho Chehab <mchehab@kernel.org> (version 2)
  */
 
 #include <linux/mm.h>
index 2b3981842b4b7539a68757b6e9f25d8d0e422c54..7491b337002ce586b598fcad9b0aabc7e6c2d877 100644 (file)
@@ -1,11 +1,11 @@
 /*
  * generic helper functions for handling video4linux capture buffers
  *
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * Highly based on video-buf written originally by:
  * (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
  * (c) 2006 Ted Walther and John Sokol
  *
  * This program is free software; you can redistribute it and/or modify
@@ -38,7 +38,7 @@ static int debug;
 module_param(debug, int, 0644);
 
 MODULE_DESCRIPTION("helper module to manage video4linux buffers");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL");
 
 #define dprintk(level, fmt, arg...)                                    \
index e02353e340dd78d07d0750b05af0533ffad35dae..f46132504d88eeed379985213039030090360fcd 100644 (file)
@@ -7,7 +7,7 @@
  * Copyright (c) 2008 Magnus Damm
  *
  * Based on videobuf-vmalloc.c,
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
index add2edb23eac452e4f86f69e6de1ba696687c722..7770034aae28922520a1eebeeaff1585a79e62ae 100644 (file)
@@ -6,11 +6,11 @@
  * into PAGE_SIZE chunks).  They also assume the driver does not need
  * to touch the video data.
  *
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * Highly based on video-buf written originally by:
  * (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
  * (c) 2006 Ted Walther and John Sokol
  *
  * This program is free software; you can redistribute it and/or modify
@@ -48,7 +48,7 @@ static int debug;
 module_param(debug, int, 0644);
 
 MODULE_DESCRIPTION("helper module to manage video4linux dma sg buffers");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL");
 
 #define dprintk(level, fmt, arg...)                                    \
index 2ff7fcc77b1104fe7d1ca1a2a9d5738ede27acb7..45fe781aeeec34ed755aa8aa51a3b570859aa829 100644 (file)
@@ -6,7 +6,7 @@
  * into PAGE_SIZE chunks).  They also assume the driver does not need
  * to touch the video data.
  *
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -41,7 +41,7 @@ static int debug;
 module_param(debug, int, 0644);
 
 MODULE_DESCRIPTION("helper module to manage video4linux vmalloc buffers");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL");
 
 #define dprintk(level, fmt, arg...)                                    \
index 71a89d5d3efd773f03bbf2359b91b1fa292619f2..db8043019ec62f9a1cb589bf1e523e43050db419 100644 (file)
 
 int main(void)
 {
-       DEFINE(EMIF_SDCFG_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_sdcfg_val));
-       DEFINE(EMIF_TIMING1_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_timing1_val));
-       DEFINE(EMIF_TIMING2_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_timing2_val));
-       DEFINE(EMIF_TIMING3_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_timing3_val));
-       DEFINE(EMIF_REF_CTRL_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_ref_ctrl_val));
-       DEFINE(EMIF_ZQCFG_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_zqcfg_val));
-       DEFINE(EMIF_PMCR_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_pmcr_val));
-       DEFINE(EMIF_PMCR_SHDW_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_pmcr_shdw_val));
-       DEFINE(EMIF_RD_WR_LEVEL_RAMP_CTRL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_rd_wr_level_ramp_ctrl));
-       DEFINE(EMIF_RD_WR_EXEC_THRESH_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_rd_wr_exec_thresh));
-       DEFINE(EMIF_COS_CONFIG_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_cos_config));
-       DEFINE(EMIF_PRIORITY_TO_COS_MAPPING_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_priority_to_cos_mapping));
-       DEFINE(EMIF_CONNECT_ID_SERV_1_MAP_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_connect_id_serv_1_map));
-       DEFINE(EMIF_CONNECT_ID_SERV_2_MAP_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_connect_id_serv_2_map));
-       DEFINE(EMIF_OCP_CONFIG_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_ocp_config_val));
-       DEFINE(EMIF_LPDDR2_NVM_TIM_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim));
-       DEFINE(EMIF_LPDDR2_NVM_TIM_SHDW_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim_shdw));
-       DEFINE(EMIF_DLL_CALIB_CTRL_VAL_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val));
-       DEFINE(EMIF_DLL_CALIB_CTRL_VAL_SHDW_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val_shdw));
-       DEFINE(EMIF_DDR_PHY_CTLR_1_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_ddr_phy_ctlr_1));
-       DEFINE(EMIF_EXT_PHY_CTRL_VALS_OFFSET,
-              offsetof(struct emif_regs_amx3, emif_ext_phy_ctrl_vals));
-       DEFINE(EMIF_REGS_AMX3_SIZE, sizeof(struct emif_regs_amx3));
-
-       BLANK();
-
-       DEFINE(EMIF_PM_BASE_ADDR_VIRT_OFFSET,
-              offsetof(struct ti_emif_pm_data, ti_emif_base_addr_virt));
-       DEFINE(EMIF_PM_BASE_ADDR_PHYS_OFFSET,
-              offsetof(struct ti_emif_pm_data, ti_emif_base_addr_phys));
-       DEFINE(EMIF_PM_CONFIG_OFFSET,
-              offsetof(struct ti_emif_pm_data, ti_emif_sram_config));
-       DEFINE(EMIF_PM_REGS_VIRT_OFFSET,
-              offsetof(struct ti_emif_pm_data, regs_virt));
-       DEFINE(EMIF_PM_REGS_PHYS_OFFSET,
-              offsetof(struct ti_emif_pm_data, regs_phys));
-       DEFINE(EMIF_PM_DATA_SIZE, sizeof(struct ti_emif_pm_data));
-
-       BLANK();
-
-       DEFINE(EMIF_PM_SAVE_CONTEXT_OFFSET,
-              offsetof(struct ti_emif_pm_functions, save_context));
-       DEFINE(EMIF_PM_RESTORE_CONTEXT_OFFSET,
-              offsetof(struct ti_emif_pm_functions, restore_context));
-       DEFINE(EMIF_PM_ENTER_SR_OFFSET,
-              offsetof(struct ti_emif_pm_functions, enter_sr));
-       DEFINE(EMIF_PM_EXIT_SR_OFFSET,
-              offsetof(struct ti_emif_pm_functions, exit_sr));
-       DEFINE(EMIF_PM_ABORT_SR_OFFSET,
-              offsetof(struct ti_emif_pm_functions, abort_sr));
-       DEFINE(EMIF_PM_FUNCTIONS_SIZE, sizeof(struct ti_emif_pm_functions));
+       ti_emif_asm_offsets();
 
        return 0;
 }
index 231f3a1e27bff66b64e06751c429a21c31aa12f1..86503f60468fa6a7f10a4e339f01df1934ca7d55 100644 (file)
@@ -1994,6 +1994,7 @@ static struct scsi_host_template mptsas_driver_template = {
        .cmd_per_lun                    = 7,
        .use_clustering                 = ENABLE_CLUSTERING,
        .shost_attrs                    = mptscsih_host_attrs,
+       .no_write_same                  = 1,
 };
 
 static int mptsas_get_linkerrors(struct sas_phy *phy)
index a4c9c8297a6d825db6321032f7125aa01c9ca751..918d4fb742d1dd98dc3475adf4eef4feeafa7fdf 100644 (file)
@@ -717,6 +717,7 @@ struct cxl {
        bool perst_select_user;
        bool perst_same_image;
        bool psl_timebase_synced;
+       bool tunneled_ops_supported;
 
        /*
         * number of contexts mapped on to this card. Possible values are:
index 83f1d08058fc234dc9b141b543084ece35c53b0f..4d6736f9d46399b0ea2f1336404acd041af91c86 100644 (file)
@@ -1742,6 +1742,15 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
        /* Required for devices using CAPP DMA mode, harmless for others */
        pci_set_master(dev);
 
+       adapter->tunneled_ops_supported = false;
+
+       if (cxl_is_power9()) {
+               if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1))
+                       dev_info(&dev->dev, "Tunneled operations unsupported\n");
+               else
+                       adapter->tunneled_ops_supported = true;
+       }
+
        if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
                goto err;
 
@@ -1768,6 +1777,9 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
 {
        struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
 
+       if (cxl_is_power9())
+               pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0);
+
        cxl_native_release_psl_err_irq(adapter);
        cxl_unmap_adapter_regs(adapter);
 
index 95285b7f636ff3f854876e5743658f91d88c3e3d..4b5a4c5d3c012dff77508b9f0b8b9883671e594c 100644 (file)
@@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device *device,
        return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
 }
 
+static ssize_t tunneled_ops_supported_show(struct device *device,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct cxl *adapter = to_cxl_adapter(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported);
+}
+
 static ssize_t reset_adapter_store(struct device *device,
                                   struct device_attribute *attr,
                                   const char *buf, size_t count)
@@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = {
        __ATTR_RO(base_image),
        __ATTR_RO(image_loaded),
        __ATTR_RO(psl_timebase_synced),
+       __ATTR_RO(tunneled_ops_supported),
        __ATTR_RW(load_image_on_perst),
        __ATTR_RW(perst_reloads_same_image),
        __ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
index 0c125f207aea82c394f989f5033fccda9cd7e0b1..33053b0d1fdf65c2d590598cd361b7869e663522 100644 (file)
@@ -518,7 +518,7 @@ static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata)
        if (of_node && of_match_device(at24_of_match, dev))
                cdata = of_device_get_match_data(dev);
        else if (id)
-               cdata = (void *)&id->driver_data;
+               cdata = (void *)id->driver_data;
        else
                cdata = acpi_device_get_match_data(dev);
 
index d4c07b85f18e598ef8c0f85e8ce6fd70c9709396..f5695be14499855a2a54071004e8107c3289f680 100644 (file)
@@ -45,6 +45,7 @@
 #define I82802AB       0x00ad
 #define I82802AC       0x00ac
 #define PF38F4476      0x881c
+#define M28F00AP30     0x8963
 /* STMicroelectronics chips */
 #define M50LPW080       0x002F
 #define M50FLW080A     0x0080
@@ -375,6 +376,17 @@ static void cfi_fixup_major_minor(struct cfi_private *cfi,
                extp->MinorVersion = '1';
 }
 
+static int cfi_is_micron_28F00AP30(struct cfi_private *cfi, struct flchip *chip)
+{
+       /*
+        * Micron (formerly Numonyx) 1Gbit bottom-boot parts are buggy w.r.t.
+        * Erase Suspend for their small Erase Blocks (0x8000)
+        */
+       if (cfi->mfr == CFI_MFR_INTEL && cfi->id == M28F00AP30)
+               return 1;
+       return 0;
+}
+
 static inline struct cfi_pri_intelext *
 read_pri_intelext(struct map_info *map, __u16 adr)
 {
@@ -831,21 +843,30 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
                     (mode == FL_WRITING && (cfip->SuspendCmdSupport & 1))))
                        goto sleep;
 
+               /* Do not allow suspend if the read/write targets the EB being erased */
+               if ((adr & chip->in_progress_block_mask) ==
+                   chip->in_progress_block_addr)
+                       goto sleep;
+
+               /* Do not suspend small EBs on buggy Micron chips */
+               if (cfi_is_micron_28F00AP30(cfi, chip) &&
+                   (chip->in_progress_block_mask == ~(0x8000-1)))
+                       goto sleep;
 
                /* Erase suspend */
-               map_write(map, CMD(0xB0), adr);
+               map_write(map, CMD(0xB0), chip->in_progress_block_addr);
 
                /* If the flash has finished erasing, then 'erase suspend'
                 * appears to make some (28F320) flash devices switch to
                 * 'read' mode.  Make sure that we switch to 'read status'
                 * mode so we get the right data. --rmk
                 */
-               map_write(map, CMD(0x70), adr);
+               map_write(map, CMD(0x70), chip->in_progress_block_addr);
                chip->oldstate = FL_ERASING;
                chip->state = FL_ERASE_SUSPENDING;
                chip->erase_suspended = 1;
                for (;;) {
-                       status = map_read(map, adr);
+                       status = map_read(map, chip->in_progress_block_addr);
                        if (map_word_andequal(map, status, status_OK, status_OK))
                                break;
 
@@ -1041,8 +1062,8 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad
                   sending the 0x70 (Read Status) command to an erasing
                   chip and expecting it to be ignored, that's what we
                   do. */
-               map_write(map, CMD(0xd0), adr);
-               map_write(map, CMD(0x70), adr);
+               map_write(map, CMD(0xd0), chip->in_progress_block_addr);
+               map_write(map, CMD(0x70), chip->in_progress_block_addr);
                chip->oldstate = FL_READY;
                chip->state = FL_ERASING;
                break;
@@ -1933,6 +1954,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
        map_write(map, CMD(0xD0), adr);
        chip->state = FL_ERASING;
        chip->erase_suspended = 0;
+       chip->in_progress_block_addr = adr;
+       chip->in_progress_block_mask = ~(len - 1);
 
        ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
                                   adr, len,
index 668e2cbc155bbe008858ba9a11a5467fb5fc39ba..692902df259892a78e69964cf1069867d7336acc 100644 (file)
@@ -816,9 +816,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                    (mode == FL_WRITING && (cfip->EraseSuspend & 0x2))))
                        goto sleep;
 
-               /* We could check to see if we're trying to access the sector
-                * that is currently being erased. However, no user will try
-                * anything like that so we just wait for the timeout. */
+               /* Do not allow suspend if the read/write targets the EB being erased */
+               if ((adr & chip->in_progress_block_mask) ==
+                   chip->in_progress_block_addr)
+                       goto sleep;
 
                /* Erase suspend */
                /* It's harmless to issue the Erase-Suspend and Erase-Resume
@@ -2267,6 +2268,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
        chip->state = FL_ERASING;
        chip->erase_suspended = 0;
        chip->in_progress_block_addr = adr;
+       chip->in_progress_block_mask = ~(map->size - 1);
 
        INVALIDATE_CACHE_UDELAY(map, chip,
                                adr, map->size,
@@ -2356,6 +2358,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
        chip->state = FL_ERASING;
        chip->erase_suspended = 0;
        chip->in_progress_block_addr = adr;
+       chip->in_progress_block_mask = ~(len - 1);
 
        INVALIDATE_CACHE_UDELAY(map, chip,
                                adr, len,
index d0cd6f8635d722ab3c1bcfcc32184f47c8dab24c..9c9f8936b63bc37bbcbd7c6d22d6ca85d69f388f 100644 (file)
@@ -162,7 +162,6 @@ int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo)
                ret = nanddev_erase(nand, &pos);
                if (ret) {
                        einfo->fail_addr = nanddev_pos_to_offs(nand, &pos);
-                       einfo->state = MTD_ERASE_FAILED;
 
                        return ret;
                }
@@ -170,8 +169,6 @@ int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo)
                nanddev_pos_next_eraseblock(nand, &pos);
        }
 
-       einfo->state = MTD_ERASE_DONE;
-
        return 0;
 }
 EXPORT_SYMBOL_GPL(nanddev_mtd_erase);
index 9c159f0dd9a61183892bb067c41dac17386c56e6..321137158ff3161a1c89aa7f1a6ad51fa91647c4 100644 (file)
@@ -375,56 +375,42 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
 {
        struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
        struct onenand_chip *this = mtd->priv;
-       dma_addr_t dma_src, dma_dst;
-       int bram_offset;
+       struct device *dev = &c->pdev->dev;
        void *buf = (void *)buffer;
+       dma_addr_t dma_src, dma_dst;
+       int bram_offset, err;
        size_t xtra;
-       int ret;
 
        bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
-       if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
-               goto out_copy;
-
-       /* panic_write() may be in an interrupt context */
-       if (in_interrupt() || oops_in_progress)
+       /*
+        * If the buffer address is not DMA-able, the length is too short to
+        * make DMA transfers profitable, or panic_write() may be in an
+        * interrupt context, fall back to PIO mode.
+        */
+       if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 ||
+           count < 384 || in_interrupt() || oops_in_progress)
                goto out_copy;
 
-       if (buf >= high_memory) {
-               struct page *p1;
-
-               if (((size_t)buf & PAGE_MASK) !=
-                   ((size_t)(buf + count - 1) & PAGE_MASK))
-                       goto out_copy;
-               p1 = vmalloc_to_page(buf);
-               if (!p1)
-                       goto out_copy;
-               buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
-       }
-
        xtra = count & 3;
        if (xtra) {
                count -= xtra;
                memcpy(buf + count, this->base + bram_offset + count, xtra);
        }
 
+       dma_dst = dma_map_single(dev, buf, count, DMA_FROM_DEVICE);
        dma_src = c->phys_base + bram_offset;
-       dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
-       if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
-               dev_err(&c->pdev->dev,
-                       "Couldn't DMA map a %d byte buffer\n",
-                       count);
-               goto out_copy;
-       }
 
-       ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
-       dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
-
-       if (ret) {
-               dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+       if (dma_mapping_error(dev, dma_dst)) {
+               dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count);
                goto out_copy;
        }
 
-       return 0;
+       err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
+       dma_unmap_single(dev, dma_dst, count, DMA_FROM_DEVICE);
+       if (!err)
+               return 0;
+
+       dev_err(dev, "timeout waiting for DMA\n");
 
 out_copy:
        memcpy(buf, this->base + bram_offset, count);
@@ -437,49 +423,34 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
 {
        struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
        struct onenand_chip *this = mtd->priv;
-       dma_addr_t dma_src, dma_dst;
-       int bram_offset;
+       struct device *dev = &c->pdev->dev;
        void *buf = (void *)buffer;
-       int ret;
+       dma_addr_t dma_src, dma_dst;
+       int bram_offset, err;
 
        bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
-       if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
-               goto out_copy;
-
-       /* panic_write() may be in an interrupt context */
-       if (in_interrupt() || oops_in_progress)
+       /*
+        * If the buffer address is not DMA-able, the length is too short to
+        * make DMA transfers profitable, or panic_write() may be in an
+        * interrupt context, fall back to PIO mode.
+        */
+       if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 ||
+           count < 384 || in_interrupt() || oops_in_progress)
                goto out_copy;
 
-       if (buf >= high_memory) {
-               struct page *p1;
-
-               if (((size_t)buf & PAGE_MASK) !=
-                   ((size_t)(buf + count - 1) & PAGE_MASK))
-                       goto out_copy;
-               p1 = vmalloc_to_page(buf);
-               if (!p1)
-                       goto out_copy;
-               buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
-       }
-
-       dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
+       dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE);
        dma_dst = c->phys_base + bram_offset;
-       if (dma_mapping_error(&c->pdev->dev, dma_src)) {
-               dev_err(&c->pdev->dev,
-                       "Couldn't DMA map a %d byte buffer\n",
-                       count);
-               return -1;
-       }
-
-       ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
-       dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
-
-       if (ret) {
-               dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+       if (dma_mapping_error(dev, dma_src)) {
+               dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count);
                goto out_copy;
        }
 
-       return 0;
+       err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
+       dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE);
+       if (!err)
+               return 0;
+
+       dev_err(dev, "timeout waiting for DMA\n");
 
 out_copy:
        memcpy(this->base + bram_offset, buf, count);
index 10e953218948836e0151f022ef8745d8b523e574..ebb1d141b90000c069b0634fe0a3c4d5d5f842d4 100644 (file)
@@ -1074,7 +1074,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
                return ret;
 
        ret = marvell_nfc_wait_op(chip,
-                                 chip->data_interface.timings.sdr.tPROG_max);
+                                 PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max));
        return ret;
 }
 
@@ -1194,11 +1194,13 @@ static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk,
                                  NDCB0_CMD2(NAND_CMD_READSTART);
 
        /*
-        * Trigger the naked read operation only on the last chunk.
-        * Otherwise, use monolithic read.
+        * Trigger a monolithic read on the first chunk, naked reads on the
+        * intermediate chunks, and a last naked read on the last chunk.
         */
-       if (lt->nchunks == 1 || (chunk < lt->nchunks - 1))
+       if (chunk == 0)
                nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW);
+       else if (chunk < lt->nchunks - 1)
+               nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_NAKED_RW);
        else
                nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW);
 
@@ -1408,6 +1410,7 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk,
        struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip);
        struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
        const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
+       u32 xtype;
        int ret;
        struct marvell_nfc_op nfc_op = {
                .ndcb[0] = NDCB0_CMD_TYPE(TYPE_WRITE) | NDCB0_LEN_OVRD,
@@ -1423,7 +1426,12 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk,
         * last naked write.
         */
        if (chunk == 0) {
-               nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_WRITE_DISPATCH) |
+               if (lt->nchunks == 1)
+                       xtype = XTYPE_MONOLITHIC_RW;
+               else
+                       xtype = XTYPE_WRITE_DISPATCH;
+
+               nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(xtype) |
                                  NDCB0_ADDR_CYC(marvell_nand->addr_cyc) |
                                  NDCB0_CMD1(NAND_CMD_SEQIN);
                nfc_op.ndcb[1] |= NDCB1_ADDRS_PAGE(page);
@@ -1494,7 +1502,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd,
        }
 
        ret = marvell_nfc_wait_op(chip,
-                                 chip->data_interface.timings.sdr.tPROG_max);
+                                 PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max));
 
        marvell_nfc_disable_hw_ecc(chip);
 
@@ -2299,29 +2307,20 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc,
        /*
         * The legacy "num-cs" property indicates the number of CS on the only
         * chip connected to the controller (legacy bindings do not support
-        * more than one chip). CS are only incremented one by one while the RB
-        * pin is always the #0.
+        * more than one chip). The CS and RB pins are always #0.
         *
         * When not using legacy bindings, a couple of "reg" and "nand-rb"
         * properties must be filled. For each chip, expressed as a subnode,
         * "reg" points to the CS lines and "nand-rb" to the RB line.
         */
-       if (pdata) {
+       if (pdata || nfc->caps->legacy_of_bindings) {
                nsels = 1;
-       } else if (nfc->caps->legacy_of_bindings &&
-                  !of_get_property(np, "num-cs", &nsels)) {
-               dev_err(dev, "missing num-cs property\n");
-               return -EINVAL;
-       } else if (!of_get_property(np, "reg", &nsels)) {
-               dev_err(dev, "missing reg property\n");
-               return -EINVAL;
-       }
-
-       if (!pdata)
-               nsels /= sizeof(u32);
-       if (!nsels) {
-               dev_err(dev, "invalid reg property size\n");
-               return -EINVAL;
+       } else {
+               nsels = of_property_count_elems_of_size(np, "reg", sizeof(u32));
+               if (nsels <= 0) {
+                       dev_err(dev, "missing/invalid reg property\n");
+                       return -EINVAL;
+               }
        }
 
        /* Alloc the nand chip structure */
index 72f3a89da513e20dacf87b9943c6ac87db5a2e2f..f28c3a5558619fa22842ed2bffab7e0b4d7141a6 100644 (file)
@@ -706,12 +706,17 @@ static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo)
  */
 int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms)
 {
+       const struct nand_sdr_timings *timings;
        u8 status = 0;
        int ret;
 
        if (!chip->exec_op)
                return -ENOTSUPP;
 
+       /* Wait tWB before polling the STATUS reg. */
+       timings = nand_get_sdr_timings(&chip->data_interface);
+       ndelay(PSEC_TO_NSEC(timings->tWB_max));
+
        ret = nand_status_op(chip, NULL);
        if (ret)
                return ret;
index f54518ffb36af43221bc01905a731d3c9e73d968..f2052fae21c7453ae6f5fa3698fd2d38fa1884ad 100644 (file)
@@ -645,7 +645,7 @@ static int tango_nand_probe(struct platform_device *pdev)
 
        writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE);
 
-       clk = clk_get(&pdev->dev, NULL);
+       clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(clk))
                return PTR_ERR(clk);
 
index 4b8e9183489aa444edfa193d20fd004581470544..5872f31eaa60f91dae62bc1690e78aa97ca5c2a8 100644 (file)
@@ -501,7 +501,9 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
        void __iomem *reg_base = cqspi->iobase;
        void __iomem *ahb_base = cqspi->ahb_base;
        unsigned int remaining = n_rx;
+       unsigned int mod_bytes = n_rx % 4;
        unsigned int bytes_to_read = 0;
+       u8 *rxbuf_end = rxbuf + n_rx;
        int ret = 0;
 
        writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
@@ -530,11 +532,24 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
                }
 
                while (bytes_to_read != 0) {
+                       unsigned int word_remain = round_down(remaining, 4);
+
                        bytes_to_read *= cqspi->fifo_width;
                        bytes_to_read = bytes_to_read > remaining ?
                                        remaining : bytes_to_read;
-                       ioread32_rep(ahb_base, rxbuf,
-                                    DIV_ROUND_UP(bytes_to_read, 4));
+                       bytes_to_read = round_down(bytes_to_read, 4);
+                       /* Read 4-byte word chunks, then single bytes */
+                       if (bytes_to_read) {
+                               ioread32_rep(ahb_base, rxbuf,
+                                            (bytes_to_read / 4));
+                       } else if (!word_remain && mod_bytes) {
+                               unsigned int temp = ioread32(ahb_base);
+
+                               bytes_to_read = mod_bytes;
+                               memcpy(rxbuf, &temp, min((unsigned int)
+                                                        (rxbuf_end - rxbuf),
+                                                        bytes_to_read));
+                       }
                        rxbuf += bytes_to_read;
                        remaining -= bytes_to_read;
                        bytes_to_read = cqspi_get_rd_sram_level(cqspi);
index 8918466550004dbf14c2e15f2ab25d7aa2317af5..a029b27fd00280615ab4c1b50eacf040f9f23888 100644 (file)
@@ -198,6 +198,7 @@ config VXLAN
 config GENEVE
        tristate "Generic Network Virtualization Encapsulation"
        depends on INET && NET_UDP_TUNNEL
+       depends on IPV6 || !IPV6
        select NET_IP_TUNNEL
        select GRO_CELLS
        ---help---
index 1ed9529e7bd1de923697731dc0db0ceaa926ef1f..e82108c917a635f036a9e86eeeed5d09c9cbbf53 100644 (file)
 #include <net/bonding.h>
 #include <net/bond_alb.h>
 
-
-
-static const u8 mac_bcast[ETH_ALEN + 2] __long_aligned = {
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
 static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
        0x33, 0x33, 0x00, 0x00, 0x00, 0x01
 };
@@ -420,8 +415,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 
                        if (assigned_slave) {
                                rx_hash_table[index].slave = assigned_slave;
-                               if (!ether_addr_equal_64bits(rx_hash_table[index].mac_dst,
-                                                            mac_bcast)) {
+                               if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) {
                                        bond_info->rx_hashtbl[index].ntt = 1;
                                        bond_info->rx_ntt = 1;
                                        /* A slave has been removed from the
@@ -450,7 +444,7 @@ static void rlb_update_client(struct rlb_client_info *client_info)
 {
        int i;
 
-       if (!client_info->slave)
+       if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst))
                return;
 
        for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
@@ -524,7 +518,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
                client_info = &(bond_info->rx_hashtbl[hash_index]);
 
                if ((client_info->slave == slave) &&
-                   !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {
+                   is_valid_ether_addr(client_info->mac_dst)) {
                        client_info->ntt = 1;
                        ntt = 1;
                }
@@ -565,7 +559,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
                if ((client_info->ip_src == src_ip) &&
                    !ether_addr_equal_64bits(client_info->slave->dev->dev_addr,
                                             bond->dev->dev_addr) &&
-                   !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {
+                   is_valid_ether_addr(client_info->mac_dst)) {
                        client_info->ntt = 1;
                        bond_info->rx_ntt = 1;
                }
@@ -593,7 +587,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
                if ((client_info->ip_src == arp->ip_src) &&
                    (client_info->ip_dst == arp->ip_dst)) {
                        /* the entry is already assigned to this client */
-                       if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) {
+                       if (!is_broadcast_ether_addr(arp->mac_dst)) {
                                /* update mac address from arp */
                                ether_addr_copy(client_info->mac_dst, arp->mac_dst);
                        }
@@ -641,7 +635,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
                ether_addr_copy(client_info->mac_src, arp->mac_src);
                client_info->slave = assigned_slave;
 
-               if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {
+               if (is_valid_ether_addr(client_info->mac_dst)) {
                        client_info->ntt = 1;
                        bond->alb_info.rx_ntt = 1;
                } else {
@@ -733,8 +727,10 @@ static void rlb_rebalance(struct bonding *bond)
                assigned_slave = __rlb_next_rx_slave(bond);
                if (assigned_slave && (client_info->slave != assigned_slave)) {
                        client_info->slave = assigned_slave;
-                       client_info->ntt = 1;
-                       ntt = 1;
+                       if (!is_zero_ether_addr(client_info->mac_dst)) {
+                               client_info->ntt = 1;
+                               ntt = 1;
+                       }
                }
        }
 
@@ -943,6 +939,10 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
        skb->priority = TC_PRIO_CONTROL;
        skb->dev = slave->dev;
 
+       netdev_dbg(slave->bond->dev,
+                  "Send learning packet: dev %s mac %pM vlan %d\n",
+                  slave->dev->name, mac_addr, vid);
+
        if (vid)
                __vlan_hwaccel_put_tag(skb, vlan_proto, vid);
 
@@ -965,14 +965,13 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data)
        u8 *mac_addr = data->mac_addr;
        struct bond_vlan_tag *tags;
 
-       if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) {
-               if (strict_match &&
-                   ether_addr_equal_64bits(mac_addr,
-                                           upper->dev_addr)) {
+       if (is_vlan_dev(upper) &&
+           bond->nest_level == vlan_get_encap_level(upper) - 1) {
+               if (upper->addr_assign_type == NET_ADDR_STOLEN) {
                        alb_send_lp_vid(slave, mac_addr,
                                        vlan_dev_vlan_proto(upper),
                                        vlan_dev_vlan_id(upper));
-               } else if (!strict_match) {
+               } else {
                        alb_send_lp_vid(slave, upper->dev_addr,
                                        vlan_dev_vlan_proto(upper),
                                        vlan_dev_vlan_id(upper));
@@ -1316,8 +1315,8 @@ void bond_alb_deinitialize(struct bonding *bond)
                rlb_deinitialize(bond);
 }
 
-static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
-                           struct slave *tx_slave)
+static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
+                                   struct slave *tx_slave)
 {
        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
        struct ethhdr *eth_data = eth_hdr(skb);
@@ -1351,7 +1350,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
        return NETDEV_TX_OK;
 }
 
-int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
+netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        struct ethhdr *eth_data;
@@ -1389,7 +1388,7 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
        return bond_do_alb_xmit(skb, bond, tx_slave);
 }
 
-int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
+netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        struct ethhdr *eth_data;
@@ -1409,9 +1408,9 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
        case ETH_P_IP: {
                const struct iphdr *iph = ip_hdr(skb);
 
-               if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) ||
-                   (iph->daddr == ip_bcast) ||
-                   (iph->protocol == IPPROTO_IGMP)) {
+               if (is_broadcast_ether_addr(eth_data->h_dest) ||
+                   iph->daddr == ip_bcast ||
+                   iph->protocol == IPPROTO_IGMP) {
                        do_tx_balance = false;
                        break;
                }
@@ -1423,7 +1422,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
                /* IPv6 doesn't really use broadcast mac address, but leave
                 * that here just in case.
                 */
-               if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) {
+               if (is_broadcast_ether_addr(eth_data->h_dest)) {
                        do_tx_balance = false;
                        break;
                }
@@ -1479,8 +1478,24 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
        }
 
        if (do_tx_balance) {
-               hash_index = _simple_hash(hash_start, hash_size);
-               tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
+               if (bond->params.tlb_dynamic_lb) {
+                       hash_index = _simple_hash(hash_start, hash_size);
+                       tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
+               } else {
+                       /*
+                        * do_tx_balance means we are free to select the tx_slave,
+                        * so we do exactly what TLB would do for hash selection.
+                        */
+
+                       struct bond_up_slave *slaves;
+                       unsigned int count;
+
+                       slaves = rcu_dereference(bond->slave_arr);
+                       count = slaves ? READ_ONCE(slaves->count) : 0;
+                       if (likely(count))
+                               tx_slave = slaves->arr[bond_xmit_hash(bond, skb) %
+                                                      count];
+               }
        }
 
        return bond_do_alb_xmit(skb, bond, tx_slave);
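
When tlb_dynamic_lb is disabled, the hunk above selects the tx slave by hashing the packet and indexing the usable-slave array modulo its count, the same shape the XOR/802.3ad path uses. A minimal standalone sketch of that selection, with a hypothetical slave table standing in for struct bond_up_slave:

    /* Standalone sketch of the static hash-based slave selection above.
     * The table and hash input are hypothetical stand-ins, not kernel APIs. */
    #include <stddef.h>
    #include <stdint.h>

    struct up_slaves {
            size_t count;
            int ids[8];             /* stand-in for struct slave pointers */
    };

    static int pick_tx_slave(const struct up_slaves *s, uint32_t pkt_hash)
    {
            if (!s || s->count == 0)
                    return -1;      /* no usable slave; caller drops */
            /* same shape as slaves->arr[bond_xmit_hash(bond, skb) % count] */
            return s->ids[pkt_hash % s->count];
    }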
index b7b11301885334315700dc8e3386253dd8178afb..06efdf6a762b52df73d275a42cdaab5ffad1cc53 100644 (file)
@@ -159,7 +159,7 @@ module_param(min_links, int, 0);
 MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier");
 
 module_param(xmit_hash_policy, charp, 0);
-MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
+MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; "
                                   "0 for layer 2 (default), 1 for layer 3+4, "
                                   "2 for layer 2+3, 3 for encap layer 2+3, "
                                   "4 for encap layer 3+4");
@@ -247,7 +247,7 @@ void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 
        BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
                     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
-       skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+       skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
 
        if (unlikely(netpoll_tx_running(bond->dev)))
                bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
@@ -1660,8 +1660,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
        } /* switch(bond_mode) */
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-       slave_dev->npinfo = bond->dev->npinfo;
-       if (slave_dev->npinfo) {
+       if (bond->dev->npinfo) {
                if (slave_enable_netpoll(new_slave)) {
                        netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n");
                        res = -EBUSY;
@@ -1736,9 +1735,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                unblock_netpoll_tx();
        }
 
-       if (bond_mode_uses_xmit_hash(bond))
+       if (bond_mode_can_use_xmit_hash(bond))
                bond_update_slave_arr(bond, NULL);
 
+       bond->nest_level = dev_get_nest_level(bond_dev);
+
        netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
                    slave_dev->name,
                    bond_is_active_slave(new_slave) ? "an active" : "a backup",
@@ -1869,7 +1870,7 @@ static int __bond_release_one(struct net_device *bond_dev,
        if (BOND_MODE(bond) == BOND_MODE_8023AD)
                bond_3ad_unbind_slave(slave);
 
-       if (bond_mode_uses_xmit_hash(bond))
+       if (bond_mode_can_use_xmit_hash(bond))
                bond_update_slave_arr(bond, slave);
 
        netdev_info(bond_dev, "Releasing %s interface %s\n",
@@ -2136,6 +2137,24 @@ static int bond_miimon_inspect(struct bonding *bond)
        return commit;
 }
 
+static void bond_miimon_link_change(struct bonding *bond,
+                                   struct slave *slave,
+                                   char link)
+{
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_8023AD:
+               bond_3ad_handle_link_change(slave, link);
+               break;
+       case BOND_MODE_TLB:
+       case BOND_MODE_ALB:
+               bond_alb_handle_link_change(bond, slave, link);
+               break;
+       case BOND_MODE_XOR:
+               bond_update_slave_arr(bond, NULL);
+               break;
+       }
+}
+
 static void bond_miimon_commit(struct bonding *bond)
 {
        struct list_head *iter;
@@ -2177,16 +2196,7 @@ static void bond_miimon_commit(struct bonding *bond)
                                    slave->speed == SPEED_UNKNOWN ? 0 : slave->speed,
                                    slave->duplex ? "full" : "half");
 
-                       /* notify ad that the link status has changed */
-                       if (BOND_MODE(bond) == BOND_MODE_8023AD)
-                               bond_3ad_handle_link_change(slave, BOND_LINK_UP);
-
-                       if (bond_is_lb(bond))
-                               bond_alb_handle_link_change(bond, slave,
-                                                           BOND_LINK_UP);
-
-                       if (BOND_MODE(bond) == BOND_MODE_XOR)
-                               bond_update_slave_arr(bond, NULL);
+                       bond_miimon_link_change(bond, slave, BOND_LINK_UP);
 
                        if (!bond->curr_active_slave || slave == primary)
                                goto do_failover;
@@ -2208,16 +2218,7 @@ static void bond_miimon_commit(struct bonding *bond)
                        netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n",
                                    slave->dev->name);
 
-                       if (BOND_MODE(bond) == BOND_MODE_8023AD)
-                               bond_3ad_handle_link_change(slave,
-                                                           BOND_LINK_DOWN);
-
-                       if (bond_is_lb(bond))
-                               bond_alb_handle_link_change(bond, slave,
-                                                           BOND_LINK_DOWN);
-
-                       if (BOND_MODE(bond) == BOND_MODE_XOR)
-                               bond_update_slave_arr(bond, NULL);
+                       bond_miimon_link_change(bond, slave, BOND_LINK_DOWN);
 
                        if (slave == rcu_access_pointer(bond->curr_active_slave))
                                goto do_failover;
@@ -3101,7 +3102,7 @@ static int bond_slave_netdev_event(unsigned long event,
                 * events. If these (miimon/arpmon) parameters are configured
                 * then array gets refreshed twice and that should be fine!
                 */
-               if (bond_mode_uses_xmit_hash(bond))
+               if (bond_mode_can_use_xmit_hash(bond))
                        bond_update_slave_arr(bond, NULL);
                break;
        case NETDEV_CHANGEMTU:
@@ -3321,7 +3322,7 @@ static int bond_open(struct net_device *bond_dev)
                 */
                if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))
                        return -ENOMEM;
-               if (bond->params.tlb_dynamic_lb)
+               if (bond->params.tlb_dynamic_lb || BOND_MODE(bond) == BOND_MODE_ALB)
                        queue_delayed_work(bond->wq, &bond->alb_work, 0);
        }
 
@@ -3340,7 +3341,7 @@ static int bond_open(struct net_device *bond_dev)
                bond_3ad_initiate_agg_selection(bond, 1);
        }
 
-       if (bond_mode_uses_xmit_hash(bond))
+       if (bond_mode_can_use_xmit_hash(bond))
                bond_update_slave_arr(bond, NULL);
 
        return 0;
@@ -3806,7 +3807,8 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
        return slave_id;
 }
 
-static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
+static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
+                                       struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        struct iphdr *iph = ip_hdr(skb);
@@ -3842,7 +3844,8 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
 /* In active-backup mode, we know that bond->curr_active_slave is always valid if
  * the bond has a usable interface.
  */
-static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
+static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
+                                         struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave;
@@ -3891,7 +3894,7 @@ static void bond_slave_arr_handler(struct work_struct *work)
  * to determine the slave interface -
  * (a) BOND_MODE_8023AD
  * (b) BOND_MODE_XOR
- * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
+ * (c) (BOND_MODE_TLB || BOND_MODE_ALB) && tlb_dynamic_lb == 0
  *
  * The caller is expected to hold RTNL only and NO other lock!
  */
@@ -3944,6 +3947,11 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
                        continue;
                if (skipslave == slave)
                        continue;
+
+               netdev_dbg(bond->dev,
+                          "Adding slave dev %s to tx hash array[%d]\n",
+                          slave->dev->name, new_arr->count);
+
                new_arr->arr[new_arr->count++] = slave;
        }
 
@@ -3980,7 +3988,8 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
  */
-static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
+                                    struct net_device *dev)
 {
        struct bonding *bond = netdev_priv(dev);
        struct slave *slave;
@@ -4000,7 +4009,8 @@ static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 /* in broadcast mode, we send everything to all usable interfaces. */
-static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
+static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
+                                      struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave = NULL;
@@ -4037,12 +4047,12 @@ static inline int bond_slave_override(struct bonding *bond,
        struct slave *slave = NULL;
        struct list_head *iter;
 
-       if (!skb->queue_mapping)
+       if (!skb_rx_queue_recorded(skb))
                return 1;
 
        /* Find out if any slaves have the same mapping as this skb. */
        bond_for_each_slave_rcu(bond, slave, iter) {
-               if (slave->queue_id == skb->queue_mapping) {
+               if (slave->queue_id == skb_get_queue_mapping(skb)) {
                        if (bond_slave_is_up(slave) &&
                            slave->link == BOND_LINK_UP) {
                                bond_dev_queue_xmit(bond, skb, slave->dev);
@@ -4068,7 +4078,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
        u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
 
        /* Save the original txq to restore before passing to the driver */
-       qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+       qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb_get_queue_mapping(skb);
 
        if (unlikely(txq >= dev->real_num_tx_queues)) {
                do {
@@ -4319,9 +4329,9 @@ static int bond_check_params(struct bond_params *params)
        }
 
        if (xmit_hash_policy) {
-               if ((bond_mode != BOND_MODE_XOR) &&
-                   (bond_mode != BOND_MODE_8023AD) &&
-                   (bond_mode != BOND_MODE_TLB)) {
+               if (bond_mode == BOND_MODE_ROUNDROBIN ||
+                   bond_mode == BOND_MODE_ACTIVEBACKUP ||
+                   bond_mode == BOND_MODE_BROADCAST) {
                        pr_info("xmit_hash_policy param is irrelevant in mode %s\n",
                                bond_mode_name(bond_mode));
                } else {
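
Throughout this file the xmit handlers change their return type from int to netdev_tx_t, matching the prototype the net core expects for ndo_start_xmit. The shape after conversion, as an illustrative sketch only (not a hunk from this patch):

    /* Illustrative sketch of an ndo_start_xmit handler after the
     * netdev_tx_t conversion; NETDEV_TX_OK and NETDEV_TX_BUSY are the
     * expected return values. */
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    static netdev_tx_t example_start_xmit(struct sk_buff *skb,
                                          struct net_device *dev)
    {
            /* hand the skb to hardware or a selected slave device here */
            return NETDEV_TX_OK;
    }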
index 58c705f24f963f98a4fa9181f18ac04f4af1d616..8a945c9341d6adde8ffa49fcc2c11a06860a17c7 100644 (file)
@@ -395,7 +395,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
                .id = BOND_OPT_TLB_DYNAMIC_LB,
                .name = "tlb_dynamic_lb",
                .desc = "Enable dynamic flow shuffling",
-               .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)),
+               .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB) | BIT(BOND_MODE_ALB)),
                .values = bond_tlb_dynamic_lb_tbl,
                .flags = BOND_OPTFLAG_IFDOWN,
                .set = bond_option_tlb_dynamic_lb_set,
index b1779566c5bbf15513d0dd9e5e880f55462c7e21..3c71f1cb205faaa98617eeb2f0f0e3c94a2f36e8 100644 (file)
@@ -605,7 +605,7 @@ void can_bus_off(struct net_device *dev)
 {
        struct can_priv *priv = netdev_priv(dev);
 
-       netdev_dbg(dev, "bus-off\n");
+       netdev_info(dev, "bus-off\n");
 
        netif_carrier_off(dev);
 
index 634c51e6b8ae5d7b73b4dea2180a631fdcfadeee..d53a45bf2a72eb9e0d3cd16fe9f876bccbe5ade5 100644 (file)
 #define FLEXCAN_QUIRK_DISABLE_MECR     BIT(4) /* Disable Memory error detection */
 #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP        BIT(5) /* Use timestamp based offloading */
 #define FLEXCAN_QUIRK_BROKEN_PERR_STATE        BIT(6) /* No interrupt for error passive */
+#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN       BIT(7) /* default to BE register access */
 
 /* Structure of the message buffer */
 struct flexcan_mb {
@@ -287,6 +288,12 @@ struct flexcan_priv {
 };
 
 static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
+       .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
+               FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+               FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN,
+};
+
+static const struct flexcan_devtype_data fsl_imx25_devtype_data = {
        .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
                FLEXCAN_QUIRK_BROKEN_PERR_STATE,
 };
@@ -1251,9 +1258,9 @@ static void unregister_flexcandev(struct net_device *dev)
 static const struct of_device_id flexcan_of_match[] = {
        { .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
        { .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
-       { .compatible = "fsl,imx53-flexcan", .data = &fsl_p1010_devtype_data, },
-       { .compatible = "fsl,imx35-flexcan", .data = &fsl_p1010_devtype_data, },
-       { .compatible = "fsl,imx25-flexcan", .data = &fsl_p1010_devtype_data, },
+       { .compatible = "fsl,imx53-flexcan", .data = &fsl_imx25_devtype_data, },
+       { .compatible = "fsl,imx35-flexcan", .data = &fsl_imx25_devtype_data, },
+       { .compatible = "fsl,imx25-flexcan", .data = &fsl_imx25_devtype_data, },
        { .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, },
        { .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, },
        { .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, },
@@ -1337,18 +1344,13 @@ static int flexcan_probe(struct platform_device *pdev)
 
        priv = netdev_priv(dev);
 
-       if (of_property_read_bool(pdev->dev.of_node, "big-endian")) {
+       if (of_property_read_bool(pdev->dev.of_node, "big-endian") ||
+           devtype_data->quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) {
                priv->read = flexcan_read_be;
                priv->write = flexcan_write_be;
        } else {
-               if (of_device_is_compatible(pdev->dev.of_node,
-                                           "fsl,p1010-flexcan")) {
-                       priv->read = flexcan_read_be;
-                       priv->write = flexcan_write_be;
-               } else {
-                       priv->read = flexcan_read_le;
-                       priv->write = flexcan_write_le;
-               }
+               priv->read = flexcan_read_le;
+               priv->write = flexcan_write_le;
        }
 
        priv->can.clock.freq = clock_freq;
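
The flexcan endianness selection now keys off a device-quirk bit instead of re-matching the compatible string: p1010 keeps big-endian register access via FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN, while imx25/35/53 move to a new devtype entry without that quirk and so default to little-endian; an explicit "big-endian" device-tree property still forces big-endian access on any of them.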
index 5590c559a8ca5305d4eb2ecac18fc8d406afebad..53e320c92a8be21e286ab4f7ada738fd223a08fa 100644 (file)
@@ -91,6 +91,7 @@
 #define HI3110_STAT_BUSOFF BIT(2)
 #define HI3110_STAT_ERRP BIT(3)
 #define HI3110_STAT_ERRW BIT(4)
+#define HI3110_STAT_TXMTY BIT(7)
 
 #define HI3110_BTR0_SJW_SHIFT 6
 #define HI3110_BTR0_BRP_SHIFT 0
@@ -427,8 +428,10 @@ static int hi3110_get_berr_counter(const struct net_device *net,
        struct hi3110_priv *priv = netdev_priv(net);
        struct spi_device *spi = priv->spi;
 
+       mutex_lock(&priv->hi3110_lock);
        bec->txerr = hi3110_read(spi, HI3110_READ_TEC);
        bec->rxerr = hi3110_read(spi, HI3110_READ_REC);
+       mutex_unlock(&priv->hi3110_lock);
 
        return 0;
 }
@@ -735,10 +738,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
                        }
                }
 
-               if (intf == 0)
-                       break;
-
-               if (intf & HI3110_INT_TXCPLT) {
+               if (priv->tx_len && statf & HI3110_STAT_TXMTY) {
                        net->stats.tx_packets++;
                        net->stats.tx_bytes += priv->tx_len - 1;
                        can_led_event(net, CAN_LED_EVENT_TX);
@@ -748,6 +748,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
                        }
                        netif_wake_queue(net);
                }
+
+               if (intf == 0)
+                       break;
        }
        mutex_unlock(&priv->hi3110_lock);
        return IRQ_HANDLED;
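
Two behavioural points in the hi3110 hunks above: the bus-error counter reads are now serialized against the interrupt handler by taking hi3110_lock, and TX completion is credited from the TXMTY status bit rather than the TXCPLT interrupt flag, with the intf == 0 exit moved after that check so a completion that arrives with no other interrupt pending is not lost.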
index 63587b8e6825add0dadc75b6e446981935d18adb..daed57d3d2097d5f8ad026e5b902811fff3db813 100644 (file)
@@ -1179,7 +1179,7 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
 
        skb = alloc_can_skb(priv->netdev, &cf);
        if (!skb) {
-               stats->tx_dropped++;
+               stats->rx_dropped++;
                return;
        }
 
index 78616787f2a396102a4b9c5910fcd3abb87bcfb5..9f561fe505cb06e8aebe89709c0704e97baf03c4 100644 (file)
@@ -806,16 +806,39 @@ static unsigned int b53_get_mib_size(struct b53_device *dev)
                return B53_MIBS_SIZE;
 }
 
-void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+static struct phy_device *b53_get_phy_device(struct dsa_switch *ds, int port)
+{
+       /* These ports typically do not have built-in PHYs */
+       switch (port) {
+       case B53_CPU_PORT_25:
+       case 7:
+       case B53_CPU_PORT:
+               return NULL;
+       }
+
+       return mdiobus_get_phy(ds->slave_mii_bus, port);
+}
+
+void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+                    uint8_t *data)
 {
        struct b53_device *dev = ds->priv;
        const struct b53_mib_desc *mibs = b53_get_mib(dev);
        unsigned int mib_size = b53_get_mib_size(dev);
+       struct phy_device *phydev;
        unsigned int i;
 
-       for (i = 0; i < mib_size; i++)
-               strlcpy(data + i * ETH_GSTRING_LEN,
-                       mibs[i].name, ETH_GSTRING_LEN);
+       if (stringset == ETH_SS_STATS) {
+               for (i = 0; i < mib_size; i++)
+                       strlcpy(data + i * ETH_GSTRING_LEN,
+                               mibs[i].name, ETH_GSTRING_LEN);
+       } else if (stringset == ETH_SS_PHY_STATS) {
+               phydev = b53_get_phy_device(ds, port);
+               if (!phydev)
+                       return;
+
+               phy_ethtool_get_strings(phydev, data);
+       }
 }
 EXPORT_SYMBOL(b53_get_strings);
 
@@ -852,11 +875,34 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
 }
 EXPORT_SYMBOL(b53_get_ethtool_stats);
 
-int b53_get_sset_count(struct dsa_switch *ds, int port)
+void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data)
+{
+       struct phy_device *phydev;
+
+       phydev = b53_get_phy_device(ds, port);
+       if (!phydev)
+               return;
+
+       phy_ethtool_get_stats(phydev, NULL, data);
+}
+EXPORT_SYMBOL(b53_get_ethtool_phy_stats);
+
+int b53_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
        struct b53_device *dev = ds->priv;
+       struct phy_device *phydev;
+
+       if (sset == ETH_SS_STATS) {
+               return b53_get_mib_size(dev);
+       } else if (sset == ETH_SS_PHY_STATS) {
+               phydev = b53_get_phy_device(ds, port);
+               if (!phydev)
+                       return 0;
+
+               return phy_ethtool_get_sset_count(phydev);
+       }
 
-       return b53_get_mib_size(dev);
+       return 0;
 }
 EXPORT_SYMBOL(b53_get_sset_count);
 
@@ -1477,7 +1523,7 @@ void b53_br_fast_age(struct dsa_switch *ds, int port)
 }
 EXPORT_SYMBOL(b53_br_fast_age);
 
-static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
+static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
 {
        /* Broadcom switches will accept enabling Broadcom tags on the
         * following ports: 5, 7 and 8, any other port is not supported
@@ -1489,10 +1535,19 @@ static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
                return true;
        }
 
-       dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n", port);
        return false;
 }
 
+static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
+{
+       bool ret = b53_possible_cpu_port(ds, port);
+
+       if (!ret)
+               dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n",
+                        port);
+       return ret;
+}
+
 enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port)
 {
        struct b53_device *dev = ds->priv;
@@ -1650,6 +1705,7 @@ static const struct dsa_switch_ops b53_switch_ops = {
        .get_strings            = b53_get_strings,
        .get_ethtool_stats      = b53_get_ethtool_stats,
        .get_sset_count         = b53_get_sset_count,
+       .get_ethtool_phy_stats  = b53_get_ethtool_phy_stats,
        .phy_read               = b53_phy_read16,
        .phy_write              = b53_phy_write16,
        .adjust_link            = b53_adjust_link,
@@ -1954,6 +2010,15 @@ static int b53_switch_init(struct b53_device *dev)
        dev->num_ports = dev->cpu_port + 1;
        dev->enabled_ports |= BIT(dev->cpu_port);
 
+       /* Include built-in PHYs on non-standard CPU ports so they get probed */
+       if (is539x(dev) || is531x5(dev)) {
+               for (i = 0; i < dev->num_ports; i++) {
+                       if (!(dev->ds->phys_mii_mask & BIT(i)) &&
+                           !b53_possible_cpu_port(dev->ds, i))
+                               dev->ds->phys_mii_mask |= BIT(i);
+               }
+       }
+
        dev->ports = devm_kzalloc(dev->dev,
                                  sizeof(struct b53_port) * dev->num_ports,
                                  GFP_KERNEL);
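
The b53 hunks above thread ethtool's string-set selector through get_strings()/get_sset_count(), answering ETH_SS_STATS with the switch MIBs and ETH_SS_PHY_STATS with the port PHY's own statistics. The dispatch pattern in isolation, with hypothetical stat tables standing in for the driver's MIBs:

    /* Sketch of the ETH_SS_STATS / ETH_SS_PHY_STATS dispatch added above;
     * the two tables are hypothetical, not the driver's. */
    #include <linux/ethtool.h>
    #include <linux/kernel.h>

    static const char * const mac_stats[] = { "TxOctets", "RxOctets" };
    static const char * const phy_stats[] = { "phy_receive_errors" };

    static int sketch_get_sset_count(int sset)
    {
            switch (sset) {
            case ETH_SS_STATS:
                    return ARRAY_SIZE(mac_stats);
            case ETH_SS_PHY_STATS:
                    return ARRAY_SIZE(phy_stats);
            default:
                    return 0;       /* unknown set: expose nothing */
            }
    }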
index 1187ebd79287bec832f78a04e47805e45fc22aeb..cc284a514de90f4a55373a8a3468e02f18bfdeca 100644 (file)
@@ -286,9 +286,11 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev)
 /* Exported functions towards other drivers */
 void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
 int b53_configure_vlan(struct dsa_switch *ds);
-void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
+void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+                    uint8_t *data);
 void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds, int port);
+int b53_get_sset_count(struct dsa_switch *ds, int port, int sset);
+void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data);
 int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
index 0378eded31f2cee00ea0d79c35b328b58317cd40..02e8982519cebcfad46f21dbc6c53d605e6b6f8a 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
+#include <linux/phylink.h>
 #include <linux/mii.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
@@ -306,7 +307,8 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum,
 
 static irqreturn_t bcm_sf2_switch_0_isr(int irq, void *dev_id)
 {
-       struct bcm_sf2_priv *priv = dev_id;
+       struct dsa_switch *ds = dev_id;
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 
        priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) &
                                ~priv->irq0_mask;
@@ -317,16 +319,21 @@ static irqreturn_t bcm_sf2_switch_0_isr(int irq, void *dev_id)
 
 static irqreturn_t bcm_sf2_switch_1_isr(int irq, void *dev_id)
 {
-       struct bcm_sf2_priv *priv = dev_id;
+       struct dsa_switch *ds = dev_id;
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 
        priv->irq1_stat = intrl2_1_readl(priv, INTRL2_CPU_STATUS) &
                                ~priv->irq1_mask;
        intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
 
-       if (priv->irq1_stat & P_LINK_UP_IRQ(P7_IRQ_OFF))
-               priv->port_sts[7].link = 1;
-       if (priv->irq1_stat & P_LINK_DOWN_IRQ(P7_IRQ_OFF))
-               priv->port_sts[7].link = 0;
+       if (priv->irq1_stat & P_LINK_UP_IRQ(P7_IRQ_OFF)) {
+               priv->port_sts[7].link = true;
+               dsa_port_phylink_mac_change(ds, 7, true);
+       }
+       if (priv->irq1_stat & P_LINK_DOWN_IRQ(P7_IRQ_OFF)) {
+               priv->port_sts[7].link = false;
+               dsa_port_phylink_mac_change(ds, 7, false);
+       }
 
        return IRQ_HANDLED;
 }
@@ -443,12 +450,8 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
        priv->slave_mii_bus->parent = ds->dev->parent;
        priv->slave_mii_bus->phy_mask = ~priv->indir_phy_mask;
 
-       if (dn)
-               err = of_mdiobus_register(priv->slave_mii_bus, dn);
-       else
-               err = mdiobus_register(priv->slave_mii_bus);
-
-       if (err)
+       err = of_mdiobus_register(priv->slave_mii_bus, dn);
+       if (err && dn)
                of_node_put(dn);
 
        return err;
@@ -473,13 +476,56 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
        return priv->hw_params.gphy_rev;
 }
 
-static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
-                                  struct phy_device *phydev)
+static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port,
+                               unsigned long *supported,
+                               struct phylink_link_state *state)
+{
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+       if (!phy_interface_mode_is_rgmii(state->interface) &&
+           state->interface != PHY_INTERFACE_MODE_MII &&
+           state->interface != PHY_INTERFACE_MODE_REVMII &&
+           state->interface != PHY_INTERFACE_MODE_GMII &&
+           state->interface != PHY_INTERFACE_MODE_INTERNAL &&
+           state->interface != PHY_INTERFACE_MODE_MOCA) {
+               bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+               dev_err(ds->dev,
+                       "Unsupported interface: %d\n", state->interface);
+               return;
+       }
+
+       /* Allow all the expected bits */
+       phylink_set(mask, Autoneg);
+       phylink_set_port_modes(mask);
+       phylink_set(mask, Pause);
+       phylink_set(mask, Asym_Pause);
+
+       /* Except for MII and Reverse MII, we support Gigabit, including
+        * half duplex.
+        */
+       if (state->interface != PHY_INTERFACE_MODE_MII &&
+           state->interface != PHY_INTERFACE_MODE_REVMII) {
+               phylink_set(mask, 1000baseT_Full);
+               phylink_set(mask, 1000baseT_Half);
+       }
+
+       phylink_set(mask, 10baseT_Half);
+       phylink_set(mask, 10baseT_Full);
+       phylink_set(mask, 100baseT_Half);
+       phylink_set(mask, 100baseT_Full);
+
+       bitmap_and(supported, supported, mask,
+                  __ETHTOOL_LINK_MODE_MASK_NBITS);
+       bitmap_and(state->advertising, state->advertising, mask,
+                  __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
+                                 unsigned int mode,
+                                 const struct phylink_link_state *state)
 {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-       struct ethtool_eee *p = &priv->dev->ports[port].eee;
        u32 id_mode_dis = 0, port_mode;
-       const char *str = NULL;
        u32 reg, offset;
 
        if (priv->type == BCM7445_DEVICE_ID)
@@ -487,62 +533,48 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
        else
                offset = CORE_STS_OVERRIDE_GMIIP2_PORT(port);
 
-       switch (phydev->interface) {
+       switch (state->interface) {
        case PHY_INTERFACE_MODE_RGMII:
-               str = "RGMII (no delay)";
                id_mode_dis = 1;
+               /* fallthrough */
        case PHY_INTERFACE_MODE_RGMII_TXID:
-               if (!str)
-                       str = "RGMII (TX delay)";
                port_mode = EXT_GPHY;
                break;
        case PHY_INTERFACE_MODE_MII:
-               str = "MII";
                port_mode = EXT_EPHY;
                break;
        case PHY_INTERFACE_MODE_REVMII:
-               str = "Reverse MII";
                port_mode = EXT_REVMII;
                break;
        default:
-               /* All other PHYs: internal and MoCA */
+               /* all other PHYs: internal and MoCA */
                goto force_link;
        }
 
-       /* If the link is down, just disable the interface to conserve power */
-       if (!phydev->link) {
-               reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
-               reg &= ~RGMII_MODE_EN;
-               reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
-               goto force_link;
-       }
-
-       /* Clear id_mode_dis bit, and the existing port mode, but
-        * make sure we enable the RGMII block for data to pass
+       /* Clear id_mode_dis bit and the existing port mode; let
+        * RGMII_MODE_EN be set by mac_link_{up,down}
         */
        reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
        reg &= ~ID_MODE_DIS;
        reg &= ~(PORT_MODE_MASK << PORT_MODE_SHIFT);
        reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
 
-       reg |= port_mode | RGMII_MODE_EN;
+       reg |= port_mode;
        if (id_mode_dis)
                reg |= ID_MODE_DIS;
 
-       if (phydev->pause) {
-               if (phydev->asym_pause)
+       if (state->pause & MLO_PAUSE_TXRX_MASK) {
+               if (state->pause & MLO_PAUSE_TX)
                        reg |= TX_PAUSE_EN;
                reg |= RX_PAUSE_EN;
        }
 
        reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
 
-       pr_info("Port %d configured for %s\n", port, str);
-
 force_link:
        /* Force link settings detected from the PHY */
        reg = SW_OVERRIDE;
-       switch (phydev->speed) {
+       switch (state->speed) {
        case SPEED_1000:
                reg |= SPDSTS_1000 << SPEED_SHIFT;
                break;
@@ -551,33 +583,61 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
                break;
        }
 
-       if (phydev->link)
+       if (state->link)
                reg |= LINK_STS;
-       if (phydev->duplex == DUPLEX_FULL)
+       if (state->duplex == DUPLEX_FULL)
                reg |= DUPLX_MODE;
 
        core_writel(priv, reg, offset);
-
-       if (!phydev->is_pseudo_fixed_link)
-               p->eee_enabled = b53_eee_init(ds, port, phydev);
 }
 
-static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
-                                        struct fixed_phy_status *status)
+static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port,
+                                   phy_interface_t interface, bool link)
 {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-       u32 duplex, pause, offset;
        u32 reg;
 
-       if (priv->type == BCM7445_DEVICE_ID)
-               offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
+       if (!phy_interface_mode_is_rgmii(interface) &&
+           interface != PHY_INTERFACE_MODE_MII &&
+           interface != PHY_INTERFACE_MODE_REVMII)
+               return;
+
+       /* If the link is down, just disable the interface to conserve power */
+       reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+       if (link)
+               reg |= RGMII_MODE_EN;
        else
-               offset = CORE_STS_OVERRIDE_GMIIP2_PORT(port);
+               reg &= ~RGMII_MODE_EN;
+       reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+}
 
-       duplex = core_readl(priv, CORE_DUPSTS);
-       pause = core_readl(priv, CORE_PAUSESTS);
+static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
+                                    unsigned int mode,
+                                    phy_interface_t interface)
+{
+       bcm_sf2_sw_mac_link_set(ds, port, interface, false);
+}
 
-       status->link = 0;
+static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
+                                  unsigned int mode,
+                                  phy_interface_t interface,
+                                  struct phy_device *phydev)
+{
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+       struct ethtool_eee *p = &priv->dev->ports[port].eee;
+
+       bcm_sf2_sw_mac_link_set(ds, port, interface, true);
+
+       if (mode == MLO_AN_PHY && phydev)
+               p->eee_enabled = b53_eee_init(ds, port, phydev);
+}
+
+static void bcm_sf2_sw_fixed_state(struct dsa_switch *ds, int port,
+                                  struct phylink_link_state *status)
+{
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+
+       status->link = false;
 
        /* MoCA port is special as we do not get link status from CORE_LNKSTS,
         * which means that we need to force the link at the port override
@@ -596,28 +656,10 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
                 */
                if (!status->link)
                        netif_carrier_off(ds->ports[port].slave);
-               status->duplex = 1;
+               status->duplex = DUPLEX_FULL;
        } else {
-               status->link = 1;
-               status->duplex = !!(duplex & (1 << port));
-       }
-
-       reg = core_readl(priv, offset);
-       reg |= SW_OVERRIDE;
-       if (status->link)
-               reg |= LINK_STS;
-       else
-               reg &= ~LINK_STS;
-       core_writel(priv, reg, offset);
-
-       if ((pause & (1 << port)) &&
-           (pause & (1 << (port + PAUSESTS_TX_PAUSE_SHIFT)))) {
-               status->asym_pause = 1;
-               status->pause = 1;
+               status->link = true;
        }
-
-       if (pause & (1 << port))
-               status->pause = 1;
 }
 
 static void bcm_sf2_enable_acb(struct dsa_switch *ds)
@@ -859,9 +901,13 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .get_strings            = b53_get_strings,
        .get_ethtool_stats      = b53_get_ethtool_stats,
        .get_sset_count         = b53_get_sset_count,
+       .get_ethtool_phy_stats  = b53_get_ethtool_phy_stats,
        .get_phy_flags          = bcm_sf2_sw_get_phy_flags,
-       .adjust_link            = bcm_sf2_sw_adjust_link,
-       .fixed_link_update      = bcm_sf2_sw_fixed_link_update,
+       .phylink_validate       = bcm_sf2_sw_validate,
+       .phylink_mac_config     = bcm_sf2_sw_mac_config,
+       .phylink_mac_link_down  = bcm_sf2_sw_mac_link_down,
+       .phylink_mac_link_up    = bcm_sf2_sw_mac_link_up,
+       .phylink_fixed_state    = bcm_sf2_sw_fixed_state,
        .suspend                = bcm_sf2_sw_suspend,
        .resume                 = bcm_sf2_sw_resume,
        .get_wol                = bcm_sf2_sw_get_wol,
@@ -1064,14 +1110,14 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
        bcm_sf2_intr_disable(priv);
 
        ret = devm_request_irq(&pdev->dev, priv->irq0, bcm_sf2_switch_0_isr, 0,
-                              "switch_0", priv);
+                              "switch_0", ds);
        if (ret < 0) {
                pr_err("failed to request switch_0 IRQ\n");
                goto out_mdio;
        }
 
        ret = devm_request_irq(&pdev->dev, priv->irq1, bcm_sf2_switch_1_isr, 0,
-                              "switch_1", priv);
+                              "switch_1", ds);
        if (ret < 0) {
                pr_err("failed to request switch_1 IRQ\n");
                goto out_mdio;
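
The bcm_sf2 conversion splits the old adjust_link into phylink callbacks: mac_config programs port mode, pause and ID bits once, while mac_link_up/mac_link_down only toggle RGMII_MODE_EN so a downed port stops passing data. That toggle, sketched as the read-modify-write it performs (reg_readl/reg_writel and REG_RGMII_CNTRL_P are the driver accessors visible in the hunks above):

    /* Sketch of the RGMII enable toggle now owned by mac_link_{up,down}. */
    static void sketch_rgmii_link(struct bcm_sf2_priv *priv, int port,
                                  bool link)
    {
            u32 reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));

            if (link)
                    reg |= RGMII_MODE_EN;   /* let data pass */
            else
                    reg &= ~RGMII_MODE_EN;  /* conserve power while down */
            reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
    }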
index 23b45da784cb601a7abf84b212717aee7dc64403..b89acaee12d4364247a694ad989d2ae06ca640aa 100644 (file)
@@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port,
        /* Locate the first rule available */
        if (fs->location == RX_CLS_LOC_ANY)
                rule_index = find_first_zero_bit(priv->cfp.used,
-                                                bcm_sf2_cfp_rule_size(priv));
+                                                priv->num_cfp_rules);
        else
                rule_index = fs->location;
 
+       if (rule_index > bcm_sf2_cfp_rule_size(priv))
+               return -ENOSPC;
+
        layout = &udf_tcpip4_layout;
        /* We only use one UDF slice for now */
        slice_num = bcm_sf2_get_slice_number(layout, 0);
@@ -562,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
         * first half because the HW search is by incrementing addresses.
         */
        if (fs->location == RX_CLS_LOC_ANY)
-               rule_index[0] = find_first_zero_bit(priv->cfp.used,
-                                                   bcm_sf2_cfp_rule_size(priv));
+               rule_index[1] = find_first_zero_bit(priv->cfp.used,
+                                                   priv->num_cfp_rules);
        else
-               rule_index[0] = fs->location;
+               rule_index[1] = fs->location;
+       if (rule_index[1] > bcm_sf2_cfp_rule_size(priv))
+               return -ENOSPC;
 
        /* Flag it as used (cleared on error path) such that we can immediately
         * obtain a second one to chain from.
         */
-       set_bit(rule_index[0], priv->cfp.used);
+       set_bit(rule_index[1], priv->cfp.used);
 
-       rule_index[1] = find_first_zero_bit(priv->cfp.used,
-                                           bcm_sf2_cfp_rule_size(priv));
-       if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) {
+       rule_index[0] = find_first_zero_bit(priv->cfp.used,
+                                           priv->num_cfp_rules);
+       if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) {
                ret = -ENOSPC;
                goto out_err;
        }
@@ -712,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
        /* Flag the second half rule as being used now, return it as the
         * location, and flag it as unique while dumping rules
         */
-       set_bit(rule_index[1], priv->cfp.used);
+       set_bit(rule_index[0], priv->cfp.used);
        set_bit(rule_index[1], priv->cfp.unique);
        fs->location = rule_index[1];
 
        return ret;
 
 out_err:
-       clear_bit(rule_index[0], priv->cfp.used);
+       clear_bit(rule_index[1], priv->cfp.used);
        return ret;
 }
 
@@ -785,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(struct bcm_sf2_priv *priv, int port,
        int ret;
        u32 reg;
 
-       /* Refuse deletion of unused rules, and the default reserved rule */
-       if (!test_bit(loc, priv->cfp.used) || loc == 0)
-               return -EINVAL;
-
        /* Indicate which rule we want to read */
        bcm_sf2_cfp_rule_addr_set(priv, loc);
 
@@ -826,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port,
        u32 next_loc = 0;
        int ret;
 
+       /* Refuse deleting unused rules, and those that are not unique, since
+        * that could leave an IPv6 rule with one of its chained rules still
+        * in the table.
+        */
+       if (!test_bit(loc, priv->cfp.unique) || loc == 0)
+               return -EINVAL;
+
        ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc);
        if (ret)
                return ret;
index f77be9f85cb36f3d229bcf1aa9dac0e86e6187c6..58f14af04639b05902d98ae3ded6b78efb4662fc 100644 (file)
@@ -86,16 +86,23 @@ static int dsa_loop_setup(struct dsa_switch *ds)
        return 0;
 }
 
-static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
+       if (sset != ETH_SS_STATS && sset != ETH_SS_PHY_STATS)
+               return 0;
+
        return __DSA_LOOP_CNT_MAX;
 }
 
-static void dsa_loop_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+static void dsa_loop_get_strings(struct dsa_switch *ds, int port,
+                                u32 stringset, uint8_t *data)
 {
        struct dsa_loop_priv *ps = ds->priv;
        unsigned int i;
 
+       if (stringset != ETH_SS_STATS && stringset != ETH_SS_PHY_STATS)
+               return;
+
        for (i = 0; i < __DSA_LOOP_CNT_MAX; i++)
                memcpy(data + i * ETH_GSTRING_LEN,
                       ps->ports[port].mib[i].name, ETH_GSTRING_LEN);
@@ -256,6 +263,7 @@ static const struct dsa_switch_ops dsa_loop_driver = {
        .get_strings            = dsa_loop_get_strings,
        .get_ethtool_stats      = dsa_loop_get_ethtool_stats,
        .get_sset_count         = dsa_loop_get_sset_count,
+       .get_ethtool_phy_stats  = dsa_loop_get_ethtool_stats,
        .phy_read               = dsa_loop_phy_read,
        .phy_write              = dsa_loop_phy_write,
        .port_bridge_join       = dsa_loop_port_bridge_join,
index fefa454f3e564fcdfef5e56602975f3e31a6a13e..b4f6e1a67dd9a1197f16b72ae0e412ba79c9a99c 100644 (file)
@@ -977,10 +977,14 @@ static const struct lan9303_mib_desc lan9303_mib[] = {
        { .offset = LAN9303_MAC_TX_LATECOL_0, .name = "TxLateCol", },
 };
 
-static void lan9303_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+static void lan9303_get_strings(struct dsa_switch *ds, int port,
+                               u32 stringset, uint8_t *data)
 {
        unsigned int u;
 
+       if (stringset != ETH_SS_STATS)
+               return;
+
        for (u = 0; u < ARRAY_SIZE(lan9303_mib); u++) {
                strncpy(data + u * ETH_GSTRING_LEN, lan9303_mib[u].name,
                        ETH_GSTRING_LEN);
@@ -1007,8 +1011,11 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
        }
 }
 
-static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
+       if (sset != ETH_SS_STATS)
+               return 0;
+
        return ARRAY_SIZE(lan9303_mib);
 }
 
index bcb3e6c734f25e099ac9356738edb87a22a1eb43..7210c49b79222bf1cb0252533db05a9f5189f9da 100644 (file)
@@ -439,15 +439,22 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
        ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
 }
 
-static int ksz_sset_count(struct dsa_switch *ds, int port)
+static int ksz_sset_count(struct dsa_switch *ds, int port, int sset)
 {
+       if (sset != ETH_SS_STATS)
+               return 0;
+
        return TOTAL_SWITCH_COUNTER_NUM;
 }
 
-static void ksz_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
+static void ksz_get_strings(struct dsa_switch *ds, int port,
+                           u32 stringset, uint8_t *buf)
 {
        int i;
 
+       if (stringset != ETH_SS_STATS)
+               return;
+
        for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
                memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string,
                       ETH_GSTRING_LEN);
index 80a4dbc3a49959556415a484b4dfd5b239449fb7..62e486652e622074b1ca1615e02a6cf029cc667b 100644 (file)
@@ -573,10 +573,14 @@ static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum,
 }
 
 static void
-mt7530_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+                  uint8_t *data)
 {
        int i;
 
+       if (stringset != ETH_SS_STATS)
+               return;
+
        for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++)
                strncpy(data + i * ETH_GSTRING_LEN, mt7530_mib[i].name,
                        ETH_GSTRING_LEN);
@@ -604,8 +608,11 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-mt7530_get_sset_count(struct dsa_switch *ds, int port)
+mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
+       if (sset != ETH_SS_STATS)
+               return 0;
+
        return ARRAY_SIZE(mt7530_mib);
 }
 
index 3d2091099f7f266de920f575a4d30b417bd64558..12df00f593b775669c7fc77a8869da6b41d012fb 100644 (file)
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
+#include <linux/platform_data/mv88e6xxx.h>
 #include <linux/netdevice.h>
 #include <linux/gpio/consumer.h>
 #include <linux/phy.h>
+#include <linux/phylink.h>
 #include <net/dsa.h>
 
 #include "chip.h"
@@ -580,6 +582,83 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
                dev_err(ds->dev, "p%d: failed to configure MAC\n", port);
 }
 
+static void mv88e6xxx_validate(struct dsa_switch *ds, int port,
+                              unsigned long *supported,
+                              struct phylink_link_state *state)
+{
+}
+
+static int mv88e6xxx_link_state(struct dsa_switch *ds, int port,
+                               struct phylink_link_state *state)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_link_state(chip, port, state);
+       mutex_unlock(&chip->reg_lock);
+
+       return err;
+}
+
+static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
+                                unsigned int mode,
+                                const struct phylink_link_state *state)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int speed, duplex, link, err;
+
+       if (mode == MLO_AN_PHY)
+               return;
+
+       if (mode == MLO_AN_FIXED) {
+               link = LINK_FORCED_UP;
+               speed = state->speed;
+               duplex = state->duplex;
+       } else {
+               speed = SPEED_UNFORCED;
+               duplex = DUPLEX_UNFORCED;
+               link = LINK_UNFORCED;
+       }
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_setup_mac(chip, port, link, speed, duplex,
+                                      state->interface);
+       mutex_unlock(&chip->reg_lock);
+
+       if (err && err != -EOPNOTSUPP)
+               dev_err(ds->dev, "p%d: failed to configure MAC\n", port);
+}
+
+static void mv88e6xxx_mac_link_force(struct dsa_switch *ds, int port, int link)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = chip->info->ops->port_set_link(chip, port, link);
+       mutex_unlock(&chip->reg_lock);
+
+       if (err)
+               dev_err(chip->dev, "p%d: failed to force MAC link\n", port);
+}
+
+static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
+                                   unsigned int mode,
+                                   phy_interface_t interface)
+{
+       if (mode == MLO_AN_FIXED)
+               mv88e6xxx_mac_link_force(ds, port, LINK_FORCED_DOWN);
+}
+
+static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port,
+                                 unsigned int mode, phy_interface_t interface,
+                                 struct phy_device *phydev)
+{
+       if (mode == MLO_AN_FIXED)
+               mv88e6xxx_mac_link_force(ds, port, LINK_FORCED_UP);
+}
+
 static int mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
 {
        if (!chip->info->ops->stats_snapshot)
@@ -665,13 +744,13 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
        case STATS_TYPE_PORT:
                err = mv88e6xxx_port_read(chip, port, s->reg, &reg);
                if (err)
-                       return UINT64_MAX;
+                       return U64_MAX;
 
                low = reg;
                if (s->size == 4) {
                        err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
                        if (err)
-                               return UINT64_MAX;
+                               return U64_MAX;
                        high = reg;
                }
                break;
@@ -685,7 +764,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
                        mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
                break;
        default:
-               return UINT64_MAX;
+               return U64_MAX;
        }
        value = (((u64)high) << 16) | low;
        return value;
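
UINT64_MAX gives way here to the kernel's own U64_MAX constant; the surrounding code still assembles each counter from two 16-bit register reads, returning the all-ones value as its error sentinel. The assembly step in isolation:

    /* Standalone sketch of the two-halves counter assembly above; err
     * models a failed register read, signalled with an all-ones sentinel. */
    #include <stdint.h>

    static uint64_t sketch_counter(uint16_t high, uint16_t low, int err)
    {
            if (err)
                    return UINT64_MAX;      /* the kernel code returns U64_MAX */
            return ((uint64_t)high << 16) | low;
    }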
@@ -742,11 +821,14 @@ static void mv88e6xxx_atu_vtu_get_strings(uint8_t *data)
 }
 
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
-                                 uint8_t *data)
+                                 u32 stringset, uint8_t *data)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
        int count = 0;
 
+       if (stringset != ETH_SS_STATS)
+               return;
+
        mutex_lock(&chip->reg_lock);
 
        if (chip->info->ops->stats_get_strings)
@@ -789,12 +871,15 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
                                              STATS_TYPE_BANK1);
 }
 
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
        int serdes_count = 0;
        int count = 0;
 
+       if (sset != ETH_SS_STATS)
+               return 0;
+
        mutex_lock(&chip->reg_lock);
        if (chip->info->ops->stats_get_sset_count)
                count = chip->info->ops->stats_get_sset_count(chip);
@@ -911,14 +996,6 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
 
 }
 
-static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
-{
-       if (chip->info->ops->stats_set_histogram)
-               return chip->info->ops->stats_set_histogram(chip);
-
-       return 0;
-}
-
 static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port)
 {
        return 32 * sizeof(u16);
@@ -1020,6 +1097,76 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
                dev_err(ds->dev, "p%d: failed to update state\n", port);
 }
 
+static int mv88e6xxx_pri_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       if (chip->info->ops->ieee_pri_map) {
+               err = chip->info->ops->ieee_pri_map(chip);
+               if (err)
+                       return err;
+       }
+
+       if (chip->info->ops->ip_pri_map) {
+               err = chip->info->ops->ip_pri_map(chip);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int mv88e6xxx_devmap_setup(struct mv88e6xxx_chip *chip)
+{
+       int target, port;
+       int err;
+
+       if (!chip->info->global2_addr)
+               return 0;
+
+       /* Initialize the routing port for each of the 32 possible target devices */
+       for (target = 0; target < 32; target++) {
+               port = 0x1f;
+               if (target < DSA_MAX_SWITCHES)
+                       if (chip->ds->rtable[target] != DSA_RTABLE_NONE)
+                               port = chip->ds->rtable[target];
+
+               err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
+               if (err)
+                       return err;
+       }
+
+       if (chip->info->ops->set_cascade_port) {
+               port = MV88E6XXX_CASCADE_PORT_MULTIPLE;
+               err = chip->info->ops->set_cascade_port(chip, port);
+               if (err)
+                       return err;
+       }
+
+       err = mv88e6xxx_g1_set_device_number(chip, chip->ds->index);
+       if (err)
+               return err;
+
+       return 0;
+}
+
+static int mv88e6xxx_trunk_setup(struct mv88e6xxx_chip *chip)
+{
+       /* Clear all trunk masks and mapping */
+       if (chip->info->global2_addr)
+               return mv88e6xxx_g2_trunk_clear(chip);
+
+       return 0;
+}
+
+static int mv88e6xxx_rmu_setup(struct mv88e6xxx_chip *chip)
+{
+       if (chip->info->ops->rmu_disable)
+               return chip->info->ops->rmu_disable(chip);
+
+       return 0;
+}
+
 static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip)
 {
        if (chip->info->ops->pot_clear)
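
Each of the helpers added above follows the same guard-and-call shape: the
per-chip operation runs only when the family's ops table provides it, so chips
without the hardware block skip the step cleanly. A minimal sketch of the
pattern, using a hypothetical example_setup op (not part of this diff):

        static int mv88e6xxx_example_setup(struct mv88e6xxx_chip *chip)
        {
                /* Families whose ops table lacks the hook skip the step */
                if (!chip->info->ops->example_setup)
                        return 0;

                return chip->info->ops->example_setup(chip);
        }
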
@@ -2113,53 +2260,16 @@ static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
        return err;
 }
 
-static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_stats_setup(struct mv88e6xxx_chip *chip)
 {
-       struct dsa_switch *ds = chip->ds;
        int err;
 
-       /* Disable remote management, and set the switch's DSA device number. */
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL2,
-                                MV88E6XXX_G1_CTL2_MULTIPLE_CASCADE |
-                                (ds->index & 0x1f));
-       if (err)
-               return err;
-
-       /* Configure the IP ToS mapping registers. */
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_0, 0x0000);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_1, 0x0000);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_2, 0x5555);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_3, 0x5555);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_4, 0xaaaa);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_5, 0xaaaa);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_6, 0xffff);
-       if (err)
-               return err;
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_7, 0xffff);
-       if (err)
-               return err;
-
-       /* Configure the IEEE 802.1p priority mapping register. */
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IEEE_PRI, 0xfa41);
-       if (err)
-               return err;
-
        /* Initialize the statistics unit */
-       err = mv88e6xxx_stats_set_histogram(chip);
-       if (err)
-               return err;
+       if (chip->info->ops->stats_set_histogram) {
+               err = chip->info->ops->stats_set_histogram(chip);
+               if (err)
+                       return err;
+       }
 
        return mv88e6xxx_g1_stats_clear(chip);
 }
@@ -2185,18 +2295,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
                        goto unlock;
        }
 
-       /* Setup Switch Global 1 Registers */
-       err = mv88e6xxx_g1_setup(chip);
-       if (err)
-               goto unlock;
-
-       /* Setup Switch Global 2 Registers */
-       if (chip->info->global2_addr) {
-               err = mv88e6xxx_g2_setup(chip);
-               if (err)
-                       goto unlock;
-       }
-
        err = mv88e6xxx_irl_setup(chip);
        if (err)
                goto unlock;
@@ -2229,10 +2327,26 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
        if (err)
                goto unlock;
 
+       err = mv88e6xxx_rmu_setup(chip);
+       if (err)
+               goto unlock;
+
        err = mv88e6xxx_rsvd2cpu_setup(chip);
        if (err)
                goto unlock;
 
+       err = mv88e6xxx_trunk_setup(chip);
+       if (err)
+               goto unlock;
+
+       err = mv88e6xxx_devmap_setup(chip);
+       if (err)
+               goto unlock;
+
+       err = mv88e6xxx_pri_setup(chip);
+       if (err)
+               goto unlock;
+
        /* Setup PTP Hardware Clock and timestamping */
        if (chip->info->ptp_support) {
                err = mv88e6xxx_ptp_setup(chip);
@@ -2244,6 +2358,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
                        goto unlock;
        }
 
+       err = mv88e6xxx_stats_setup(chip);
+       if (err)
+               goto unlock;
+
 unlock:
        mutex_unlock(&chip->reg_lock);
 
@@ -2337,10 +2455,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
                        return err;
        }
 
-       if (np)
-               err = of_mdiobus_register(bus, np);
-       else
-               err = mdiobus_register(bus);
+       err = of_mdiobus_register(bus, np);
        if (err) {
                dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
                mv88e6xxx_g2_irq_mdio_free(chip, bus);
@@ -2460,6 +2575,8 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
 
 static const struct mv88e6xxx_ops mv88e6085_ops = {
        /* MV88E6XXX_FAMILY_6097 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6185_phy_ppu_read,
@@ -2488,12 +2605,16 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
        .ppu_enable = mv88e6185_g1_ppu_enable,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
+       .rmu_disable = mv88e6085_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .serdes_power = mv88e6341_serdes_power,
 };
 
 static const struct mv88e6xxx_ops mv88e6095_ops = {
        /* MV88E6XXX_FAMILY_6095 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6185_phy_ppu_read,
        .phy_write = mv88e6185_phy_ppu_write,
@@ -2518,6 +2639,8 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
 
 static const struct mv88e6xxx_ops mv88e6097_ops = {
        /* MV88E6XXX_FAMILY_6097 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -2545,12 +2668,15 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6085_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 };
 
 static const struct mv88e6xxx_ops mv88e6123_ops = {
        /* MV88E6XXX_FAMILY_6165 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -2579,6 +2705,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
 
 static const struct mv88e6xxx_ops mv88e6131_ops = {
        /* MV88E6XXX_FAMILY_6185 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6185_phy_ppu_read,
        .phy_write = mv88e6185_phy_ppu_write,
@@ -2603,6 +2731,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
        .watchdog_ops = &mv88e6097_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
        .ppu_enable = mv88e6185_g1_ppu_enable,
+       .set_cascade_port = mv88e6185_g1_set_cascade_port,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
@@ -2611,6 +2740,8 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
 
 static const struct mv88e6xxx_ops mv88e6141_ops = {
        /* MV88E6XXX_FAMILY_6341 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom8,
        .set_eeprom = mv88e6xxx_g2_set_eeprom8,
@@ -2648,6 +2779,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
 
 static const struct mv88e6xxx_ops mv88e6161_ops = {
        /* MV88E6XXX_FAMILY_6165 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -2681,6 +2814,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
 
 static const struct mv88e6xxx_ops mv88e6165_ops = {
        /* MV88E6XXX_FAMILY_6165 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6165_phy_read,
@@ -2707,6 +2842,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
 
 static const struct mv88e6xxx_ops mv88e6171_ops = {
        /* MV88E6XXX_FAMILY_6351 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -2741,6 +2878,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
 
 static const struct mv88e6xxx_ops mv88e6172_ops = {
        /* MV88E6XXX_FAMILY_6352 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -2771,6 +2910,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6352_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
@@ -2779,6 +2919,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
 
 static const struct mv88e6xxx_ops mv88e6175_ops = {
        /* MV88E6XXX_FAMILY_6351 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -2809,10 +2951,13 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .serdes_power = mv88e6341_serdes_power,
 };
 
 static const struct mv88e6xxx_ops mv88e6176_ops = {
        /* MV88E6XXX_FAMILY_6352 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -2843,6 +2988,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6352_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
@@ -2851,6 +2997,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
 
 static const struct mv88e6xxx_ops mv88e6185_ops = {
        /* MV88E6XXX_FAMILY_6185 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6185_phy_ppu_read,
        .phy_write = mv88e6185_phy_ppu_write,
@@ -2870,6 +3018,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
+       .set_cascade_port = mv88e6185_g1_set_cascade_port,
        .ppu_enable = mv88e6185_g1_ppu_enable,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
@@ -2907,6 +3056,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6390_g1_rmu_disable,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -2943,6 +3093,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6390_g1_rmu_disable,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -2979,6 +3130,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6390_g1_rmu_disable,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -2986,6 +3138,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
 
 static const struct mv88e6xxx_ops mv88e6240_ops = {
        /* MV88E6XXX_FAMILY_6352 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -3016,6 +3170,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6352_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
@@ -3054,6 +3209,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6390_g1_rmu_disable,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3063,6 +3219,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
 
 static const struct mv88e6xxx_ops mv88e6320_ops = {
        /* MV88E6XXX_FAMILY_6320 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -3099,6 +3257,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
        /* MV88E6XXX_FAMILY_6320 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -3133,6 +3293,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
 
 static const struct mv88e6xxx_ops mv88e6341_ops = {
        /* MV88E6XXX_FAMILY_6341 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom8,
        .set_eeprom = mv88e6xxx_g2_set_eeprom8,
@@ -3171,6 +3333,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
 
 static const struct mv88e6xxx_ops mv88e6350_ops = {
        /* MV88E6XXX_FAMILY_6351 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -3205,6 +3369,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
 
 static const struct mv88e6xxx_ops mv88e6351_ops = {
        /* MV88E6XXX_FAMILY_6351 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
@@ -3240,6 +3406,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
        /* MV88E6XXX_FAMILY_6352 */
+       .ieee_pri_map = mv88e6085_g1_ieee_pri_map,
+       .ip_pri_map = mv88e6085_g1_ip_pri_map,
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -3270,6 +3438,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6352_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .serdes_power = mv88e6352_serdes_power,
@@ -3313,6 +3482,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6390_g1_rmu_disable,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3353,6 +3523,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
+       .rmu_disable = mv88e6390_g1_rmu_disable,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
@@ -3370,6 +3541,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3391,6 +3563,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3410,6 +3583,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 8,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3431,6 +3605,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3452,6 +3627,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3472,6 +3648,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 11,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x10,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -3493,6 +3670,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3514,6 +3692,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3535,6 +3714,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3557,6 +3737,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3578,6 +3759,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3600,6 +3782,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3621,6 +3804,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3641,6 +3825,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .tag_protocol = DSA_TAG_PROTO_DSA,
@@ -3663,6 +3848,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -3684,6 +3870,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 11,
                .max_vid = 8191,
                .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -3707,6 +3894,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3730,6 +3918,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -3753,6 +3942,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3776,6 +3966,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3798,6 +3989,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 11,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x10,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -3820,6 +4012,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3841,6 +4034,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3863,6 +4057,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
@@ -3885,6 +4080,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -3907,6 +4103,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
+               .phy_base_addr = 0x0,
                .global1_addr = 0x1b,
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
@@ -4099,6 +4296,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .get_tag_protocol       = mv88e6xxx_get_tag_protocol,
        .setup                  = mv88e6xxx_setup,
        .adjust_link            = mv88e6xxx_adjust_link,
+       .phylink_validate       = mv88e6xxx_validate,
+       .phylink_mac_link_state = mv88e6xxx_link_state,
+       .phylink_mac_config     = mv88e6xxx_mac_config,
+       .phylink_mac_link_down  = mv88e6xxx_mac_link_down,
+       .phylink_mac_link_up    = mv88e6xxx_mac_link_up,
        .get_strings            = mv88e6xxx_get_strings,
        .get_ethtool_stats      = mv88e6xxx_get_ethtool_stats,
        .get_sset_count         = mv88e6xxx_get_sset_count,
@@ -4149,6 +4351,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
                return -ENOMEM;
 
        ds->priv = chip;
+       ds->dev = dev;
        ds->ops = &mv88e6xxx_switch_ops;
        ds->ageing_time_min = chip->info->age_time_coeff;
        ds->ageing_time_max = chip->info->age_time_coeff * U8_MAX;
@@ -4163,42 +4366,82 @@ static void mv88e6xxx_unregister_switch(struct mv88e6xxx_chip *chip)
        dsa_unregister_switch(chip->ds);
 }
 
+static const void *pdata_device_get_match_data(struct device *dev)
+{
+       const struct of_device_id *matches = dev->driver->of_match_table;
+       const struct dsa_mv88e6xxx_pdata *pdata = dev->platform_data;
+
+       for (; matches->name[0] || matches->type[0] || matches->compatible[0];
+            matches++) {
+               if (!strcmp(pdata->compatible, matches->compatible))
+                       return matches->data;
+       }
+       return NULL;
+}
+
 static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 {
+       struct dsa_mv88e6xxx_pdata *pdata = mdiodev->dev.platform_data;
+       const struct mv88e6xxx_info *compat_info = NULL;
        struct device *dev = &mdiodev->dev;
        struct device_node *np = dev->of_node;
-       const struct mv88e6xxx_info *compat_info;
        struct mv88e6xxx_chip *chip;
-       u32 eeprom_len;
+       int port;
        int err;
 
-       compat_info = of_device_get_match_data(dev);
+       if (np)
+               compat_info = of_device_get_match_data(dev);
+
+       if (pdata) {
+               compat_info = pdata_device_get_match_data(dev);
+
+               if (!pdata->netdev)
+                       return -EINVAL;
+
+               for (port = 0; port < DSA_MAX_PORTS; port++) {
+                       if (!(pdata->enabled_ports & (1 << port)))
+                               continue;
+                       if (strcmp(pdata->cd.port_names[port], "cpu"))
+                               continue;
+                       pdata->cd.netdev[port] = &pdata->netdev->dev;
+                       break;
+               }
+       }
+
        if (!compat_info)
                return -EINVAL;
 
        chip = mv88e6xxx_alloc_chip(dev);
-       if (!chip)
-               return -ENOMEM;
+       if (!chip) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        chip->info = compat_info;
 
        err = mv88e6xxx_smi_init(chip, mdiodev->bus, mdiodev->addr);
        if (err)
-               return err;
+               goto out;
 
        chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
-       if (IS_ERR(chip->reset))
-               return PTR_ERR(chip->reset);
+       if (IS_ERR(chip->reset)) {
+               err = PTR_ERR(chip->reset);
+               goto out;
+       }
 
        err = mv88e6xxx_detect(chip);
        if (err)
-               return err;
+               goto out;
 
        mv88e6xxx_phy_init(chip);
 
-       if (chip->info->ops->get_eeprom &&
-           !of_property_read_u32(np, "eeprom-length", &eeprom_len))
-               chip->eeprom_len = eeprom_len;
+       if (chip->info->ops->get_eeprom) {
+               if (np)
+                       of_property_read_u32(np, "eeprom-length",
+                                            &chip->eeprom_len);
+               else
+                       chip->eeprom_len = pdata->eeprom_len;
+       }
 
        mutex_lock(&chip->reg_lock);
        err = mv88e6xxx_switch_reset(chip);
@@ -4267,6 +4510,9 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
                mv88e6xxx_irq_poll_free(chip);
        mutex_unlock(&chip->reg_lock);
 out:
+       if (pdata)
+               dev_put(pdata->netdev);
+
        return err;
 }
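
Probe can now match via either an of_node or platform data. A hypothetical
board file using the new pdata path might look roughly like this (field names
come from the diff above; the values are made up):

        static struct dsa_mv88e6xxx_pdata switch_pdata = {
                .compatible    = "marvell,mv88e6085",
                .enabled_ports = 0x1f,          /* ports 0-4 */
                .eeprom_len    = 512,
                /* .netdev must point at the master interface and one of
                 * .cd.port_names[] must be "cpu": probe rejects a NULL
                 * .netdev and searches the enabled ports for "cpu" to
                 * wire up the master device.
                 */
        };
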
 
index 80490f66bc066eb7c13b64c69c96f7287f480db2..8ac3fbb15352d85f3f38ee4ffa30d551d34bc337 100644 (file)
 #include <linux/timecounter.h>
 #include <net/dsa.h>
 
-#ifndef UINT64_MAX
-#define UINT64_MAX             (u64)(~((u64)0))
-#endif
-
 #define SMI_CMD                        0x00
 #define SMI_CMD_BUSY           BIT(15)
 #define SMI_CMD_CLAUSE_22      BIT(12)
@@ -114,6 +110,7 @@ struct mv88e6xxx_info {
        unsigned int num_gpio;
        unsigned int max_vid;
        unsigned int port_base_addr;
+       unsigned int phy_base_addr;
        unsigned int global1_addr;
        unsigned int global2_addr;
        unsigned int age_time_coeff;
@@ -241,7 +238,7 @@ struct mv88e6xxx_chip {
        struct gpio_desc *reset;
 
        /* set to size of eeprom if supported by the switch */
-       int             eeprom_len;
+       u32 eeprom_len;
 
        /* List of mdio busses */
        struct list_head mdios;
@@ -297,6 +294,9 @@ struct mv88e6xxx_mdio_bus {
 };
 
 struct mv88e6xxx_ops {
+       int (*ieee_pri_map)(struct mv88e6xxx_chip *chip);
+       int (*ip_pri_map)(struct mv88e6xxx_chip *chip);
+
        /* Ingress Rate Limit unit (IRL) operations */
        int (*irl_init_all)(struct mv88e6xxx_chip *chip, int port);
 
@@ -405,6 +405,12 @@ struct mv88e6xxx_ops {
                               uint64_t *data);
        int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
        int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
+
+#define MV88E6XXX_CASCADE_PORT_NONE            0xe
+#define MV88E6XXX_CASCADE_PORT_MULTIPLE                0xf
+
+       int (*set_cascade_port)(struct mv88e6xxx_chip *chip, int port);
+
        const struct mv88e6xxx_irq_ops *watchdog_ops;
 
        int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
@@ -430,6 +436,9 @@ struct mv88e6xxx_ops {
 
        /* Interface to the AVB/PTP registers */
        const struct mv88e6xxx_avb_ops *avb_ops;
+
+       /* Remote Management Unit operations */
+       int (*rmu_disable)(struct mv88e6xxx_chip *chip);
 };
 
 struct mv88e6xxx_irq_ops {
index b43bd6476632e7ea25d867a1b49c5b32afa815de..d721ccf7d8bed8230fa5fbbac2ecfd5068680cac 100644 (file)
@@ -241,6 +241,64 @@ int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip)
        return mv88e6185_g1_wait_ppu_disabled(chip);
 }
 
+/* Offset 0x10: IP-PRI Mapping Register 0
+ * Offset 0x11: IP-PRI Mapping Register 1
+ * Offset 0x12: IP-PRI Mapping Register 2
+ * Offset 0x13: IP-PRI Mapping Register 3
+ * Offset 0x14: IP-PRI Mapping Register 4
+ * Offset 0x15: IP-PRI Mapping Register 5
+ * Offset 0x16: IP-PRI Mapping Register 6
+ * Offset 0x17: IP-PRI Mapping Register 7
+ */
+
+int mv88e6085_g1_ip_pri_map(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       /* Reset the IP TOS/DiffServ/Traffic priorities to defaults */
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_0, 0x0000);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_1, 0x0000);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_2, 0x5555);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_3, 0x5555);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_4, 0xaaaa);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_5, 0xaaaa);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_6, 0xffff);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IP_PRI_7, 0xffff);
+       if (err)
+               return err;
+
+       return 0;
+}
+
+/* Offset 0x18: IEEE-PRI Register */
+
+int mv88e6085_g1_ieee_pri_map(struct mv88e6xxx_chip *chip)
+{
+       /* Reset the IEEE Tag priorities to defaults */
+       return mv88e6xxx_g1_write(chip, MV88E6XXX_G1_IEEE_PRI, 0xfa41);
+}
+
 /* Offset 0x1a: Monitor Control */
 /* Offset 0x1a: Monitor & MGMT Control on some devices */
 
@@ -350,20 +408,59 @@ int mv88e6390_g1_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
 
 /* Offset 0x1c: Global Control 2 */
 
-int mv88e6390_g1_stats_set_histogram(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_g1_ctl2_mask(struct mv88e6xxx_chip *chip, u16 mask,
+                                 u16 val)
 {
-       u16 val;
+       u16 reg;
        int err;
 
-       err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL2, &val);
+       err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL2, &reg);
        if (err)
                return err;
 
-       val |= MV88E6XXX_G1_CTL2_HIST_RX_TX;
+       reg &= ~mask;
+       reg |= val & mask;
 
-       err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL2, val);
+       return mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL2, reg);
+}
 
-       return err;
+int mv88e6185_g1_set_cascade_port(struct mv88e6xxx_chip *chip, int port)
+{
+       const u16 mask = MV88E6185_G1_CTL2_CASCADE_PORT_MASK;
+
+       return mv88e6xxx_g1_ctl2_mask(chip, mask, port << __bf_shf(mask));
+}
+
+int mv88e6085_g1_rmu_disable(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g1_ctl2_mask(chip, MV88E6085_G1_CTL2_P10RM |
+                                     MV88E6085_G1_CTL2_RM_ENABLE, 0);
+}
+
+int mv88e6352_g1_rmu_disable(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g1_ctl2_mask(chip, MV88E6352_G1_CTL2_RMU_MODE_MASK,
+                                     MV88E6352_G1_CTL2_RMU_MODE_DISABLED);
+}
+
+int mv88e6390_g1_rmu_disable(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g1_ctl2_mask(chip, MV88E6390_G1_CTL2_RMU_MODE_MASK,
+                                     MV88E6390_G1_CTL2_RMU_MODE_DISABLED);
+}
+
+int mv88e6390_g1_stats_set_histogram(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g1_ctl2_mask(chip, MV88E6390_G1_CTL2_HIST_MODE_MASK,
+                                     MV88E6390_G1_CTL2_HIST_MODE_RX |
+                                     MV88E6390_G1_CTL2_HIST_MODE_TX);
+}
+
+int mv88e6xxx_g1_set_device_number(struct mv88e6xxx_chip *chip, int index)
+{
+       return mv88e6xxx_g1_ctl2_mask(chip,
+                                     MV88E6XXX_G1_CTL2_DEVICE_NUMBER_MASK,
+                                     index);
 }
 
 /* Offset 0x1d: Statistics Operation 2 */
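
mv88e6xxx_g1_ctl2_mask() above replaces the old blind writes to CTL2 with a
read-modify-write that only touches the requested field. A worked example with
a made-up starting value:

        u16 reg  = 0x12c0;                              /* current CTL2 contents (made up) */
        u16 mask = MV88E6390_G1_CTL2_RMU_MODE_MASK;     /* 0x0700 */
        u16 val  = MV88E6390_G1_CTL2_RMU_MODE_DISABLED; /* 0x0700 */

        reg &= ~mask;           /* 0x10c0: RMU mode field cleared */
        reg |= val & mask;      /* 0x17c0: RMU disabled, other bits preserved */
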
index 6aee7316fea65cd439facd0e2945daa4d6115a7c..7c791c1da4b98af8cdd2885f91ce18ae119ae1d2 100644 (file)
 
 /* Offset 0x1C: Global Control 2 */
 #define MV88E6XXX_G1_CTL2                      0x1c
-#define MV88E6XXX_G1_CTL2_NO_CASCADE           0xe000
-#define MV88E6XXX_G1_CTL2_MULTIPLE_CASCADE     0xf000
-#define MV88E6XXX_G1_CTL2_HIST_RX              0x0040
-#define MV88E6XXX_G1_CTL2_HIST_TX              0x0080
-#define MV88E6XXX_G1_CTL2_HIST_RX_TX           0x00c0
+#define MV88E6185_G1_CTL2_CASCADE_PORT_MASK    0xf000
+#define MV88E6185_G1_CTL2_CASCADE_PORT_NONE    0xe000
+#define MV88E6185_G1_CTL2_CASCADE_PORT_MULTI   0xf000
+#define MV88E6352_G1_CTL2_HEADER_TYPE_MASK     0xc000
+#define MV88E6352_G1_CTL2_HEADER_TYPE_ORIG     0x0000
+#define MV88E6352_G1_CTL2_HEADER_TYPE_MGMT     0x4000
+#define MV88E6390_G1_CTL2_HEADER_TYPE_LAG      0x8000
+#define MV88E6352_G1_CTL2_RMU_MODE_MASK                0x3000
+#define MV88E6352_G1_CTL2_RMU_MODE_DISABLED    0x0000
+#define MV88E6352_G1_CTL2_RMU_MODE_PORT_4      0x1000
+#define MV88E6352_G1_CTL2_RMU_MODE_PORT_5      0x2000
+#define MV88E6352_G1_CTL2_RMU_MODE_PORT_6      0x3000
+#define MV88E6085_G1_CTL2_DA_CHECK             0x4000
+#define MV88E6085_G1_CTL2_P10RM                        0x2000
+#define MV88E6085_G1_CTL2_RM_ENABLE            0x1000
+#define MV88E6352_G1_CTL2_DA_CHECK             0x0800
+#define MV88E6390_G1_CTL2_RMU_MODE_MASK                0x0700
+#define MV88E6390_G1_CTL2_RMU_MODE_PORT_0      0x0000
+#define MV88E6390_G1_CTL2_RMU_MODE_PORT_1      0x0100
+#define MV88E6390_G1_CTL2_RMU_MODE_PORT_9      0x0200
+#define MV88E6390_G1_CTL2_RMU_MODE_PORT_10     0x0300
+#define MV88E6390_G1_CTL2_RMU_MODE_ALL_DSA     0x0600
+#define MV88E6390_G1_CTL2_RMU_MODE_DISABLED    0x0700
+#define MV88E6390_G1_CTL2_HIST_MODE_MASK       0x00c0
+#define MV88E6390_G1_CTL2_HIST_MODE_RX         0x0040
+#define MV88E6390_G1_CTL2_HIST_MODE_TX         0x0080
+#define MV88E6352_G1_CTL2_CTR_MODE_MASK                0x0060
+#define MV88E6390_G1_CTL2_CTR_MODE             0x0020
+#define MV88E6XXX_G1_CTL2_DEVICE_NUMBER_MASK   0x001f
 
 /* Offset 0x1D: Stats Operation Register */
 #define MV88E6XXX_G1_STATS_OP                  0x1d
@@ -253,6 +277,17 @@ int mv88e6095_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g1_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 
+int mv88e6085_g1_ip_pri_map(struct mv88e6xxx_chip *chip);
+int mv88e6085_g1_ieee_pri_map(struct mv88e6xxx_chip *chip);
+
+int mv88e6185_g1_set_cascade_port(struct mv88e6xxx_chip *chip, int port);
+
+int mv88e6085_g1_rmu_disable(struct mv88e6xxx_chip *chip);
+int mv88e6352_g1_rmu_disable(struct mv88e6xxx_chip *chip);
+int mv88e6390_g1_rmu_disable(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_g1_set_device_number(struct mv88e6xxx_chip *chip, int index);
+
 int mv88e6xxx_g1_atu_set_learn2all(struct mv88e6xxx_chip *chip, bool learn2all);
 int mv88e6xxx_g1_atu_set_age_time(struct mv88e6xxx_chip *chip,
                                  unsigned int msecs);
index 0ce627fded48f2182676d46194c363d30bb93287..91a3cb2452acc1e901b82c113aac9ba07504a3af 100644 (file)
@@ -119,37 +119,17 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
 
 /* Offset 0x06: Device Mapping Table register */
 
-static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
-                                            int target, int port)
+int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
+                                     int port)
 {
-       u16 val = (target << 8) | (port & 0xf);
+       /* Modern chips use 5 bits to define a device mapping port,
+        * but bit 4 is reserved on older chips, so it is safe to use.
+        */
+       u16 val = (target << 8) | (port & 0x1f);
 
        return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_DEVICE_MAPPING, val);
 }
 
-static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
-{
-       int target, port;
-       int err;
-
-       /* Initialize the routing port to the 32 possible target devices */
-       for (target = 0; target < 32; ++target) {
-               port = 0xf;
-
-               if (target < DSA_MAX_SWITCHES) {
-                       port = chip->ds->rtable[target];
-                       if (port == DSA_RTABLE_NONE)
-                               port = 0xf;
-               }
-
-               err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
 /* Offset 0x07: Trunk Mask Table register */
 
 static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
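
For reference, the Device Mapping update word packs the target device into
bits 12:8 and the routing port, now masked to 5 bits, into the low bits; with
made-up values target = 3 and port = 0x1f (no route):

        u16 val = (3 << 8) | (0x1f & 0x1f);     /* 0x031f */

mv88e6xxx_g2_update() then performs the actual self-clearing UPDATE write (bit
15, see the MV88E6XXX_G2_DEVICE_MAPPING_UPDATE define in global2.h below).
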
@@ -174,7 +154,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
        return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_TRUNK_MAPPING, val);
 }
 
-static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
+int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip)
 {
        const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1;
        int i, err;
@@ -1067,9 +1047,6 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
 {
        int err, irq, virq;
 
-       if (!chip->dev->of_node)
-               return -EINVAL;
-
        chip->g2_irq.domain = irq_domain_add_simple(
                chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip);
        if (!chip->g2_irq.domain)
@@ -1118,7 +1095,7 @@ int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
                        err = irq;
                        goto out;
                }
-               bus->irq[chip->info->port_base_addr + phy] = irq;
+               bus->irq[chip->info->phy_base_addr + phy] = irq;
        }
        return 0;
 out:
@@ -1138,31 +1115,3 @@ void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
        for (phy = 0; phy < chip->info->num_internal_phys; phy++)
                irq_dispose_mapping(bus->irq[phy]);
 }
-
-int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
-       u16 reg;
-       int err;
-
-       /* Ignore removed tag data on doubly tagged packets, disable
-        * flow control messages, force flow control priority to the
-        * highest, and send all special multicast frames to the CPU
-        * port at the highest priority.
-        */
-       reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4);
-       err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg);
-       if (err)
-               return err;
-
-       /* Program the DSA routing table. */
-       err = mv88e6xxx_g2_set_device_mapping(chip);
-       if (err)
-               return err;
-
-       /* Clear all trunk masks and mapping. */
-       err = mv88e6xxx_g2_clear_trunk(chip);
-       if (err)
-               return err;
-
-       return 0;
-}
index 520ec70d32e84f304cf9246eb904bb5016474dd6..37e8ce2c72a0d3bd293f53bd87ec12c8a270e190 100644 (file)
@@ -60,7 +60,8 @@
 #define MV88E6XXX_G2_DEVICE_MAPPING            0x06
 #define MV88E6XXX_G2_DEVICE_MAPPING_UPDATE     0x8000
 #define MV88E6XXX_G2_DEVICE_MAPPING_DEV_MASK   0x1f00
-#define MV88E6XXX_G2_DEVICE_MAPPING_PORT_MASK  0x000f
+#define MV88E6352_G2_DEVICE_MAPPING_PORT_MASK  0x000f
+#define MV88E6390_G2_DEVICE_MAPPING_PORT_MASK  0x001f
 
 /* Offset 0x07: Trunk Mask Table Register */
 #define MV88E6XXX_G2_TRUNK_MASK                        0x07
@@ -313,7 +314,6 @@ int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
                           int src_port, u16 data);
 int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
 
-int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
 
@@ -327,6 +327,11 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 
 int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
 
+int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
+                                     int port);
+
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 
@@ -441,11 +446,6 @@ static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
        return -EOPNOTSUPP;
 }
 
-static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
-       return -EOPNOTSUPP;
-}
-
 static inline int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
 {
        return -EOPNOTSUPP;
@@ -495,6 +495,17 @@ static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
        return -EOPNOTSUPP;
 }
 
+static inline int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
+                                                   int target, int port)
+{
+       return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 #endif /* _MV88E6XXX_GLOBAL2_H */
index 6315774d72b3304d5ccd1729cb150c183bbac19e..429d0ebcd5b168631fe1f2cc5f732b8ab324b5f9 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/bitfield.h>
 #include <linux/if_bridge.h>
 #include <linux/phy.h>
+#include <linux/phylink.h>
 
 #include "chip.h"
 #include "port.h"
@@ -378,6 +379,44 @@ int mv88e6xxx_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode)
        return 0;
 }
 
+int mv88e6xxx_port_link_state(struct mv88e6xxx_chip *chip, int port,
+                             struct phylink_link_state *state)
+{
+       int err;
+       u16 reg;
+
+       err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
+       if (err)
+               return err;
+
+       switch (reg & MV88E6XXX_PORT_STS_SPEED_MASK) {
+       case MV88E6XXX_PORT_STS_SPEED_10:
+               state->speed = SPEED_10;
+               break;
+       case MV88E6XXX_PORT_STS_SPEED_100:
+               state->speed = SPEED_100;
+               break;
+       case MV88E6XXX_PORT_STS_SPEED_1000:
+               state->speed = SPEED_1000;
+               break;
+       case MV88E6XXX_PORT_STS_SPEED_10000:
+               if ((reg & MV88E6XXX_PORT_STS_CMODE_MASK) ==
+                   MV88E6XXX_PORT_STS_CMODE_2500BASEX)
+                       state->speed = SPEED_2500;
+               else
+                       state->speed = SPEED_10000;
+               break;
+       }
+
+       state->duplex = reg & MV88E6XXX_PORT_STS_DUPLEX ?
+                       DUPLEX_FULL : DUPLEX_HALF;
+       state->link = !!(reg & MV88E6XXX_PORT_STS_LINK);
+       state->an_enabled = 1;
+       state->an_complete = state->link;
+
+       return 0;
+}
+
 /* Offset 0x02: Jamming Control
  *
  * Do not limit the period of time that this port can be paused for by
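
A sketch of how the phylink glue registered in chip.c might consume this
helper; the exact call site is an assumption here, not part of this diff:

        struct phylink_link_state state;
        int err;

        err = mv88e6xxx_port_link_state(chip, port, &state);
        if (!err)
                dev_dbg(chip->dev, "p%d: link=%d speed=%d duplex=%d\n",
                        port, state.link, state.speed, state.duplex);
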
index b16d5f0e6e9c5520c54d6cf90df0c0734d5df84d..5e1db1b221ca1d85d911eae054c4ad3c31503ca8 100644 (file)
@@ -29,6 +29,7 @@
 #define MV88E6XXX_PORT_STS_SPEED_10            0x0000
 #define MV88E6XXX_PORT_STS_SPEED_100           0x0100
 #define MV88E6XXX_PORT_STS_SPEED_1000          0x0200
+#define MV88E6XXX_PORT_STS_SPEED_10000         0x0300
 #define MV88E6352_PORT_STS_EEE                 0x0040
 #define MV88E6165_PORT_STS_AM_DIS              0x0040
 #define MV88E6185_PORT_STS_MGMII               0x0040
@@ -295,6 +296,8 @@ int mv88e6390_port_pause_limit(struct mv88e6xxx_chip *chip, int port, u8 in,
 int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
                              phy_interface_t mode);
 int mv88e6xxx_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
+int mv88e6xxx_port_link_state(struct mv88e6xxx_chip *chip, int port,
+                             struct phylink_link_state *state);
 int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port);
 int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
                                     int upstream_port);
index fb058fd35c0d5ea362af605b880bbd95f4eb7736..880b2cf0a53019027299cb4ae5356100b94fc651 100644 (file)
@@ -326,3 +326,23 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
 
        return 0;
 }
+
+int mv88e6341_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+       int err;
+       u8 cmode;
+
+       if (port != 5)
+               return 0;
+
+       err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
+       if (err)
+               return err;
+
+       if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X ||
+           cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
+           cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX)
+               return mv88e6390_serdes_sgmii(chip, MV88E6341_ADDR_SERDES, on);
+
+       return 0;
+}
index 1897c01c6e19669410bb1bd7bdcf6d86e426e089..b6e5fbd46b5e37101023a18c56098748a1df1d4a 100644 (file)
@@ -19,6 +19,8 @@
 #define MV88E6352_ADDR_SERDES          0x0f
 #define MV88E6352_SERDES_PAGE_FIBER    0x01
 
+#define MV88E6341_ADDR_SERDES          0x15
+
 #define MV88E6390_PORT9_LANE0          0x09
 #define MV88E6390_PORT9_LANE1          0x12
 #define MV88E6390_PORT9_LANE2          0x13
@@ -42,6 +44,7 @@
 #define MV88E6390_SGMII_CONTROL_LOOPBACK       BIT(14)
 #define MV88E6390_SGMII_CONTROL_PDOWN          BIT(11)
 
+int mv88e6341_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
index 600d5ad1fbde265afbf7dc030d694cadbaa8fcef..757b6d90ea365b96150e6ac51a8503cf7cb3f24b 100644 (file)
@@ -600,10 +600,13 @@ qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val)
 }
 
 static void
-qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data)
 {
        int i;
 
+       if (stringset != ETH_SS_STATS)
+               return;
+
        for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++)
                strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name,
                        ETH_GSTRING_LEN);
@@ -631,8 +634,11 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-qca8k_get_sset_count(struct dsa_switch *ds, int port)
+qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
+       if (sset != ETH_SS_STATS)
+               return 0;
+
        return ARRAY_SIZE(ar8327_mib);
 }
 
index 36c8950dbd2d80699f396f217f0f438479f68355..5bc168314ea2f0db8ab58d99f772b4f4f6a38570 100644 (file)
@@ -765,8 +765,9 @@ static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
                                        struct net_device *dev);
 static int vortex_rx(struct net_device *dev);
 static int boomerang_rx(struct net_device *dev);
-static irqreturn_t vortex_interrupt(int irq, void *dev_id);
-static irqreturn_t boomerang_interrupt(int irq, void *dev_id);
+static irqreturn_t vortex_boomerang_interrupt(int irq, void *dev_id);
+static irqreturn_t _vortex_interrupt(int irq, struct net_device *dev);
+static irqreturn_t _boomerang_interrupt(int irq, struct net_device *dev);
 static int vortex_close(struct net_device *dev);
 static void dump_tx_ring(struct net_device *dev);
 static void update_stats(void __iomem *ioaddr, struct net_device *dev);
@@ -838,11 +839,7 @@ MODULE_PARM_DESC(use_mmio, "3c59x: use memory-mapped PCI I/O resource (0-1)");
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void poll_vortex(struct net_device *dev)
 {
-       struct vortex_private *vp = netdev_priv(dev);
-       unsigned long flags;
-       local_irq_save(flags);
-       (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
-       local_irq_restore(flags);
+       vortex_boomerang_interrupt(dev->irq, dev);
 }
 #endif
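
The new vortex_boomerang_interrupt() entry point is only declared in this
hunk; its body is not shown. Presumably it dispatches on full_bus_master_rx
and takes vp->lock with IRQs disabled itself, which is why the callers above
can drop their local_irq_save() dance. A sketch of that shape (the real body
may differ):

        static irqreturn_t vortex_boomerang_interrupt(int irq, void *dev_id)
        {
                struct net_device *dev = dev_id;
                struct vortex_private *vp = netdev_priv(dev);
                unsigned long flags;
                irqreturn_t ret;

                /* Taking the lock with irqsave makes the handlers safe to
                 * call from any context, not just hard-IRQ.
                 */
                spin_lock_irqsave(&vp->lock, flags);
                if (vp->full_bus_master_rx)
                        ret = _boomerang_interrupt(irq, dev);
                else
                        ret = _vortex_interrupt(irq, dev);
                spin_unlock_irqrestore(&vp->lock, flags);

                return ret;
        }
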
 
@@ -1212,9 +1209,9 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
        vp->mii.reg_num_mask = 0x1f;
 
        /* Makes sure rings are at least 16 byte aligned. */
-       vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+       vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
                                           + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
-                                          &vp->rx_ring_dma);
+                                          &vp->rx_ring_dma, GFP_KERNEL);
        retval = -ENOMEM;
        if (!vp->rx_ring)
                goto free_device;
@@ -1476,11 +1473,10 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
                return 0;
 
 free_ring:
-       pci_free_consistent(pdev,
-                                               sizeof(struct boom_rx_desc) * RX_RING_SIZE
-                                                       + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
-                                               vp->rx_ring,
-                                               vp->rx_ring_dma);
+       dma_free_coherent(&pdev->dev,
+               sizeof(struct boom_rx_desc) * RX_RING_SIZE +
+               sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+               vp->rx_ring, vp->rx_ring_dma);
 free_device:
        free_netdev(dev);
        pr_err(PFX "vortex_probe1 fails.  Returns %d\n", retval);
@@ -1729,8 +1725,7 @@ vortex_open(struct net_device *dev)
        dma_addr_t dma;
 
        /* Use the now-standard shared IRQ implementation. */
-       if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
-                               boomerang_interrupt : vortex_interrupt, IRQF_SHARED, dev->name, dev))) {
+       if ((retval = request_irq(dev->irq, vortex_boomerang_interrupt, IRQF_SHARED, dev->name, dev))) {
                pr_err("%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
                goto err;
        }
@@ -1751,9 +1746,9 @@ vortex_open(struct net_device *dev)
                                break;                  /* Bad news!  */
 
                        skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
-                       dma = pci_map_single(VORTEX_PCI(vp), skb->data,
-                                            PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
-                       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
+                       dma = dma_map_single(vp->gendev, skb->data,
+                                            PKT_BUF_SZ, DMA_FROM_DEVICE);
+                       if (dma_mapping_error(vp->gendev, dma))
                                break;
                        vp->rx_ring[i].addr = cpu_to_le32(dma);
                }
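
Streaming mappings follow the same conversion: PCI_DMA_FROMDEVICE/PCI_DMA_TODEVICE become DMA_FROM_DEVICE/DMA_TO_DEVICE, and a failed mapping must be detected with dma_mapping_error() against the same device before the address is handed to hardware. The lifecycle, in a hedged sketch (dev, buf, len are placeholders):

        dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

        if (dma_mapping_error(dev, dma))
                return -ENOMEM;         /* never program a bad handle */

        /* ... device DMAs into buf ... */

        dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);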
@@ -1905,18 +1900,7 @@ static void vortex_tx_timeout(struct net_device *dev)
                pr_err("%s: Interrupt posted but not delivered --"
                           " IRQ blocked by another device?\n", dev->name);
                /* Bad idea here.. but we might as well handle a few events. */
-               {
-                       /*
-                        * Block interrupts because vortex_interrupt does a bare spin_lock()
-                        */
-                       unsigned long flags;
-                       local_irq_save(flags);
-                       if (vp->full_bus_master_tx)
-                               boomerang_interrupt(dev->irq, dev);
-                       else
-                               vortex_interrupt(dev->irq, dev);
-                       local_irq_restore(flags);
-               }
+               vortex_boomerang_interrupt(dev->irq, dev);
        }
 
        if (vortex_debug > 0)
@@ -2067,9 +2051,9 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (vp->bus_master) {
                /* Set the bus-master controller to transfer the packet. */
                int len = (skb->len + 3) & ~3;
-               vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
-                                               PCI_DMA_TODEVICE);
-               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
+               vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len,
+                                               DMA_TO_DEVICE);
+               if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) {
                        dev_kfree_skb_any(skb);
                        dev->stats.tx_dropped++;
                        return NETDEV_TX_OK;
@@ -2168,9 +2152,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
 
        if (!skb_shinfo(skb)->nr_frags) {
-               dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len,
-                                         PCI_DMA_TODEVICE);
-               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+               dma_addr = dma_map_single(vp->gendev, skb->data, skb->len,
+                                         DMA_TO_DEVICE);
+               if (dma_mapping_error(vp->gendev, dma_addr))
                        goto out_dma_err;
 
                vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
@@ -2178,9 +2162,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
        } else {
                int i;
 
-               dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data,
-                                         skb_headlen(skb), PCI_DMA_TODEVICE);
-               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+               dma_addr = dma_map_single(vp->gendev, skb->data,
+                                         skb_headlen(skb), DMA_TO_DEVICE);
+               if (dma_mapping_error(vp->gendev, dma_addr))
                        goto out_dma_err;
 
                vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
@@ -2189,21 +2173,21 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-                       dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag,
+                       dma_addr = skb_frag_dma_map(vp->gendev, frag,
                                                    0,
                                                    frag->size,
                                                    DMA_TO_DEVICE);
-                       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) {
+                       if (dma_mapping_error(vp->gendev, dma_addr)) {
                                for(i = i-1; i >= 0; i--)
-                                       dma_unmap_page(&VORTEX_PCI(vp)->dev,
+                                       dma_unmap_page(vp->gendev,
                                                       le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr),
                                                       le32_to_cpu(vp->tx_ring[entry].frag[i+1].length),
                                                       DMA_TO_DEVICE);
 
-                               pci_unmap_single(VORTEX_PCI(vp),
+                               dma_unmap_single(vp->gendev,
                                                 le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
                                                 le32_to_cpu(vp->tx_ring[entry].frag[0].length),
-                                                PCI_DMA_TODEVICE);
+                                                DMA_TO_DEVICE);
 
                                goto out_dma_err;
                        }
@@ -2218,8 +2202,8 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
                }
        }
 #else
-       dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE);
-       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+       dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE);
+       if (dma_mapping_error(vp->gendev, dma_addr))
                goto out_dma_err;
        vp->tx_ring[entry].addr = cpu_to_le32(dma_addr);
        vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
@@ -2254,7 +2238,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
 out:
        return NETDEV_TX_OK;
 out_dma_err:
-       dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n");
+       dev_err(vp->gendev, "Error mapping dma buffer\n");
        goto out;
 }
 
@@ -2267,9 +2251,8 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
  */
 
 static irqreturn_t
-vortex_interrupt(int irq, void *dev_id)
+_vortex_interrupt(int irq, struct net_device *dev)
 {
-       struct net_device *dev = dev_id;
        struct vortex_private *vp = netdev_priv(dev);
        void __iomem *ioaddr;
        int status;
@@ -2278,7 +2261,6 @@ vortex_interrupt(int irq, void *dev_id)
        unsigned int bytes_compl = 0, pkts_compl = 0;
 
        ioaddr = vp->ioaddr;
-       spin_lock(&vp->lock);
 
        status = ioread16(ioaddr + EL3_STATUS);
 
@@ -2322,7 +2304,7 @@ vortex_interrupt(int irq, void *dev_id)
                if (status & DMADone) {
                        if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) {
                                iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
-                               pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
+                               dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE);
                                pkts_compl++;
                                bytes_compl += vp->tx_skb->len;
                                dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
@@ -2376,7 +2358,6 @@ vortex_interrupt(int irq, void *dev_id)
                pr_debug("%s: exiting interrupt, status %4.4x.\n",
                           dev->name, status);
 handler_exit:
-       spin_unlock(&vp->lock);
        return IRQ_RETVAL(handled);
 }
 
@@ -2386,9 +2367,8 @@ vortex_interrupt(int irq, void *dev_id)
  */
 
 static irqreturn_t
-boomerang_interrupt(int irq, void *dev_id)
+_boomerang_interrupt(int irq, struct net_device *dev)
 {
-       struct net_device *dev = dev_id;
        struct vortex_private *vp = netdev_priv(dev);
        void __iomem *ioaddr;
        int status;
@@ -2398,12 +2378,6 @@ boomerang_interrupt(int irq, void *dev_id)
 
        ioaddr = vp->ioaddr;
 
-
-       /*
-        * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout
-        * and boomerang_start_xmit
-        */
-       spin_lock(&vp->lock);
        vp->handling_irq = 1;
 
        status = ioread16(ioaddr + EL3_STATUS);
@@ -2459,19 +2433,19 @@ boomerang_interrupt(int irq, void *dev_id)
                                        struct sk_buff *skb = vp->tx_skbuff[entry];
 #if DO_ZEROCOPY
                                        int i;
-                                       pci_unmap_single(VORTEX_PCI(vp),
+                                       dma_unmap_single(vp->gendev,
                                                        le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
                                                        le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
-                                                       PCI_DMA_TODEVICE);
+                                                       DMA_TO_DEVICE);
 
                                        for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)
-                                                       pci_unmap_page(VORTEX_PCI(vp),
+                                                       dma_unmap_page(vp->gendev,
                                                                                         le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
                                                                                         le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
-                                                                                        PCI_DMA_TODEVICE);
+                                                                                        DMA_TO_DEVICE);
 #else
-                                       pci_unmap_single(VORTEX_PCI(vp),
-                                               le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
+                                       dma_unmap_single(vp->gendev,
+                                               le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE);
 #endif
                                        pkts_compl++;
                                        bytes_compl += skb->len;
@@ -2522,10 +2496,29 @@ boomerang_interrupt(int irq, void *dev_id)
                           dev->name, status);
 handler_exit:
        vp->handling_irq = 0;
-       spin_unlock(&vp->lock);
        return IRQ_RETVAL(handled);
 }
 
+static irqreturn_t
+vortex_boomerang_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = dev_id;
+       struct vortex_private *vp = netdev_priv(dev);
+       unsigned long flags;
+       irqreturn_t ret;
+
+       spin_lock_irqsave(&vp->lock, flags);
+
+       if (vp->full_bus_master_rx)
+               ret = _boomerang_interrupt(dev->irq, dev);
+       else
+               ret = _vortex_interrupt(dev->irq, dev);
+
+       spin_unlock_irqrestore(&vp->lock, flags);
+
+       return ret;
+}
+
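
Consolidating the dispatch closes a subtle hole: the old ISRs took a bare spin_lock(), so callers in process context (vortex_tx_timeout, poll_vortex) had to wrap them in local_irq_save() by hand, and the wrong handler could in principle be chosen at request_irq() time. Now every entry point funnels through one function that owns the lock with irqsave semantics. The resulting call structure, as a comment sketch:

        /*
         * hardirq:   request_irq(irq, vortex_boomerang_interrupt, ...)
         * netpoll:   poll_vortex()       -> vortex_boomerang_interrupt()
         * watchdog:  vortex_tx_timeout() -> vortex_boomerang_interrupt()
         *                -> spin_lock_irqsave(&vp->lock, flags)
         *                -> _boomerang_interrupt() or _vortex_interrupt()
         */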
 static int vortex_rx(struct net_device *dev)
 {
        struct vortex_private *vp = netdev_priv(dev);
@@ -2561,14 +2554,14 @@ static int vortex_rx(struct net_device *dev)
                                /* 'skb_put()' points to the start of sk_buff data area. */
                                if (vp->bus_master &&
                                        ! (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) {
-                                       dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len),
-                                                                          pkt_len, PCI_DMA_FROMDEVICE);
+                                       dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len),
+                                                                          pkt_len, DMA_FROM_DEVICE);
                                        iowrite32(dma, ioaddr + Wn7_MasterAddr);
                                        iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
                                        iowrite16(StartDMAUp, ioaddr + EL3_CMD);
                                        while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)
                                                ;
-                                       pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE);
+                                       dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE);
                                } else {
                                        ioread32_rep(ioaddr + RX_FIFO,
                                                     skb_put(skb, pkt_len),
@@ -2635,11 +2628,11 @@ boomerang_rx(struct net_device *dev)
                        if (pkt_len < rx_copybreak &&
                            (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
                                skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
-                               pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
                                /* 'skb_put()' points to the start of sk_buff data area. */
                                skb_put_data(skb, vp->rx_skbuff[entry]->data,
                                             pkt_len);
-                               pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
                                vp->rx_copy++;
                        } else {
                                /* Pre-allocate the replacement skb.  If it or its
@@ -2651,9 +2644,9 @@ boomerang_rx(struct net_device *dev)
                                        dev->stats.rx_dropped++;
                                        goto clear_complete;
                                }
-                               newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
-                                                       PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
-                               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
+                               newdma = dma_map_single(vp->gendev, newskb->data,
+                                                       PKT_BUF_SZ, DMA_FROM_DEVICE);
+                               if (dma_mapping_error(vp->gendev, newdma)) {
                                        dev->stats.rx_dropped++;
                                        consume_skb(newskb);
                                        goto clear_complete;
@@ -2664,7 +2657,7 @@ boomerang_rx(struct net_device *dev)
                                vp->rx_skbuff[entry] = newskb;
                                vp->rx_ring[entry].addr = cpu_to_le32(newdma);
                                skb_put(skb, pkt_len);
-                               pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
                                vp->rx_nocopy++;
                        }
                        skb->protocol = eth_type_trans(skb, dev);
@@ -2761,8 +2754,8 @@ vortex_close(struct net_device *dev)
        if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
                for (i = 0; i < RX_RING_SIZE; i++)
                        if (vp->rx_skbuff[i]) {
-                               pci_unmap_single(       VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr),
-                                                                       PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr),
+                                                                       PKT_BUF_SZ, DMA_FROM_DEVICE);
                                dev_kfree_skb(vp->rx_skbuff[i]);
                                vp->rx_skbuff[i] = NULL;
                        }
@@ -2775,12 +2768,12 @@ vortex_close(struct net_device *dev)
                                int k;
 
                                for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
-                                               pci_unmap_single(VORTEX_PCI(vp),
+                                               dma_unmap_single(vp->gendev,
                                                                                 le32_to_cpu(vp->tx_ring[i].frag[k].addr),
                                                                                 le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
-                                                                                PCI_DMA_TODEVICE);
+                                                                                DMA_TO_DEVICE);
 #else
-                               pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
+                               dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE);
 #endif
                                dev_kfree_skb(skb);
                                vp->tx_skbuff[i] = NULL;
@@ -3288,11 +3281,10 @@ static void vortex_remove_one(struct pci_dev *pdev)
 
        pci_iounmap(pdev, vp->ioaddr);
 
-       pci_free_consistent(pdev,
-                                               sizeof(struct boom_rx_desc) * RX_RING_SIZE
-                                                       + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
-                                               vp->rx_ring,
-                                               vp->rx_ring_dma);
+       dma_free_coherent(&pdev->dev,
+                       sizeof(struct boom_rx_desc) * RX_RING_SIZE +
+                       sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+                       vp->rx_ring, vp->rx_ring_dma);
 
        pci_release_regions(pdev);
 
index ac99d089ac7266c349fa974e4edabab1e127b90b..1c97e39b478e9f8957ff76f75bc47d819b248c03 100644 (file)
@@ -164,7 +164,9 @@ bad_clone_list[] __initdata = {
 #define NESM_START_PG  0x40    /* First page of TX buffer */
 #define NESM_STOP_PG   0x80    /* Last page +1 of RX ring */
 
-#if defined(CONFIG_ATARI)      /* 8-bit mode on Atari, normal on Q40 */
+#if defined(CONFIG_MACH_TX49XX)
+#  define DCR_VAL 0x48         /* 8-bit mode */
+#elif defined(CONFIG_ATARI)    /* 8-bit mode on Atari, normal on Q40 */
 #  define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49)
 #else
 #  define DCR_VAL 0x49
index 603a5704dab893093993601741c0c4be86661090..af766fd61151643d260b24e51b30fd68f5a2d9e8 100644 (file)
@@ -33,9 +33,9 @@ source "drivers/net/ethernet/aquantia/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
 source "drivers/net/ethernet/aurora/Kconfig"
-source "drivers/net/ethernet/cadence/Kconfig"
 source "drivers/net/ethernet/broadcom/Kconfig"
 source "drivers/net/ethernet/brocade/Kconfig"
+source "drivers/net/ethernet/cadence/Kconfig"
 source "drivers/net/ethernet/calxeda/Kconfig"
 source "drivers/net/ethernet/cavium/Kconfig"
 source "drivers/net/ethernet/chelsio/Kconfig"
@@ -72,16 +72,16 @@ source "drivers/net/ethernet/dec/Kconfig"
 source "drivers/net/ethernet/dlink/Kconfig"
 source "drivers/net/ethernet/emulex/Kconfig"
 source "drivers/net/ethernet/ezchip/Kconfig"
-source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/faraday/Kconfig"
 source "drivers/net/ethernet/freescale/Kconfig"
 source "drivers/net/ethernet/fujitsu/Kconfig"
 source "drivers/net/ethernet/hisilicon/Kconfig"
 source "drivers/net/ethernet/hp/Kconfig"
 source "drivers/net/ethernet/huawei/Kconfig"
+source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"
-source "drivers/net/ethernet/i825xx/Kconfig"
+source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
 
 config JME
@@ -115,6 +115,7 @@ source "drivers/net/ethernet/mellanox/Kconfig"
 source "drivers/net/ethernet/micrel/Kconfig"
 source "drivers/net/ethernet/microchip/Kconfig"
 source "drivers/net/ethernet/moxa/Kconfig"
+source "drivers/net/ethernet/mscc/Kconfig"
 source "drivers/net/ethernet/myricom/Kconfig"
 
 config FEALNX
@@ -160,20 +161,21 @@ source "drivers/net/ethernet/packetengines/Kconfig"
 source "drivers/net/ethernet/pasemi/Kconfig"
 source "drivers/net/ethernet/qlogic/Kconfig"
 source "drivers/net/ethernet/qualcomm/Kconfig"
+source "drivers/net/ethernet/rdc/Kconfig"
 source "drivers/net/ethernet/realtek/Kconfig"
 source "drivers/net/ethernet/renesas/Kconfig"
-source "drivers/net/ethernet/rdc/Kconfig"
 source "drivers/net/ethernet/rocker/Kconfig"
 source "drivers/net/ethernet/samsung/Kconfig"
 source "drivers/net/ethernet/seeq/Kconfig"
-source "drivers/net/ethernet/silan/Kconfig"
-source "drivers/net/ethernet/sis/Kconfig"
 source "drivers/net/ethernet/sfc/Kconfig"
 source "drivers/net/ethernet/sgi/Kconfig"
+source "drivers/net/ethernet/silan/Kconfig"
+source "drivers/net/ethernet/sis/Kconfig"
 source "drivers/net/ethernet/smsc/Kconfig"
 source "drivers/net/ethernet/socionext/Kconfig"
 source "drivers/net/ethernet/stmicro/Kconfig"
 source "drivers/net/ethernet/sun/Kconfig"
+source "drivers/net/ethernet/synopsys/Kconfig"
 source "drivers/net/ethernet/tehuti/Kconfig"
 source "drivers/net/ethernet/ti/Kconfig"
 source "drivers/net/ethernet/toshiba/Kconfig"
@@ -182,6 +184,5 @@ source "drivers/net/ethernet/via/Kconfig"
 source "drivers/net/ethernet/wiznet/Kconfig"
 source "drivers/net/ethernet/xilinx/Kconfig"
 source "drivers/net/ethernet/xircom/Kconfig"
-source "drivers/net/ethernet/synopsys/Kconfig"
 
 endif # ETHERNET
index 2bfd2eea50bff16b90fd809e426e70e12025eb29..8fbfe9ce2fa53a69673671871465e816b2386ba6 100644 (file)
@@ -55,6 +55,7 @@ obj-$(CONFIG_NET_VENDOR_MEDIATEK) += mediatek/
 obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/
 obj-$(CONFIG_NET_VENDOR_MICREL) += micrel/
 obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/
+obj-$(CONFIG_NET_VENDOR_MICROSEMI) += mscc/
 obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/
 obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
 obj-$(CONFIG_FEALNX) += fealnx.o
index c99e3e845ac05021462853fc780df2187b2e0f7c..a90080f12e67cd07e26ed60b5d0f12cc29741f41 100644 (file)
@@ -1074,16 +1074,12 @@ static int amd8111e_calc_coalesce(struct net_device *dev)
                                amd8111e_set_coalesce(dev,TX_INTR_COAL);
                                coal_conf->tx_coal_type = MEDIUM_COALESCE;
                        }
-
-               }
-               else if(tx_pkt_size >= 1024){
-                       if (tx_pkt_size >= 1024){
-                               if(coal_conf->tx_coal_type !=  HIGH_COALESCE){
-                                       coal_conf->tx_timeout = 4;
-                                       coal_conf->tx_event_count = 8;
-                                       amd8111e_set_coalesce(dev,TX_INTR_COAL);
-                                       coal_conf->tx_coal_type = HIGH_COALESCE;
-                               }
+               } else if (tx_pkt_size >= 1024) {
+                       if (coal_conf->tx_coal_type != HIGH_COALESCE) {
+                               coal_conf->tx_timeout = 4;
+                               coal_conf->tx_event_count = 8;
+                               amd8111e_set_coalesce(dev, TX_INTR_COAL);
+                               coal_conf->tx_coal_type = HIGH_COALESCE;
                        }
                }
        }
index 7ea72ef11a55d5a4425959da1d12c69b108676b2..d272dc6984ac6ef3f61743a16912c13552c35b2f 100644 (file)
 #define MDIO_VEND2_AN_STAT             0x8002
 #endif
 
+#ifndef MDIO_VEND2_PMA_CDR_CONTROL
+#define MDIO_VEND2_PMA_CDR_CONTROL     0x8056
+#endif
+
 #ifndef MDIO_CTRL1_SPEED1G
 #define MDIO_CTRL1_SPEED1G             (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
 #endif
 #define XGBE_AN_CL37_TX_CONFIG_MASK    0x08
 #define XGBE_AN_CL37_MII_CTRL_8BIT     0x0100
 
+#define XGBE_PMA_CDR_TRACK_EN_MASK     0x01
+#define XGBE_PMA_CDR_TRACK_EN_OFF      0x00
+#define XGBE_PMA_CDR_TRACK_EN_ON       0x01
+
 /* Bit setting and getting macros
  *  The get macro will extract the current bit field value from within
  *  the variable
index 7d128be613103a1088655c4b1dcc2e10e234a82f..b91143947ed271a6d29c83040aae6f4304e1549d 100644 (file)
@@ -519,6 +519,22 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
                                   "debugfs_create_file failed\n");
        }
 
+       if (pdata->vdata->an_cdr_workaround) {
+               pfile = debugfs_create_bool("an_cdr_workaround", 0600,
+                                           pdata->xgbe_debugfs,
+                                           &pdata->debugfs_an_cdr_workaround);
+               if (!pfile)
+                       netdev_err(pdata->netdev,
+                                  "debugfs_create_bool failed\n");
+
+               pfile = debugfs_create_bool("an_cdr_track_early", 0600,
+                                           pdata->xgbe_debugfs,
+                                           &pdata->debugfs_an_cdr_track_early);
+               if (!pfile)
+                       netdev_err(pdata->netdev,
+                                  "debugfs_create_bool failed\n");
+       }
+
        kfree(buf);
 }
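
debugfs_create_bool() binds a kernel bool directly to a file under debugfs, so the workaround can be toggled at runtime without a module reload; reads and writes go straight through to the backing variable. A minimal sketch of the idiom (names are illustrative):

        static bool wa_enabled = true;  /* backing store read by the driver */

        /* typically appears under /sys/kernel/debug/<driver>/ and is
         * toggled with a plain write of 0/1 or N/Y from userspace
         */
        debugfs_create_bool("an_cdr_workaround", 0600, parent_dentry, &wa_enabled);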
 
index 795e556d4a3f89d70b088189b28dcaae686e6e3f..441d0973957ba0670c847fa1d37c6ef607e66643 100644 (file)
@@ -349,6 +349,7 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
        XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);
 
        /* Call MDIO/PHY initialization routine */
+       pdata->debugfs_an_cdr_workaround = pdata->vdata->an_cdr_workaround;
        ret = pdata->phy_if.phy_init(pdata);
        if (ret)
                return ret;
index 072b9f6645978d0c53ce40271b9dbfe44a6baa0a..1b45cd73a258f05211bfc2ca6e69124c5347bda8 100644 (file)
@@ -432,11 +432,16 @@ static void xgbe_an73_disable(struct xgbe_prv_data *pdata)
        xgbe_an73_set(pdata, false, false);
        xgbe_an73_disable_interrupts(pdata);
 
+       pdata->an_start = 0;
+
        netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n");
 }
 
 static void xgbe_an_restart(struct xgbe_prv_data *pdata)
 {
+       if (pdata->phy_if.phy_impl.an_pre)
+               pdata->phy_if.phy_impl.an_pre(pdata);
+
        switch (pdata->an_mode) {
        case XGBE_AN_MODE_CL73:
        case XGBE_AN_MODE_CL73_REDRV:
@@ -453,6 +458,9 @@ static void xgbe_an_restart(struct xgbe_prv_data *pdata)
 
 static void xgbe_an_disable(struct xgbe_prv_data *pdata)
 {
+       if (pdata->phy_if.phy_impl.an_post)
+               pdata->phy_if.phy_impl.an_post(pdata);
+
        switch (pdata->an_mode) {
        case XGBE_AN_MODE_CL73:
        case XGBE_AN_MODE_CL73_REDRV:
@@ -505,11 +513,11 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
                XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
                            reg);
 
-               if (pdata->phy_if.phy_impl.kr_training_post)
-                       pdata->phy_if.phy_impl.kr_training_post(pdata);
-
                netif_dbg(pdata, link, pdata->netdev,
                          "KR training initiated\n");
+
+               if (pdata->phy_if.phy_impl.kr_training_post)
+                       pdata->phy_if.phy_impl.kr_training_post(pdata);
        }
 
        return XGBE_AN_PAGE_RECEIVED;
@@ -637,11 +645,11 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
                        return XGBE_AN_NO_LINK;
        }
 
-       xgbe_an73_disable(pdata);
+       xgbe_an_disable(pdata);
 
        xgbe_switch_mode(pdata);
 
-       xgbe_an73_restart(pdata);
+       xgbe_an_restart(pdata);
 
        return XGBE_AN_INCOMPAT_LINK;
 }
@@ -820,6 +828,9 @@ static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata)
                pdata->an_result = pdata->an_state;
                pdata->an_state = XGBE_AN_READY;
 
+               if (pdata->phy_if.phy_impl.an_post)
+                       pdata->phy_if.phy_impl.an_post(pdata);
+
                netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n",
                          xgbe_state_as_string(pdata->an_result));
        }
@@ -903,6 +914,9 @@ static void xgbe_an73_state_machine(struct xgbe_prv_data *pdata)
                pdata->kx_state = XGBE_RX_BPA;
                pdata->an_start = 0;
 
+               if (pdata->phy_if.phy_impl.an_post)
+                       pdata->phy_if.phy_impl.an_post(pdata);
+
                netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n",
                          xgbe_state_as_string(pdata->an_result));
        }
index eb23f9ba1a9a10091f8d42bb71e35291e224c4f1..82d1f416ee2ac96c9aa2d7e5d70c22650e0407fe 100644 (file)
@@ -456,6 +456,7 @@ static const struct xgbe_version_data xgbe_v2a = {
        .irq_reissue_support            = 1,
        .tx_desc_prefetch               = 5,
        .rx_desc_prefetch               = 5,
+       .an_cdr_workaround              = 1,
 };
 
 static const struct xgbe_version_data xgbe_v2b = {
@@ -470,6 +471,7 @@ static const struct xgbe_version_data xgbe_v2b = {
        .irq_reissue_support            = 1,
        .tx_desc_prefetch               = 5,
        .rx_desc_prefetch               = 5,
+       .an_cdr_workaround              = 1,
 };
 
 static const struct pci_device_id xgbe_pci_table[] = {
index 3304a291aa964c034f1ebcace56d8aaec32c8407..aac884314000c9114decb1a29e743a593a5bce89 100644 (file)
 /* Rate-change complete wait/retry count */
 #define XGBE_RATECHANGE_COUNT          500
 
+/* CDR delay values for KR support (in usec) */
+#define XGBE_CDR_DELAY_INIT            10000
+#define XGBE_CDR_DELAY_INC             10000
+#define XGBE_CDR_DELAY_MAX             100000
+
+/* RRC frequency during link status check */
+#define XGBE_RRC_FREQUENCY             10
+
 enum xgbe_port_mode {
        XGBE_PORT_MODE_RSVD = 0,
        XGBE_PORT_MODE_BACKPLANE,
@@ -245,6 +253,10 @@ enum xgbe_sfp_speed {
 #define XGBE_SFP_BASE_VENDOR_SN                        4
 #define XGBE_SFP_BASE_VENDOR_SN_LEN            16
 
+#define XGBE_SFP_EXTD_OPT1                     1
+#define XGBE_SFP_EXTD_OPT1_RX_LOS              BIT(1)
+#define XGBE_SFP_EXTD_OPT1_TX_FAULT            BIT(3)
+
 #define XGBE_SFP_EXTD_DIAG                     28
 #define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE         BIT(2)
 
@@ -324,6 +336,7 @@ struct xgbe_phy_data {
 
        unsigned int sfp_gpio_address;
        unsigned int sfp_gpio_mask;
+       unsigned int sfp_gpio_inputs;
        unsigned int sfp_gpio_rx_los;
        unsigned int sfp_gpio_tx_fault;
        unsigned int sfp_gpio_mod_absent;
@@ -355,6 +368,10 @@ struct xgbe_phy_data {
        unsigned int redrv_addr;
        unsigned int redrv_lane;
        unsigned int redrv_model;
+
+       /* KR AN support */
+       unsigned int phy_cdr_notrack;
+       unsigned int phy_cdr_delay;
 };
 
 /* I2C, MDIO and GPIO lines are muxed, so only one device at a time */
@@ -974,6 +991,49 @@ static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata)
        phy_data->sfp_phy_avail = 1;
 }
 
+static bool xgbe_phy_check_sfp_rx_los(struct xgbe_phy_data *phy_data)
+{
+       u8 *sfp_extd = phy_data->sfp_eeprom.extd;
+
+       if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_RX_LOS))
+               return false;
+
+       if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS)
+               return false;
+
+       if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_rx_los))
+               return true;
+
+       return false;
+}
+
+static bool xgbe_phy_check_sfp_tx_fault(struct xgbe_phy_data *phy_data)
+{
+       u8 *sfp_extd = phy_data->sfp_eeprom.extd;
+
+       if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_TX_FAULT))
+               return false;
+
+       if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT)
+               return false;
+
+       if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_tx_fault))
+               return true;
+
+       return false;
+}
+
+static bool xgbe_phy_check_sfp_mod_absent(struct xgbe_phy_data *phy_data)
+{
+       if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT)
+               return false;
+
+       if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_mod_absent))
+               return true;
+
+       return false;
+}
+
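
The three helpers share one shape: a signal is reported as asserted only if no GPIO-absent quirk is set and the sampled input bit is high, with rx_los and tx_fault additionally requiring the matching EEPROM option bit. Condensed (gpio is the per-signal bit index; implemented and no_gpio_quirk stand in for the option-bit and quirk tests above):

        asserted = implemented && !no_gpio_quirk &&
                   (phy_data->sfp_gpio_inputs & (1 << gpio));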
 static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata)
 {
        struct xgbe_phy_data *phy_data = pdata->phy_data;
@@ -1019,6 +1079,10 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
        if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP)
                return;
 
+       /* Update transceiver signals (eeprom extd/options) */
+       phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data);
+       phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data);
+
        if (xgbe_phy_sfp_parse_quirks(pdata))
                return;
 
@@ -1184,7 +1248,6 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
 static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
 {
        struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int gpio_input;
        u8 gpio_reg, gpio_ports[2];
        int ret;
 
@@ -1199,23 +1262,9 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
                return;
        }
 
-       gpio_input = (gpio_ports[1] << 8) | gpio_ports[0];
-
-       if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) {
-               /* No GPIO, just assume the module is present for now */
-               phy_data->sfp_mod_absent = 0;
-       } else {
-               if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent)))
-                       phy_data->sfp_mod_absent = 0;
-       }
-
-       if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) &&
-           (gpio_input & (1 << phy_data->sfp_gpio_rx_los)))
-               phy_data->sfp_rx_los = 1;
+       phy_data->sfp_gpio_inputs = (gpio_ports[1] << 8) | gpio_ports[0];
 
-       if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) &&
-           (gpio_input & (1 << phy_data->sfp_gpio_tx_fault)))
-               phy_data->sfp_tx_fault = 1;
+       phy_data->sfp_mod_absent = xgbe_phy_check_sfp_mod_absent(phy_data);
 }
 
 static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata)
@@ -2361,7 +2410,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
                return 1;
 
        /* No link, attempt a receiver reset cycle */
-       if (phy_data->rrc_count++) {
+       if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) {
                phy_data->rrc_count = 0;
                xgbe_phy_rrc(pdata);
        }
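
The old non-zero test on rrc_count fired a receiver reset cycle on roughly every other no-link poll; the new threshold spaces them out to about one per XGBE_RRC_FREQUENCY (10) polls, giving auto-negotiation time to complete between resets. The counter idiom, sketched:

        /* rate-limit an expensive recovery action to every Nth poll */
        if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) {
                phy_data->rrc_count = 0;
                xgbe_phy_rrc(pdata);    /* receiver reset cycle */
        }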
@@ -2669,6 +2718,103 @@ static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata)
        return true;
 }
 
+static void xgbe_phy_cdr_track(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       if (!pdata->debugfs_an_cdr_workaround)
+               return;
+
+       if (!phy_data->phy_cdr_notrack)
+               return;
+
+       usleep_range(phy_data->phy_cdr_delay,
+                    phy_data->phy_cdr_delay + 500);
+
+       XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL,
+                        XGBE_PMA_CDR_TRACK_EN_MASK,
+                        XGBE_PMA_CDR_TRACK_EN_ON);
+
+       phy_data->phy_cdr_notrack = 0;
+}
+
+static void xgbe_phy_cdr_notrack(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       if (!pdata->debugfs_an_cdr_workaround)
+               return;
+
+       if (phy_data->phy_cdr_notrack)
+               return;
+
+       XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL,
+                        XGBE_PMA_CDR_TRACK_EN_MASK,
+                        XGBE_PMA_CDR_TRACK_EN_OFF);
+
+       xgbe_phy_rrc(pdata);
+
+       phy_data->phy_cdr_notrack = 1;
+}
+
+static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata)
+{
+       if (!pdata->debugfs_an_cdr_track_early)
+               xgbe_phy_cdr_track(pdata);
+}
+
+static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata)
+{
+       if (pdata->debugfs_an_cdr_track_early)
+               xgbe_phy_cdr_track(pdata);
+}
+
+static void xgbe_phy_an_post(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               if (phy_data->cur_mode != XGBE_MODE_KR)
+                       break;
+
+               xgbe_phy_cdr_track(pdata);
+
+               switch (pdata->an_result) {
+               case XGBE_AN_READY:
+               case XGBE_AN_COMPLETE:
+                       break;
+               default:
+                       if (phy_data->phy_cdr_delay < XGBE_CDR_DELAY_MAX)
+                               phy_data->phy_cdr_delay += XGBE_CDR_DELAY_INC;
+                       else
+                               phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT;
+                       break;
+               }
+               break;
+       default:
+               break;
+       }
+}
+
+static void xgbe_phy_an_pre(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               if (phy_data->cur_mode != XGBE_MODE_KR)
+                       break;
+
+               xgbe_phy_cdr_notrack(pdata);
+               break;
+       default:
+               break;
+       }
+}
+
 static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
 {
        struct xgbe_phy_data *phy_data = pdata->phy_data;
@@ -2680,6 +2826,9 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
        xgbe_phy_sfp_reset(phy_data);
        xgbe_phy_sfp_mod_absent(pdata);
 
+       /* Reset CDR support */
+       xgbe_phy_cdr_track(pdata);
+
        /* Power off the PHY */
        xgbe_phy_power_off(pdata);
 
@@ -2712,6 +2861,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
        /* Start in highest supported mode */
        xgbe_phy_set_mode(pdata, phy_data->start_mode);
 
+       /* Reset CDR support */
+       xgbe_phy_cdr_track(pdata);
+
        /* After starting the I2C controller, we can check for an SFP */
        switch (phy_data->port_mode) {
        case XGBE_PORT_MODE_SFP:
@@ -3019,6 +3171,8 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
                }
        }
 
+       phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT;
+
        /* Register for driving external PHYs */
        mii = devm_mdiobus_alloc(pdata->dev);
        if (!mii) {
@@ -3071,4 +3225,10 @@ void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if)
        phy_impl->an_advertising        = xgbe_phy_an_advertising;
 
        phy_impl->an_outcome            = xgbe_phy_an_outcome;
+
+       phy_impl->an_pre                = xgbe_phy_an_pre;
+       phy_impl->an_post               = xgbe_phy_an_post;
+
+       phy_impl->kr_training_pre       = xgbe_phy_kr_training_pre;
+       phy_impl->kr_training_post      = xgbe_phy_kr_training_post;
 }
index ad102c8bac7bf44d8fa92d0bc175333b12f415fc..95d4b56448c68058102cc4fb3da9e1aefc9fea59 100644 (file)
@@ -833,6 +833,7 @@ struct xgbe_hw_if {
 /* This structure represents implementation specific routines for an
  * implementation of a PHY. All routines are required unless noted below.
  *   Optional routines:
+ *     an_pre, an_post
  *     kr_training_pre, kr_training_post
  */
 struct xgbe_phy_impl_if {
@@ -875,6 +876,10 @@ struct xgbe_phy_impl_if {
        /* Process results of auto-negotiation */
        enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *);
 
+       /* Pre/Post auto-negotiation support */
+       void (*an_pre)(struct xgbe_prv_data *);
+       void (*an_post)(struct xgbe_prv_data *);
+
        /* Pre/Post KR training enablement support */
        void (*kr_training_pre)(struct xgbe_prv_data *);
        void (*kr_training_post)(struct xgbe_prv_data *);
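
Optional hooks in this interface are NULL-checked at every call site rather than stubbed, so a PHY implementation only fills in what it needs. The invocation pattern used by the AN state machine above:

        if (pdata->phy_if.phy_impl.an_pre)
                pdata->phy_if.phy_impl.an_pre(pdata);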
@@ -989,6 +994,7 @@ struct xgbe_version_data {
        unsigned int irq_reissue_support;
        unsigned int tx_desc_prefetch;
        unsigned int rx_desc_prefetch;
+       unsigned int an_cdr_workaround;
 };
 
 struct xgbe_vxlan_data {
@@ -1257,6 +1263,9 @@ struct xgbe_prv_data {
        unsigned int debugfs_xprop_reg;
 
        unsigned int debugfs_xi2c_reg;
+
+       bool debugfs_an_cdr_workaround;
+       bool debugfs_an_cdr_track_early;
 };
 
 /* Function prototypes*/
index 32f6d2e24d6692ce00ffe077f0c0bc970c90e825..1a1a6380c128c4522b330907cc16258f0e012189 100644 (file)
@@ -95,6 +95,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
        /*rss rings */
        cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
        cfg->vecs = min(cfg->vecs, num_online_cpus());
+       cfg->vecs = min(cfg->vecs, self->irqvecs);
        /* cfg->vecs should be power of 2 for RSS */
        if (cfg->vecs >= 8U)
                cfg->vecs = 8U;
@@ -246,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self)
 
        self->ndev->hw_features |= aq_hw_caps->hw_features;
        self->ndev->features = aq_hw_caps->hw_features;
+       self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+                                    NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO;
        self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
        self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
index 219b550d16650bd6b205fb6e10855627a0fd277b..faa533a0ec474116b7d84369c947a7f0f1bfa853 100644 (file)
@@ -80,6 +80,7 @@ struct aq_nic_s {
 
        struct pci_dev *pdev;
        unsigned int msix_entry_mask;
+       u32 irqvecs;
 };
 
 static inline struct device *aq_nic_get_dev(struct aq_nic_s *self)
index ecc6306f940f5d9f975d9cd422114f0be05c3435..a50e08bb4748386f999b1898b8207d973ac3508b 100644 (file)
@@ -267,16 +267,16 @@ static int aq_pci_probe(struct pci_dev *pdev,
        numvecs = min(numvecs, num_online_cpus());
        /*enable interrupts */
 #if !AQ_CFG_FORCE_LEGACY_INT
-       err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs,
-                                   PCI_IRQ_MSIX);
-
-       if (err < 0) {
-               err = pci_alloc_irq_vectors(self->pdev, 1, 1,
-                                           PCI_IRQ_MSI | PCI_IRQ_LEGACY);
-               if (err < 0)
-                       goto err_hwinit;
+       numvecs = pci_alloc_irq_vectors(self->pdev, 1, numvecs,
+                                       PCI_IRQ_MSIX | PCI_IRQ_MSI |
+                                       PCI_IRQ_LEGACY);
+
+       if (numvecs < 0) {
+               err = numvecs;
+               goto err_hwinit;
        }
 #endif
+       self->irqvecs = numvecs;
 
        /* net device init */
        aq_nic_cfg_start(self);
@@ -298,9 +298,9 @@ static int aq_pci_probe(struct pci_dev *pdev,
        kfree(self->aq_hw);
 err_ioremap:
        free_netdev(ndev);
-err_pci_func:
-       pci_release_regions(pdev);
 err_ndev:
+       pci_release_regions(pdev);
+err_pci_func:
        pci_disable_device(pdev);
        return err;
 }
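
pci_alloc_irq_vectors() takes a (min, max) range plus a mask of acceptable interrupt types and returns the number of vectors actually allocated (or a negative errno), so the old two-step MSI-X-then-MSI fallback collapses into one call whose result feeds cfg->vecs via self->irqvecs. Sketched usage (error handling abridged):

        int nvec = pci_alloc_irq_vectors(pdev, 1, want,
                                         PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
        if (nvec < 0)
                return nvec;            /* not even one vector available */
        /* nvec may be anywhere in [1, want]; size queues accordingly */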
index f9a3c1a76d5db4cf7efcdedbc79c77afe33ac98a..d5fca2e5a9bc34ad6edfa295e378dfe12078c0e5 100644 (file)
@@ -654,7 +654,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
        pkts = priv->rx_max_coalesced_frames;
 
        if (ec->use_adaptive_rx_coalesce && !priv->dim.use_dim) {
-               moder = net_dim_get_def_profile(priv->dim.dim.mode);
+               moder = net_dim_get_def_rx_moderation(priv->dim.dim.mode);
                usecs = moder.usec;
                pkts = moder.pkts;
        }
@@ -1064,7 +1064,7 @@ static void bcm_sysport_dim_work(struct work_struct *work)
        struct bcm_sysport_priv *priv =
                        container_of(ndim, struct bcm_sysport_priv, dim);
        struct net_dim_cq_moder cur_profile =
-                               net_dim_get_profile(dim->mode, dim->profile_ix);
+                       net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
        bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts);
        dim->state = NET_DIM_START_MEASURE;
@@ -1437,7 +1437,7 @@ static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv)
 
        /* If DIM was enabled, re-apply default parameters */
        if (dim->use_dim) {
-               moder = net_dim_get_def_profile(dim->dim.mode);
+               moder = net_dim_get_def_rx_moderation(dim->dim.mode);
                usecs = moder.usec;
                pkts = moder.pkts;
        }
@@ -2144,14 +2144,21 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
        .ndo_select_queue       = bcm_sysport_select_queue,
 };
 
-static int bcm_sysport_map_queues(struct net_device *dev,
+static int bcm_sysport_map_queues(struct notifier_block *nb,
                                  struct dsa_notifier_register_info *info)
 {
-       struct bcm_sysport_priv *priv = netdev_priv(dev);
        struct bcm_sysport_tx_ring *ring;
+       struct bcm_sysport_priv *priv;
        struct net_device *slave_dev;
        unsigned int num_tx_queues;
        unsigned int q, start, port;
+       struct net_device *dev;
+
+       priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier);
+       if (priv->netdev != info->master)
+               return 0;
+
+       dev = info->master;
 
        /* We can't be setting up queue inspection for non directly attached
         * switches
@@ -2174,11 +2181,12 @@ static int bcm_sysport_map_queues(struct net_device *dev,
        if (priv->is_lite)
                netif_set_real_num_tx_queues(slave_dev,
                                             slave_dev->num_tx_queues / 2);
+
        num_tx_queues = slave_dev->real_num_tx_queues;
 
        if (priv->per_port_num_tx_queues &&
            priv->per_port_num_tx_queues != num_tx_queues)
-               netdev_warn(slave_dev, "asymetric number of per-port queues\n");
+               netdev_warn(slave_dev, "asymmetric number of per-port queues\n");
 
        priv->per_port_num_tx_queues = num_tx_queues;
 
@@ -2201,7 +2209,7 @@ static int bcm_sysport_map_queues(struct net_device *dev,
        return 0;
 }
 
-static int bcm_sysport_dsa_notifier(struct notifier_block *unused,
+static int bcm_sysport_dsa_notifier(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
 {
        struct dsa_notifier_register_info *info;
@@ -2211,7 +2219,7 @@ static int bcm_sysport_dsa_notifier(struct notifier_block *unused,
 
        info = ptr;
 
-       return notifier_from_errno(bcm_sysport_map_queues(info->master, info));
+       return notifier_from_errno(bcm_sysport_map_queues(nb, info));
 }
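
Passing the notifier_block instead of a net_device lets the callback recover its owning private structure with container_of() and bail out early when the event targets some other master device, which matters once several sysport instances register the same notifier function. The recovery idiom:

        priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier);
        if (priv->netdev != info->master)
                return 0;       /* notification is for a different master */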
 
 #define REV_FMT        "v%2x.%02x"
index 7c560d545c038100a7af3492cb816ac03348d278..5a779b19d14931ba2c9e5cb33e37f03722f09376 100644 (file)
@@ -2,3 +2,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o
 
 bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
 bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
+bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
index f83769d8047b4d383adebf839bcd88775496cada..dfa0839f665613bc9ec0cf20e94df826708677bd 100644 (file)
@@ -62,6 +62,7 @@
 #include "bnxt_vfr.h"
 #include "bnxt_tc.h"
 #include "bnxt_devlink.h"
+#include "bnxt_debugfs.h"
 
 #define BNXT_TX_TIMEOUT                (5 * HZ)
 
@@ -2383,6 +2384,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
        for (i = 0, j = 0; i < bp->tx_nr_rings; i++) {
                struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
                struct bnxt_ring_struct *ring;
+               u8 qidx;
 
                ring = &txr->tx_ring_struct;
 
@@ -2411,7 +2413,8 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
 
                        memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
                }
-               ring->queue_id = bp->q_info[j].queue_id;
+               qidx = bp->tc_to_qidx[j];
+               ring->queue_id = bp->q_info[qidx].queue_id;
                if (i < bp->tx_nr_rings_xdp)
                        continue;
                if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -3493,15 +3496,29 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
        if (!timeout)
                timeout = DFLT_HWRM_CMD_TIMEOUT;
+       /* convert timeout to usec */
+       timeout *= 1000;
 
        i = 0;
-       tmo_count = timeout * 40;
+       /* Short timeout for the first few iterations:
+        * number of loops = number of loops for short timeout +
+        * number of loops for standard timeout.
+        */
+       tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
+       timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
+       tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
        resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
        if (intr_process) {
                /* Wait until hwrm response cmpl interrupt is processed */
                while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID &&
                       i++ < tmo_count) {
-                       usleep_range(25, 40);
+                       /* on first few passes, just barely sleep */
+                       if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+                               usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+                                            HWRM_SHORT_MAX_TIMEOUT);
+                       else
+                               usleep_range(HWRM_MIN_TIMEOUT,
+                                            HWRM_MAX_TIMEOUT);
                }
 
                if (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID) {
@@ -3513,25 +3530,34 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
                      HWRM_RESP_LEN_SFT;
                valid = bp->hwrm_cmd_resp_addr + len - 1;
        } else {
+               int j;
+
                /* Check if response len is updated */
                for (i = 0; i < tmo_count; i++) {
                        len = (le32_to_cpu(*resp_len) & HWRM_RESP_LEN_MASK) >>
                              HWRM_RESP_LEN_SFT;
                        if (len)
                                break;
-                       usleep_range(25, 40);
+                       /* on first few passes, just barely sleep */
+                       if (i < DFLT_HWRM_CMD_TIMEOUT)
+                               usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+                                            HWRM_SHORT_MAX_TIMEOUT);
+                       else
+                               usleep_range(HWRM_MIN_TIMEOUT,
+                                            HWRM_MAX_TIMEOUT);
                }
 
                if (i >= tmo_count) {
                        netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
-                                  timeout, le16_to_cpu(req->req_type),
+                                  HWRM_TOTAL_TIMEOUT(i),
+                                  le16_to_cpu(req->req_type),
                                   le16_to_cpu(req->seq_id), len);
                        return -1;
                }
 
                /* Last byte of resp contains valid bit */
                valid = bp->hwrm_cmd_resp_addr + len - 1;
-               for (i = 0; i < 5; i++) {
+               for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
                        /* make sure we read from updated DMA memory */
                        dma_rmb();
                        if (*valid)
@@ -3539,9 +3565,10 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
                        udelay(1);
                }
 
-               if (i >= 5) {
+               if (j >= HWRM_VALID_BIT_DELAY_USEC) {
                        netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d v:%d\n",
-                                  timeout, le16_to_cpu(req->req_type),
+                                  HWRM_TOTAL_TIMEOUT(i),
+                                  le16_to_cpu(req->req_type),
                                   le16_to_cpu(req->seq_id), len, *valid);
                        return -1;
                }
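
The loop accounting works in microseconds: the first HWRM_SHORT_TIMEOUT_COUNTER iterations sleep the short interval, and the remaining budget is divided by the standard minimum sleep. As a hedged worked example, assuming the companion header sets the short minimum to 3 usec, the counter to 5, and the standard minimum to 25 usec: a 500 ms default timeout gives tmo_count = 5 + DIV_ROUND_UP(500000 - 5 * 3, 25), about 20005 iterations, with HWRM_TOTAL_TIMEOUT(i) reconstructing the elapsed time for the error message.

        /* sketch of the budget split (constant values assumed, see above) */
        timeout_us = 500 * 1000;                            /* 500 ms default  */
        tmo_count  = 5;                                     /* short sleeps    */
        tmo_count += DIV_ROUND_UP(timeout_us - 5 * 3, 25);  /* standard sleeps */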
@@ -4334,26 +4361,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
        mutex_unlock(&bp->hwrm_cmd_lock);
 
        if (rc || err) {
-               switch (ring_type) {
-               case RING_FREE_REQ_RING_TYPE_L2_CMPL:
-                       netdev_err(bp->dev, "hwrm_ring_alloc cp failed. rc:%x err:%x\n",
-                                  rc, err);
-                       return -1;
-
-               case RING_FREE_REQ_RING_TYPE_RX:
-                       netdev_err(bp->dev, "hwrm_ring_alloc rx failed. rc:%x err:%x\n",
-                                  rc, err);
-                       return -1;
-
-               case RING_FREE_REQ_RING_TYPE_TX:
-                       netdev_err(bp->dev, "hwrm_ring_alloc tx failed. rc:%x err:%x\n",
-                                  rc, err);
-                       return -1;
-
-               default:
-                       netdev_err(bp->dev, "Invalid ring\n");
-                       return -1;
-               }
+               netdev_err(bp->dev, "hwrm_ring_alloc type %d failed. rc:%x err:%x\n",
+                          ring_type, rc, err);
+               return -EIO;
        }
        ring->fw_ring_id = ring_id;
        return rc;
@@ -4477,23 +4487,9 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
        mutex_unlock(&bp->hwrm_cmd_lock);
 
        if (rc || error_code) {
-               switch (ring_type) {
-               case RING_FREE_REQ_RING_TYPE_L2_CMPL:
-                       netdev_err(bp->dev, "hwrm_ring_free cp failed. rc:%d\n",
-                                  rc);
-                       return rc;
-               case RING_FREE_REQ_RING_TYPE_RX:
-                       netdev_err(bp->dev, "hwrm_ring_free rx failed. rc:%d\n",
-                                  rc);
-                       return rc;
-               case RING_FREE_REQ_RING_TYPE_TX:
-                       netdev_err(bp->dev, "hwrm_ring_free tx failed. rc:%d\n",
-                                  rc);
-                       return rc;
-               default:
-                       netdev_err(bp->dev, "Invalid ring\n");
-                       return -1;
-               }
+               netdev_err(bp->dev, "hwrm_ring_free type %d failed. rc:%x err:%x\n",
+                          ring_type, rc, error_code);
+               return -EIO;
        }
        return 0;
 }
@@ -4721,6 +4717,10 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 
        __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
                                     cp_rings, vnics);
+       req.enables |= cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
+                                  FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS);
+       req.num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
+       req.num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                return -ENOMEM;
@@ -5309,6 +5309,7 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
        for (i = 0; i < bp->max_tc; i++) {
                bp->q_info[i].queue_id = *qptr++;
                bp->q_info[i].queue_profile = *qptr++;
+               bp->tc_to_qidx[i] = i;
        }
 
 qportcfg_exit:
@@ -5376,7 +5377,8 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp)
        struct tm tm;
        time64_t now = ktime_get_real_seconds();
 
-       if (bp->hwrm_spec_code < 0x10400)
+       if ((BNXT_VF(bp) && bp->hwrm_spec_code < 0x10901) ||
+           bp->hwrm_spec_code < 0x10400)
                return -EOPNOTSUPP;
 
        time64_to_tm(now, 0, &tm);
@@ -5958,6 +5960,9 @@ static int bnxt_init_msix(struct bnxt *bp)
        if (total_vecs > max)
                total_vecs = max;
 
+       if (!total_vecs)
+               return 0;
+
        msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL);
        if (!msix_ent)
                return -ENOMEM;
@@ -6457,6 +6462,9 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
        }
        mutex_unlock(&bp->hwrm_cmd_lock);
 
+       if (!BNXT_SINGLE_PF(bp))
+               return 0;
+
        diff = link_info->support_auto_speeds ^ link_info->advertising;
        if ((link_info->support_auto_speeds | diff) !=
            link_info->support_auto_speeds) {
@@ -6843,6 +6851,8 @@ static void bnxt_preset_reg_win(struct bnxt *bp)
        }
 }
 
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp);
+
 static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 {
        int rc = 0;
@@ -6850,6 +6860,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        bnxt_preset_reg_win(bp);
        netif_carrier_off(bp->dev);
        if (irq_re_init) {
+               /* Reserve rings now if none were reserved at driver probe. */
+               rc = bnxt_init_dflt_ring_mode(bp);
+               if (rc) {
+                       netdev_err(bp->dev, "Failed to reserve default rings at open\n");
+                       return rc;
+               }
                rc = bnxt_reserve_rings(bp);
                if (rc)
                        return rc;
@@ -6877,6 +6893,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        }
 
        bnxt_enable_napi(bp);
+       bnxt_debug_dev_init(bp);
 
        rc = bnxt_init_nic(bp, irq_re_init);
        if (rc) {
@@ -6909,6 +6926,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        return 0;
 
 open_err:
+       bnxt_debug_dev_exit(bp);
        bnxt_disable_napi(bp);
        bnxt_del_napi(bp);
 
@@ -7002,6 +7020,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
 
        /* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */
 
+       bnxt_debug_dev_exit(bp);
        bnxt_disable_napi(bp);
        del_timer_sync(&bp->timer);
        bnxt_free_skbs(bp);
@@ -7279,6 +7298,25 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
        return rc;
 }
 
+static bool bnxt_can_reserve_rings(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+       if ((bp->flags & BNXT_FLAG_NEW_RM) && BNXT_VF(bp)) {
+               struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+
+               /* No minimum rings were provisioned by the PF.  Don't
+                * reserve rings by default when device is down.
+                */
+               if (hw_resc->min_tx_rings || hw_resc->resv_tx_rings)
+                       return true;
+
+               if (!netif_running(bp->dev))
+                       return false;
+       }
+#endif
+       return true;
+}
+
 /* If the chip and firmware supports RFS */
 static bool bnxt_rfs_supported(struct bnxt *bp)
 {
@@ -7295,7 +7333,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
 #ifdef CONFIG_RFS_ACCEL
        int vnics, max_vnics, max_rss_ctxs;
 
-       if (!(bp->flags & BNXT_FLAG_MSIX_CAP))
+       if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
                return false;
 
        vnics = 1 + bp->rx_nr_rings;
@@ -7729,7 +7767,7 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
        coal->coal_bufs = 30;
        coal->coal_ticks_irq = 1;
        coal->coal_bufs_irq = 2;
-       coal->idle_thresh = 25;
+       coal->idle_thresh = 50;
        coal->bufs_per_record = 2;
        coal->budget = 64;              /* NAPI budget */
 
@@ -8529,6 +8567,9 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
 {
        int dflt_rings, max_rx_rings, max_tx_rings, rc;
 
+       if (!bnxt_can_reserve_rings(bp))
+               return 0;
+
        if (sh)
                bp->flags |= BNXT_FLAG_SHARED_RINGS;
        dflt_rings = netif_get_num_default_rss_queues();
@@ -8574,6 +8615,29 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
        return rc;
 }
 
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
+{
+       int rc;
+
+       if (bp->tx_nr_rings)
+               return 0;
+
+       rc = bnxt_set_dflt_rings(bp, true);
+       if (rc) {
+               netdev_err(bp->dev, "Not enough rings available.\n");
+               return rc;
+       }
+       rc = bnxt_init_int_mode(bp);
+       if (rc)
+               return rc;
+       bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+       if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
+               bp->flags |= BNXT_FLAG_RFS;
+               bp->dev->features |= NETIF_F_NTUPLE;
+       }
+       return 0;
+}
+
 int bnxt_restore_pf_fw_resources(struct bnxt *bp)
 {
        int rc;
@@ -8614,8 +8678,8 @@ static int bnxt_init_mac_addr(struct bnxt *bp)
                        memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
                } else {
                        eth_hw_addr_random(bp->dev);
-                       rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
                }
+               rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
 #endif
        }
        return rc;
@@ -9078,6 +9142,7 @@ static struct pci_driver bnxt_pci_driver = {
 
 static int __init bnxt_init(void)
 {
+       bnxt_debug_init();
        return pci_register_driver(&bnxt_pci_driver);
 }
 
@@ -9086,6 +9151,7 @@ static void __exit bnxt_exit(void)
        pci_unregister_driver(&bnxt_pci_driver);
        if (bnxt_pf_wq)
                destroy_workqueue(bnxt_pf_wq);
+       bnxt_debug_exit();
 }
 
 module_init(bnxt_init);
index 3d55d3b568654494188b14928b40fb2b5929cf84..9b14eb610b9f653b61092d74b3ab9257a84383d9 100644
@@ -532,6 +532,19 @@ struct rx_tpa_end_cmp_ext {
 #define BNXT_HWRM_REQ_MAX_SIZE         128
 #define BNXT_HWRM_REQS_PER_PAGE                (BNXT_PAGE_SIZE /       \
                                         BNXT_HWRM_REQ_MAX_SIZE)
+#define HWRM_SHORT_MIN_TIMEOUT         3
+#define HWRM_SHORT_MAX_TIMEOUT         10
+#define HWRM_SHORT_TIMEOUT_COUNTER     5
+
+#define HWRM_MIN_TIMEOUT               25
+#define HWRM_MAX_TIMEOUT               40
+
+#define HWRM_TOTAL_TIMEOUT(n)  (((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ?  \
+       ((n) * HWRM_SHORT_MIN_TIMEOUT) :                                \
+       (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +          \
+        ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
+
+#define HWRM_VALID_BIT_DELAY_USEC      20
 
 #define BNXT_RX_EVENT  1
 #define BNXT_AGG_EVENT 2
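For reference, HWRM_TOTAL_TIMEOUT(n) models n polling iterations in which the first HWRM_SHORT_TIMEOUT_COUNTER iterations wait the short minimum and the remainder wait the normal minimum (presumably the usleep_range() lower bounds used by the HWRM completion poll). A quick worked check, in the same units as the constants above:

    HWRM_TOTAL_TIMEOUT(3) = 3 * 3            = 9
    HWRM_TOTAL_TIMEOUT(5) = 5 * 3            = 15
    HWRM_TOTAL_TIMEOUT(8) = 5 * 3 + 3 * 25   = 90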
@@ -1242,6 +1255,7 @@ struct bnxt {
        u8                      max_tc;
        u8                      max_lltc;       /* lossless TCs */
        struct bnxt_queue_info  q_info[BNXT_MAX_QUEUE];
+       u8                      tc_to_qidx[BNXT_MAX_QUEUE];
 
        unsigned int            current_interval;
 #define BNXT_TIMER_INTERVAL    HZ
@@ -1384,6 +1398,8 @@ struct bnxt {
        u16                     *cfa_code_map; /* cfa_code -> vf_idx map */
        u8                      switch_id[8];
        struct bnxt_tc_info     *tc_info;
+       struct dentry           *debugfs_pdev;
+       struct dentry           *debugfs_dim;
 };
 
 #define BNXT_RX_STATS_OFFSET(counter)                  \
@@ -1398,8 +1414,7 @@ struct bnxt {
 
 #define I2C_DEV_ADDR_A0                                0xa0
 #define I2C_DEV_ADDR_A2                                0xa2
-#define SFP_EEPROM_SFF_8472_COMP_ADDR          0x5e
-#define SFP_EEPROM_SFF_8472_COMP_SIZE          1
+#define SFF_DIAG_SUPPORT_OFFSET                        0x5c
 #define SFF_MODULE_ID_SFP                      0x3
 #define SFF_MODULE_ID_QSFP                     0xc
 #define SFF_MODULE_ID_QSFP_PLUS                        0xd
index 3c746f2d9ed8137f84c01151000c127259e937a9..d5bc72cecde3287e37fba1365b1f54104853738d 100644
 #include "bnxt_dcb.h"
 
 #ifdef CONFIG_BNXT_DCB
+static int bnxt_queue_to_tc(struct bnxt *bp, u8 queue_id)
+{
+       int i, j;
+
+       for (i = 0; i < bp->max_tc; i++) {
+               if (bp->q_info[i].queue_id == queue_id) {
+                       for (j = 0; j < bp->max_tc; j++) {
+                               if (bp->tc_to_qidx[j] == i)
+                                       return j;
+                       }
+               }
+       }
+       return -EINVAL;
+}
+
 static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
 {
        struct hwrm_queue_pri2cos_cfg_input req = {0};
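An illustrative trace of the new bnxt_queue_to_tc() helper, with made-up table contents (not values from the driver):

    /* Assume q_info[].queue_id = {4, 5, 6, 7} and tc_to_qidx = {2, 0, 1, 3}.
     * bnxt_queue_to_tc(bp, 5): queue_id 5 is found at hardware index i = 1;
     * scanning tc_to_qidx[] for the TC mapped to index 1 hits tc_to_qidx[2]
     * == 1, so the function returns TC 2.
     */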
@@ -33,10 +48,13 @@ static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
 
        pri2cos = &req.pri0_cos_queue_id;
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               u8 qidx;
+
                req.enables |= cpu_to_le32(
                        QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i);
 
-               pri2cos[i] = bp->q_info[ets->prio_tc[i]].queue_id;
+               qidx = bp->tc_to_qidx[ets->prio_tc[i]];
+               pri2cos[i] = bp->q_info[qidx].queue_id;
        }
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        return rc;
@@ -55,17 +73,15 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (!rc) {
                u8 *pri2cos = &resp->pri0_cos_queue_id;
-               int i, j;
+               int i;
 
                for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                        u8 queue_id = pri2cos[i];
+                       int tc;
 
-                       for (j = 0; j < bp->max_tc; j++) {
-                               if (bp->q_info[j].queue_id == queue_id) {
-                                       ets->prio_tc[i] = j;
-                                       break;
-                               }
-                       }
+                       tc = bnxt_queue_to_tc(bp, queue_id);
+                       if (tc >= 0)
+                               ets->prio_tc[i] = tc;
                }
        }
        mutex_unlock(&bp->hwrm_cmd_lock);
@@ -81,13 +97,15 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
        void *data;
 
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
-       data = &req.unused_0;
-       for (i = 0; i < max_tc; i++, data += sizeof(cos2bw) - 4) {
+       for (i = 0; i < max_tc; i++) {
+               u8 qidx;
+
                req.enables |= cpu_to_le32(
                        QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
 
                memset(&cos2bw, 0, sizeof(cos2bw));
-               cos2bw.queue_id = bp->q_info[i].queue_id;
+               qidx = bp->tc_to_qidx[i];
+               cos2bw.queue_id = bp->q_info[qidx].queue_id;
                if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
                        cos2bw.tsa =
                                QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP;
@@ -103,8 +121,9 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
                                cpu_to_le32((ets->tc_tx_bw[i] * 100) |
                                            BW_VALUE_UNIT_PERCENT1_100);
                }
+               data = &req.unused_0 + qidx * (sizeof(cos2bw) - 4);
                memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
-               if (i == 0) {
+               if (qidx == 0) {
                        req.queue_id0 = cos2bw.queue_id;
                        req.unused_0 = 0;
                }
@@ -132,66 +151,81 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
 
        data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id);
        for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) {
-               int j;
+               int tc;
 
                memcpy(&cos2bw.queue_id, data, sizeof(cos2bw) - 4);
                if (i == 0)
                        cos2bw.queue_id = resp->queue_id0;
 
-               for (j = 0; j < bp->max_tc; j++) {
-                       if (bp->q_info[j].queue_id != cos2bw.queue_id)
-                               continue;
-                       if (cos2bw.tsa ==
-                           QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
-                               ets->tc_tsa[j] = IEEE_8021QAZ_TSA_STRICT;
-                       } else {
-                               ets->tc_tsa[j] = IEEE_8021QAZ_TSA_ETS;
-                               ets->tc_tx_bw[j] = cos2bw.bw_weight;
-                       }
+               tc = bnxt_queue_to_tc(bp, cos2bw.queue_id);
+               if (tc < 0)
+                       continue;
+
+               if (cos2bw.tsa ==
+                   QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
+                       ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_STRICT;
+               } else {
+                       ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_ETS;
+                       ets->tc_tx_bw[tc] = cos2bw.bw_weight;
                }
        }
        mutex_unlock(&bp->hwrm_cmd_lock);
        return 0;
 }
 
-static int bnxt_hwrm_queue_cfg(struct bnxt *bp, unsigned int lltc_mask)
+static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
 {
-       struct hwrm_queue_cfg_input req = {0};
-       int i;
+       unsigned long qmap = 0;
+       int max = bp->max_tc;
+       int i, j, rc;
 
-       if (netif_running(bp->dev))
-               bnxt_tx_disable(bp);
+       /* Assign lossless TCs first */
+       for (i = 0, j = 0; i < max; ) {
+               if (lltc_mask & (1 << i)) {
+                       if (BNXT_LLQ(bp->q_info[j].queue_profile)) {
+                               bp->tc_to_qidx[i] = j;
+                               __set_bit(j, &qmap);
+                               i++;
+                       }
+                       j++;
+                       continue;
+               }
+               i++;
+       }
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_CFG, -1, -1);
-       req.flags = cpu_to_le32(QUEUE_CFG_REQ_FLAGS_PATH_BIDIR);
-       req.enables = cpu_to_le32(QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE);
+       for (i = 0, j = 0; i < max; i++) {
+               if (lltc_mask & (1 << i))
+                       continue;
+               j = find_next_zero_bit(&qmap, max, j);
+               bp->tc_to_qidx[i] = j;
+               __set_bit(j, &qmap);
+               j++;
+       }
 
-       /* Configure lossless queues to lossy first */
-       req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
-       for (i = 0; i < bp->max_tc; i++) {
-               if (BNXT_LLQ(bp->q_info[i].queue_profile)) {
-                       req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
-                       hwrm_send_message(bp, &req, sizeof(req),
-                                         HWRM_CMD_TIMEOUT);
-                       bp->q_info[i].queue_profile =
-                               QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
+       if (netif_running(bp->dev)) {
+               bnxt_close_nic(bp, false, false);
+               rc = bnxt_open_nic(bp, false, false);
+               if (rc) {
+                       netdev_warn(bp->dev, "failed to open NIC, rc = %d\n", rc);
+                       return rc;
                }
        }
-
-       /* Now configure desired queues to lossless */
-       req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
-       for (i = 0; i < bp->max_tc; i++) {
-               if (lltc_mask & (1 << i)) {
-                       req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
-                       hwrm_send_message(bp, &req, sizeof(req),
-                                         HWRM_CMD_TIMEOUT);
-                       bp->q_info[i].queue_profile =
-                               QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
+       if (bp->ieee_ets) {
+               int tc = netdev_get_num_tc(bp->dev);
+
+               if (!tc)
+                       tc = 1;
+               rc = bnxt_hwrm_queue_cos2bw_cfg(bp, bp->ieee_ets, tc);
+               if (rc) {
+                       netdev_warn(bp->dev, "failed to config BW, rc = %d\n", rc);
+                       return rc;
+               }
+               rc = bnxt_hwrm_queue_pri2cos_cfg(bp, bp->ieee_ets);
+               if (rc) {
+                       netdev_warn(bp->dev, "failed to config prio, rc = %d\n", rc);
+                       return rc;
                }
        }
-       if (netif_running(bp->dev))
-               bnxt_tx_enable(bp);
-
        return 0;
 }
 
@@ -201,7 +235,7 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
        struct ieee_ets *my_ets = bp->ieee_ets;
        unsigned int tc_mask = 0, pri_mask = 0;
        u8 i, pri, lltc_count = 0;
-       bool need_q_recfg = false;
+       bool need_q_remap = false;
        int rc;
 
        if (!my_ets)
@@ -221,21 +255,25 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
        if (lltc_count > bp->max_lltc)
                return -EINVAL;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
-       req.flags = cpu_to_le32(pri_mask);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (rc)
-               return rc;
-
        for (i = 0; i < bp->max_tc; i++) {
                if (tc_mask & (1 << i)) {
-                       if (!BNXT_LLQ(bp->q_info[i].queue_profile))
-                               need_q_recfg = true;
+                       u8 qidx = bp->tc_to_qidx[i];
+
+                       if (!BNXT_LLQ(bp->q_info[qidx].queue_profile)) {
+                               need_q_remap = true;
+                               break;
+                       }
                }
        }
 
-       if (need_q_recfg)
-               rc = bnxt_hwrm_queue_cfg(bp, tc_mask);
+       if (need_q_remap)
+               rc = bnxt_queue_remap(bp, tc_mask);
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
+       req.flags = cpu_to_le32(pri_mask);
+       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (rc)
+               return rc;
 
        return rc;
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
new file mode 100644
index 0000000..94e208e
--- /dev/null
@@ -0,0 +1,124 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "bnxt_hsi.h"
+#include <linux/net_dim.h>
+#include "bnxt.h"
+#include "bnxt_debugfs.h"
+
+static struct dentry *bnxt_debug_mnt;
+
+static ssize_t debugfs_dim_read(struct file *filep,
+                               char __user *buffer,
+                               size_t count, loff_t *ppos)
+{
+       struct net_dim *dim = filep->private_data;
+       int len;
+       char *buf;
+
+       if (*ppos)
+               return 0;
+       if (!dim)
+               return -ENODEV;
+       buf = kasprintf(GFP_KERNEL,
+                       "state = %d\n" \
+                       "profile_ix = %d\n" \
+                       "mode = %d\n" \
+                       "tune_state = %d\n" \
+                       "steps_right = %d\n" \
+                       "steps_left = %d\n" \
+                       "tired = %d\n",
+                       dim->state,
+                       dim->profile_ix,
+                       dim->mode,
+                       dim->tune_state,
+                       dim->steps_right,
+                       dim->steps_left,
+                       dim->tired);
+       if (!buf)
+               return -ENOMEM;
+       if (count < strlen(buf)) {
+               kfree(buf);
+               return -ENOSPC;
+       }
+       len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+       kfree(buf);
+       return len;
+}
+
+static const struct file_operations debugfs_dim_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = debugfs_dim_read,
+};
+
+static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx,
+                                           struct dentry *dd)
+{
+       static char qname[16];
+
+       snprintf(qname, 10, "%d", ring_idx);
+       return debugfs_create_file(qname, 0600, dd,
+                                  dim, &debugfs_dim_fops);
+}
+
+void bnxt_debug_dev_init(struct bnxt *bp)
+{
+       const char *pname = pci_name(bp->pdev);
+       struct dentry *pdevf;
+       int i;
+
+       bp->debugfs_pdev = debugfs_create_dir(pname, bnxt_debug_mnt);
+       if (bp->debugfs_pdev) {
+               pdevf = debugfs_create_dir("dim", bp->debugfs_pdev);
+               if (!pdevf) {
+                       pr_err("failed to create debugfs entry %s/dim\n",
+                              pname);
+                       return;
+               }
+               bp->debugfs_dim = pdevf;
+               /* create files for each rx ring */
+               for (i = 0; i < bp->cp_nr_rings; i++) {
+                       struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+
+                       if (cpr && bp->bnapi[i]->rx_ring) {
+                               pdevf = debugfs_dim_ring_init(&cpr->dim, i,
+                                                             bp->debugfs_dim);
+                               if (!pdevf)
+                                       pr_err("failed to create debugfs entry %s/dim/%d\n",
+                                              pname, i);
+                       }
+               }
+       } else {
+               pr_err("failed to create debugfs entry %s\n", pname);
+       }
+}
+
+void bnxt_debug_dev_exit(struct bnxt *bp)
+{
+       if (bp) {
+               debugfs_remove_recursive(bp->debugfs_pdev);
+               bp->debugfs_pdev = NULL;
+       }
+}
+
+void bnxt_debug_init(void)
+{
+       bnxt_debug_mnt = debugfs_create_dir("bnxt_en", NULL);
+       if (!bnxt_debug_mnt)
+               pr_err("failed to init bnxt_en debugfs\n");
+}
+
+void bnxt_debug_exit(void)
+{
+       debugfs_remove_recursive(bnxt_debug_mnt);
+}
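Assuming debugfs is mounted in the usual place, the new files can then be read directly, e.g. cat /sys/kernel/debug/bnxt_en/0000:01:00.0/dim/0 (PCI address hypothetical) to dump the net_dim state machine for RX ring 0.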
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h
new file mode 100644 (file)
index 0000000..d0bb488
--- /dev/null
@@ -0,0 +1,23 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+
+#ifdef CONFIG_DEBUG_FS
+void bnxt_debug_init(void);
+void bnxt_debug_exit(void);
+void bnxt_debug_dev_init(struct bnxt *bp);
+void bnxt_debug_dev_exit(struct bnxt *bp);
+#else
+static inline void bnxt_debug_init(void) {}
+static inline void bnxt_debug_exit(void) {}
+static inline void bnxt_debug_dev_init(struct bnxt *bp) {}
+static inline void bnxt_debug_dev_exit(struct bnxt *bp) {}
+#endif
index 408dd190331ed10018787b55d9cf1dfe04b23eb9..afa97c8bb081c6999cfdacfa3d34cc78b40a928f 100644
@@ -21,11 +21,11 @@ void bnxt_dim_work(struct work_struct *work)
        struct bnxt_napi *bnapi = container_of(cpr,
                                               struct bnxt_napi,
                                               cp_ring);
-       struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode,
-                                                                 dim->profile_ix);
+       struct net_dim_cq_moder cur_moder =
+               net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
-       cpr->rx_ring_coal.coal_ticks = cur_profile.usec;
-       cpr->rx_ring_coal.coal_bufs = cur_profile.pkts;
+       cpr->rx_ring_coal.coal_ticks = cur_moder.usec;
+       cpr->rx_ring_coal.coal_bufs = cur_moder.pkts;
 
        bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi);
        dim->state = NET_DIM_START_MEASURE;
index 8ba14ae00e8ffcbc0849ad2080c4efca6893ff53..7270c8b0cef38cd1e2165ce94503fc612030de9d 100644
@@ -140,6 +140,19 @@ static int bnxt_set_coalesce(struct net_device *dev,
 #define BNXT_RX_STATS_EXT_ENTRY(counter)       \
        { BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) }
 
+enum {
+       RX_TOTAL_DISCARDS,
+       TX_TOTAL_DISCARDS,
+};
+
+static struct {
+       u64                     counter;
+       char                    string[ETH_GSTRING_LEN];
+} bnxt_sw_func_stats[] = {
+       {0, "rx_total_discard_pkts"},
+       {0, "tx_total_discard_pkts"},
+};
+
 static const struct {
        long offset;
        char string[ETH_GSTRING_LEN];
@@ -237,6 +250,7 @@ static const struct {
        BNXT_RX_STATS_EXT_ENTRY(resume_roce_pause_events),
 };
 
+#define BNXT_NUM_SW_FUNC_STATS ARRAY_SIZE(bnxt_sw_func_stats)
 #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
 #define BNXT_NUM_PORT_STATS_EXT ARRAY_SIZE(bnxt_port_stats_ext_arr)
 
@@ -244,6 +258,8 @@ static int bnxt_get_num_stats(struct bnxt *bp)
 {
        int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
 
+       num_stats += BNXT_NUM_SW_FUNC_STATS;
+
        if (bp->flags & BNXT_FLAG_PORT_STATS)
                num_stats += BNXT_NUM_PORT_STATS;
 
@@ -279,6 +295,9 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
        if (!bp->bnapi)
                return;
 
+       for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++)
+               bnxt_sw_func_stats[i].counter = 0;
+
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
                struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -288,7 +307,16 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
                for (k = 0; k < stat_fields; j++, k++)
                        buf[j] = le64_to_cpu(hw_stats[k]);
                buf[j++] = cpr->rx_l4_csum_errors;
+
+               bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
+                       le64_to_cpu(cpr->hw_stats->rx_discard_pkts);
+               bnxt_sw_func_stats[TX_TOTAL_DISCARDS].counter +=
+                       le64_to_cpu(cpr->hw_stats->tx_discard_pkts);
        }
+
+       for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
+               buf[j] = bnxt_sw_func_stats[i].counter;
+
        if (bp->flags & BNXT_FLAG_PORT_STATS) {
                __le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
 
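Note the ordering contract here: the two software counters are recomputed from each ring's hw_stats on every stats read and are written into buf immediately after the per-ring entries and before the port stats, which must match the string layout emitted in bnxt_get_strings() below.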
@@ -359,6 +387,11 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
                        sprintf(buf, "[%d]: rx_l4_csum_errors", i);
                        buf += ETH_GSTRING_LEN;
                }
+               for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
+                       strcpy(buf, bnxt_sw_func_stats[i].string);
+                       buf += ETH_GSTRING_LEN;
+               }
+
                if (bp->flags & BNXT_FLAG_PORT_STATS) {
                        for (i = 0; i < BNXT_NUM_PORT_STATS; i++) {
                                strcpy(buf, bnxt_port_stats_arr[i].string);
@@ -551,6 +584,8 @@ static int bnxt_set_channels(struct net_device *dev,
                         * to re-enable
                         */
                }
+       } else {
+               rc = bnxt_reserve_rings(bp);
        }
 
        return rc;
@@ -1785,6 +1820,11 @@ static int nvm_get_dir_info(struct net_device *dev, u32 *entries, u32 *length)
 
 static int bnxt_get_eeprom_len(struct net_device *dev)
 {
+       struct bnxt *bp = netdev_priv(dev);
+
+       if (BNXT_VF(bp))
+               return 0;
+
        /* The -1 return value allows the entire 32-bit range of offsets to be
         * passed via the ethtool command-line utility.
         */
@@ -2144,9 +2184,8 @@ static int bnxt_read_sfp_module_eeprom_info(struct bnxt *bp, u16 i2c_addr,
 static int bnxt_get_module_info(struct net_device *dev,
                                struct ethtool_modinfo *modinfo)
 {
+       u8 data[SFF_DIAG_SUPPORT_OFFSET + 1];
        struct bnxt *bp = netdev_priv(dev);
-       struct hwrm_port_phy_i2c_read_input req = {0};
-       struct hwrm_port_phy_i2c_read_output *output = bp->hwrm_cmd_resp_addr;
        int rc;
 
        /* No point in going further if phy status indicates
@@ -2161,21 +2200,19 @@ static int bnxt_get_module_info(struct net_device *dev,
        if (bp->hwrm_spec_code < 0x10202)
                return -EOPNOTSUPP;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_I2C_READ, -1, -1);
-       req.i2c_slave_addr = I2C_DEV_ADDR_A0;
-       req.page_number = 0;
-       req.page_offset = cpu_to_le16(SFP_EEPROM_SFF_8472_COMP_ADDR);
-       req.data_length = SFP_EEPROM_SFF_8472_COMP_SIZE;
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A0, 0, 0,
+                                             SFF_DIAG_SUPPORT_OFFSET + 1,
+                                             data);
        if (!rc) {
-               u32 module_id = le32_to_cpu(output->data[0]);
+               u8 module_id = data[0];
+               u8 diag_supported = data[SFF_DIAG_SUPPORT_OFFSET];
 
                switch (module_id) {
                case SFF_MODULE_ID_SFP:
                        modinfo->type = ETH_MODULE_SFF_8472;
                        modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+                       if (!diag_supported)
+                               modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
                        break;
                case SFF_MODULE_ID_QSFP:
                case SFF_MODULE_ID_QSFP_PLUS:
@@ -2191,7 +2228,6 @@ static int bnxt_get_module_info(struct net_device *dev,
                        break;
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
        return rc;
 }
 
index f952963d594e3ffc04a885abd330c6488f58c85b..a64910892c25e9526c313fa86a7dec0e1b05bf6f 100644 (file)
@@ -462,13 +462,13 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
        vf_vnics = hw_resc->max_vnics - bp->nr_vnics;
        vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
 
-       req.min_rsscos_ctx = cpu_to_le16(1);
-       req.max_rsscos_ctx = cpu_to_le16(1);
+       req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
+       req.max_rsscos_ctx = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
        if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL) {
                req.min_cmpl_rings = cpu_to_le16(1);
                req.min_tx_rings = cpu_to_le16(1);
                req.min_rx_rings = cpu_to_le16(1);
-               req.min_l2_ctxs = cpu_to_le16(1);
+               req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MIN_L2_CTX);
                req.min_vnics = cpu_to_le16(1);
                req.min_stat_ctx = cpu_to_le16(1);
                req.min_hw_ring_grps = cpu_to_le16(1);
@@ -483,7 +483,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
                req.min_cmpl_rings = cpu_to_le16(vf_cp_rings);
                req.min_tx_rings = cpu_to_le16(vf_tx_rings);
                req.min_rx_rings = cpu_to_le16(vf_rx_rings);
-               req.min_l2_ctxs = cpu_to_le16(4);
+               req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
                req.min_vnics = cpu_to_le16(vf_vnics);
                req.min_stat_ctx = cpu_to_le16(vf_stat_ctx);
                req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
@@ -491,7 +491,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
        req.max_cmpl_rings = cpu_to_le16(vf_cp_rings);
        req.max_tx_rings = cpu_to_le16(vf_tx_rings);
        req.max_rx_rings = cpu_to_le16(vf_rx_rings);
-       req.max_l2_ctxs = cpu_to_le16(4);
+       req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
        req.max_vnics = cpu_to_le16(vf_vnics);
        req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
        req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
@@ -809,6 +809,9 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
        struct hwrm_fwd_resp_input req = {0};
        struct hwrm_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
+       if (BNXT_FWD_RESP_SIZE_ERR(msg_size))
+               return -EINVAL;
+
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_RESP, -1, -1);
 
        /* Set the new target id */
@@ -845,6 +848,9 @@ static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
        struct hwrm_reject_fwd_resp_input req = {0};
        struct hwrm_reject_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
+       if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size))
+               return -EINVAL;
+
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_REJECT_FWD_RESP, -1, -1);
        /* Set the new target id */
        req.target_id = cpu_to_le16(vf->fw_fid);
@@ -877,6 +883,9 @@ static int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
        struct hwrm_exec_fwd_resp_input req = {0};
        struct hwrm_exec_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
+       if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size))
+               return -EINVAL;
+
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_EXEC_FWD_RESP, -1, -1);
        /* Set the new target id */
        req.target_id = cpu_to_le16(vf->fw_fid);
@@ -914,7 +923,8 @@ static int bnxt_vf_configure_mac(struct bnxt *bp, struct bnxt_vf_info *vf)
        if (req->enables & cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR)) {
                if (is_valid_ether_addr(req->dflt_mac_addr) &&
                    ((vf->flags & BNXT_VF_TRUST) ||
-                    (!is_valid_ether_addr(vf->mac_addr)))) {
+                    !is_valid_ether_addr(vf->mac_addr) ||
+                    ether_addr_equal(req->dflt_mac_addr, vf->mac_addr))) {
                        ether_addr_copy(vf->vf_mac_addr, req->dflt_mac_addr);
                        return bnxt_hwrm_exec_fwd_resp(bp, vf, msg_size);
                }
index d10f6f6c7860fda4c5ed73d6fd083656d13e2f40..e9b20cd1988194c265e311ac61ca5ea7b738c118 100644
 #ifndef BNXT_SRIOV_H
 #define BNXT_SRIOV_H
 
+#define BNXT_FWD_RESP_SIZE_ERR(n)                                      \
+       ((offsetof(struct hwrm_fwd_resp_input, encap_resp) + n) >       \
+        sizeof(struct hwrm_fwd_resp_input))
+
+#define BNXT_EXEC_FWD_RESP_SIZE_ERR(n)                                 \
+       ((offsetof(struct hwrm_exec_fwd_resp_input, encap_request) + n) >\
+        offsetof(struct hwrm_exec_fwd_resp_input, encap_resp_target_id))
+
+#define BNXT_REJ_FWD_RESP_SIZE_ERR(n)                                  \
+       ((offsetof(struct hwrm_reject_fwd_resp_input, encap_request) + n) >\
+        offsetof(struct hwrm_reject_fwd_resp_input, encap_resp_target_id))
+
+#define BNXT_VF_MIN_RSS_CTX    1
+#define BNXT_VF_MAX_RSS_CTX    1
+#define BNXT_VF_MIN_L2_CTX     1
+#define BNXT_VF_MAX_L2_CTX     4
+
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
 int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
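These macros bound a VF-supplied msg_size against the fixed request layout before the encap copy. A made-up numeric example (offsets assumed, not taken from the ABI): if encap_resp started at byte 40 of a 128-byte hwrm_fwd_resp_input, any msg_size greater than 88 would trip BNXT_FWD_RESP_SIZE_ERR() and the caller returns -EINVAL before building the request.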
index 0445f2c0c6295bbeb9242c3375960b04dab94dbb..20c1681bb1afeea35e23f20242abc0fe34fd1304 100644
@@ -652,7 +652,7 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
        pkts = ring->rx_max_coalesced_frames;
 
        if (ec->use_adaptive_rx_coalesce && !ring->dim.use_dim) {
-               moder = net_dim_get_def_profile(ring->dim.dim.mode);
+               moder = net_dim_get_def_rx_moderation(ring->dim.dim.mode);
                usecs = moder.usec;
                pkts = moder.pkts;
        }
@@ -1925,7 +1925,7 @@ static void bcmgenet_dim_work(struct work_struct *work)
        struct bcmgenet_rx_ring *ring =
                        container_of(ndim, struct bcmgenet_rx_ring, dim);
        struct net_dim_cq_moder cur_profile =
-                       net_dim_get_profile(dim->mode, dim->profile_ix);
+                       net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
        bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts);
        dim->state = NET_DIM_START_MEASURE;
@@ -2102,7 +2102,7 @@ static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
 
        /* If DIM was enabled, re-apply default parameters */
        if (dim->use_dim) {
-               moder = net_dim_get_def_profile(dim->dim.mode);
+               moder = net_dim_get_def_rx_moderation(dim->dim.mode);
                usecs = moder.usec;
                pkts = moder.pkts;
        }
index 08bbb639be1a5ab3bc10ea27b31df88cb6d2b569..9f59b1270a7c68da3086d22db2930d041cbaaec8 100644
@@ -8733,14 +8733,15 @@ static void tg3_free_consistent(struct tg3 *tp)
        tg3_mem_rx_release(tp);
        tg3_mem_tx_release(tp);
 
-       /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */
-       tg3_full_lock(tp, 0);
+       /* tp->hw_stats can be referenced safely:
+        *     1. under rtnl_lock
+        *     2. or under tp->lock if TG3_FLAG_INIT_COMPLETE is set.
+        */
        if (tp->hw_stats) {
                dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats),
                                  tp->hw_stats, tp->stats_mapping);
                tp->hw_stats = NULL;
        }
-       tg3_full_unlock(tp);
 }
 
 /*
@@ -14178,7 +14179,7 @@ static void tg3_get_stats64(struct net_device *dev,
        struct tg3 *tp = netdev_priv(dev);
 
        spin_lock_bh(&tp->lock);
-       if (!tp->hw_stats) {
+       if (!tp->hw_stats || !tg3_flag(tp, INIT_COMPLETE)) {
                *stats = tp->net_stats_prev;
                spin_unlock_bh(&tp->lock);
                return;
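The reasoning behind dropping the full lock: tg3_free_consistent() runs under rtnl_lock, and tg3_get_stats64() (which holds tp->lock) now also bails out to net_stats_prev unless TG3_FLAG_INIT_COMPLETE is set, so the stats path can no longer dereference hw_stats while it is being freed.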
index b4c9268100bbc7ffd1b294dcfa2adc1902768ec5..3e93df5d4e3b2573f88cc427e7eefc6d1930e3ff 100644
@@ -591,16 +591,10 @@ static int macb_mii_init(struct macb *bp)
        dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
 
        np = bp->pdev->dev.of_node;
+       if (pdata)
+               bp->mii_bus->phy_mask = pdata->phy_mask;
 
-       if (np) {
-               err = of_mdiobus_register(bp->mii_bus, np);
-       } else {
-               if (pdata)
-                       bp->mii_bus->phy_mask = pdata->phy_mask;
-
-               err = mdiobus_register(bp->mii_bus);
-       }
-
+       err = of_mdiobus_register(bp->mii_bus, np);
        if (err)
                goto err_out_free_mdiobus;
 
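This collapse relies on of_mdiobus_register() falling back to plain mdiobus_register() when the device_node argument is NULL (true at least for kernels of this vintage), so the pdata phy_mask only needs to be applied up front.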
index bc9861c90ea3472060e15ded6a1032fffc0a6cd1..929d485a3a2fea6b7f13f389c9e90cda183da737 100644
@@ -1245,7 +1245,7 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct)
            CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
 }
 
-static int cn23xx_sriov_config(struct octeon_device *oct)
+int cn23xx_sriov_config(struct octeon_device *oct)
 {
        struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
        u32 max_rings, total_rings, max_vfs, rings_per_vf;
@@ -1269,8 +1269,8 @@ static int cn23xx_sriov_config(struct octeon_device *oct)
                break;
        }
 
-       if (max_rings <= num_present_cpus())
-               num_pf_rings = 1;
+       if (oct->sriov_info.num_pf_rings)
+               num_pf_rings = oct->sriov_info.num_pf_rings;
        else
                num_pf_rings = num_present_cpus();
 
index 63b3de4f2bfedd4e91f97ddda41a0a217631391a..e6f31d0d5c0b66e99675109b073f301f06fdfaaa 100644
@@ -61,6 +61,8 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
 
 void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
 
+int cn23xx_sriov_config(struct octeon_device *oct);
+
 int cn23xx_fw_loaded(struct octeon_device *oct);
 
 void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
index 2a94eee943b255637698efd12190dc74a1a1a47b..8093c5eafea23f215e68792c725fb05eeafa87e4 100644
 /* OOM task polling interval */
 #define LIO_OOM_POLL_INTERVAL_MS 250
 
+#define OCTNIC_MAX_SG  MAX_SKB_FRAGS
+
+/**
+ * \brief Callback for getting interface configuration
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+void lio_if_cfg_callback(struct octeon_device *oct,
+                        u32 status __attribute__((unused)), void *buf)
+{
+       struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+       struct liquidio_if_cfg_context *ctx;
+       struct liquidio_if_cfg_resp *resp;
+
+       resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+       ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+       oct = lio_get_device(ctx->octeon_id);
+       if (resp->status)
+               dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
+                       CVM_CAST64(resp->status));
+       WRITE_ONCE(ctx->cond, 1);
+
+       snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
+                resp->cfg_info.liquidio_firmware_version);
+
+       /* This barrier is required to be sure that the response has been
+        * written fully before waking up the handler
+        */
+       wmb();
+
+       wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Delete gather lists
+ * @param lio per-network private data
+ */
+void lio_delete_glists(struct lio *lio)
+{
+       struct octnic_gather *g;
+       int i;
+
+       kfree(lio->glist_lock);
+       lio->glist_lock = NULL;
+
+       if (!lio->glist)
+               return;
+
+       for (i = 0; i < lio->oct_dev->num_iqs; i++) {
+               do {
+                       g = (struct octnic_gather *)
+                           lio_list_delete_head(&lio->glist[i]);
+                       kfree(g);
+               } while (g);
+
+               if (lio->glists_virt_base && lio->glists_virt_base[i] &&
+                   lio->glists_dma_base && lio->glists_dma_base[i]) {
+                       lio_dma_free(lio->oct_dev,
+                                    lio->glist_entry_size * lio->tx_qsize,
+                                    lio->glists_virt_base[i],
+                                    lio->glists_dma_base[i]);
+               }
+       }
+
+       kfree(lio->glists_virt_base);
+       lio->glists_virt_base = NULL;
+
+       kfree(lio->glists_dma_base);
+       lio->glists_dma_base = NULL;
+
+       kfree(lio->glist);
+       lio->glist = NULL;
+}
+
+/**
+ * \brief Setup gather lists
+ * @param lio per-network private data
+ */
+int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
+{
+       struct octnic_gather *g;
+       int i, j;
+
+       lio->glist_lock =
+           kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL);
+       if (!lio->glist_lock)
+               return -ENOMEM;
+
+       lio->glist =
+           kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL);
+       if (!lio->glist) {
+               kfree(lio->glist_lock);
+               lio->glist_lock = NULL;
+               return -ENOMEM;
+       }
+
+       lio->glist_entry_size =
+               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+       /* allocate memory to store virtual and dma base address of
+        * per glist consistent memory
+        */
+       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+                                       GFP_KERNEL);
+       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+                                      GFP_KERNEL);
+
+       if (!lio->glists_virt_base || !lio->glists_dma_base) {
+               lio_delete_glists(lio);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < num_iqs; i++) {
+               int numa_node = dev_to_node(&oct->pci_dev->dev);
+
+               spin_lock_init(&lio->glist_lock[i]);
+
+               INIT_LIST_HEAD(&lio->glist[i]);
+
+               lio->glists_virt_base[i] =
+                       lio_dma_alloc(oct,
+                                     lio->glist_entry_size * lio->tx_qsize,
+                                     &lio->glists_dma_base[i]);
+
+               if (!lio->glists_virt_base[i]) {
+                       lio_delete_glists(lio);
+                       return -ENOMEM;
+               }
+
+               for (j = 0; j < lio->tx_qsize; j++) {
+                       g = kzalloc_node(sizeof(*g), GFP_KERNEL,
+                                        numa_node);
+                       if (!g)
+                               g = kzalloc(sizeof(*g), GFP_KERNEL);
+                       if (!g)
+                               break;
+
+                       g->sg = lio->glists_virt_base[i] +
+                               (j * lio->glist_entry_size);
+
+                       g->sg_dma_ptr = lio->glists_dma_base[i] +
+                                       (j * lio->glist_entry_size);
+
+                       list_add_tail(&g->list, &lio->glist[i]);
+               }
+
+               if (j != lio->tx_qsize) {
+                       lio_delete_glists(lio);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
        struct lio *lio = GET_LIO(netdev);
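Each per-IQ gather list is carved from one DMA-coherent block, entry j of queue i sitting at glists_virt_base[i] + j * glist_entry_size. Assuming each OCT_SG_ENTRY describes up to four buffer pointers (an inference, not stated here), ROUNDUP4(OCTNIC_MAX_SG) >> 2 is the entry count needed for a maximally fragmented skb. With made-up values:

    /* With OCTNIC_MAX_SG == 17 (a common MAX_SKB_FRAGS) and an assumed
     * OCT_SG_ENTRY_SIZE of 32 bytes:
     *   glist_entry_size = ROUNDUP8((ROUNDUP4(17) >> 2) * 32)
     *                    = ROUNDUP8(5 * 32) = 160 bytes per gather list.
     */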
@@ -880,8 +1036,8 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
        int num_ioq_vectors;
        int irqret, err;
 
-       oct->num_msix_irqs = num_ioqs;
        if (oct->msix_on) {
+               oct->num_msix_irqs = num_ioqs;
                if (OCTEON_CN23XX_PF(oct)) {
                        num_interrupts = MAX_IOQ_INTERRUPTS_PER_PF + 1;
 
@@ -1169,3 +1325,355 @@ int lio_wait_for_clean_oq(struct octeon_device *oct)
 
        return pending_pkts;
 }
+
+static void
+octnet_nic_stats_callback(struct octeon_device *oct_dev,
+                         u32 status, void *ptr)
+{
+       struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+       struct oct_nic_stats_resp *resp =
+           (struct oct_nic_stats_resp *)sc->virtrptr;
+       struct oct_nic_stats_ctrl *ctrl =
+           (struct oct_nic_stats_ctrl *)sc->ctxptr;
+       struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
+       struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
+       struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire;
+       struct nic_tx_stats *tstats = &oct_dev->link_stats.fromhost;
+
+       if (status != OCTEON_REQUEST_TIMEOUT && !resp->status) {
+               octeon_swap_8B_data((u64 *)&resp->stats,
+                                   (sizeof(struct oct_link_stats)) >> 3);
+
+               /* RX link-level stats */
+               rstats->total_rcvd = rsp_rstats->total_rcvd;
+               rstats->bytes_rcvd = rsp_rstats->bytes_rcvd;
+               rstats->total_bcst = rsp_rstats->total_bcst;
+               rstats->total_mcst = rsp_rstats->total_mcst;
+               rstats->runts      = rsp_rstats->runts;
+               rstats->ctl_rcvd   = rsp_rstats->ctl_rcvd;
+               /* Accounts for over/under-run of buffers */
+               rstats->fifo_err  = rsp_rstats->fifo_err;
+               rstats->dmac_drop = rsp_rstats->dmac_drop;
+               rstats->fcs_err   = rsp_rstats->fcs_err;
+               rstats->jabber_err = rsp_rstats->jabber_err;
+               rstats->l2_err    = rsp_rstats->l2_err;
+               rstats->frame_err = rsp_rstats->frame_err;
+               rstats->red_drops = rsp_rstats->red_drops;
+
+               /* RX firmware stats */
+               rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd;
+               rstats->fw_total_fwd = rsp_rstats->fw_total_fwd;
+               rstats->fw_total_mcast = rsp_rstats->fw_total_mcast;
+               rstats->fw_total_bcast = rsp_rstats->fw_total_bcast;
+               rstats->fw_err_pko = rsp_rstats->fw_err_pko;
+               rstats->fw_err_link = rsp_rstats->fw_err_link;
+               rstats->fw_err_drop = rsp_rstats->fw_err_drop;
+               rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan;
+               rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err;
+
+               /* Number of packets that are LROed      */
+               rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts;
+               /* Number of octets that are LROed       */
+               rstats->fw_lro_octs = rsp_rstats->fw_lro_octs;
+               /* Number of LRO packets formed          */
+               rstats->fw_total_lro = rsp_rstats->fw_total_lro;
+               /* Number of times lRO of packet aborted */
+               /* Number of times LRO of a packet was aborted */
+               rstats->fw_lro_aborts_port = rsp_rstats->fw_lro_aborts_port;
+               rstats->fw_lro_aborts_seq = rsp_rstats->fw_lro_aborts_seq;
+               rstats->fw_lro_aborts_tsval = rsp_rstats->fw_lro_aborts_tsval;
+               rstats->fw_lro_aborts_timer = rsp_rstats->fw_lro_aborts_timer;
+               /* intrmod: packet forward rate */
+               rstats->fwd_rate = rsp_rstats->fwd_rate;
+
+               /* TX link-level stats */
+               tstats->total_pkts_sent = rsp_tstats->total_pkts_sent;
+               tstats->total_bytes_sent = rsp_tstats->total_bytes_sent;
+               tstats->mcast_pkts_sent = rsp_tstats->mcast_pkts_sent;
+               tstats->bcast_pkts_sent = rsp_tstats->bcast_pkts_sent;
+               tstats->ctl_sent = rsp_tstats->ctl_sent;
+               /* Packets sent after one collision*/
+               tstats->one_collision_sent = rsp_tstats->one_collision_sent;
+               /* Packets sent after multiple collision*/
+               tstats->multi_collision_sent = rsp_tstats->multi_collision_sent;
+               /* Packets not sent due to max collisions */
+               tstats->max_collision_fail = rsp_tstats->max_collision_fail;
+               /* Packets not sent due to max deferrals */
+               tstats->max_deferral_fail = rsp_tstats->max_deferral_fail;
+               /* Accounts for over/under-run of buffers */
+               tstats->fifo_err = rsp_tstats->fifo_err;
+               tstats->runts = rsp_tstats->runts;
+               /* Total number of collisions detected */
+               tstats->total_collisions = rsp_tstats->total_collisions;
+
+               /* firmware stats */
+               tstats->fw_total_sent = rsp_tstats->fw_total_sent;
+               tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
+               tstats->fw_total_mcast_sent = rsp_tstats->fw_total_mcast_sent;
+               tstats->fw_total_bcast_sent = rsp_tstats->fw_total_bcast_sent;
+               tstats->fw_err_pko = rsp_tstats->fw_err_pko;
+               tstats->fw_err_pki = rsp_tstats->fw_err_pki;
+               tstats->fw_err_link = rsp_tstats->fw_err_link;
+               tstats->fw_err_drop = rsp_tstats->fw_err_drop;
+               tstats->fw_tso = rsp_tstats->fw_tso;
+               tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd;
+               tstats->fw_err_tso = rsp_tstats->fw_err_tso;
+               tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan;
+
+               resp->status = 1;
+       } else {
+               resp->status = -1;
+       }
+       complete(&ctrl->complete);
+}
+
+int octnet_get_link_stats(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct_dev = lio->oct_dev;
+       struct octeon_soft_command *sc;
+       struct oct_nic_stats_ctrl *ctrl;
+       struct oct_nic_stats_resp *resp;
+       int retval;
+
+       /* Alloc soft command */
+       sc = (struct octeon_soft_command *)
+               octeon_alloc_soft_command(oct_dev,
+                                         0,
+                                         sizeof(struct oct_nic_stats_resp),
+                                         sizeof(struct octnic_ctrl_pkt));
+
+       if (!sc)
+               return -ENOMEM;
+
+       resp = (struct oct_nic_stats_resp *)sc->virtrptr;
+       memset(resp, 0, sizeof(struct oct_nic_stats_resp));
+
+       ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr;
+       memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl));
+       ctrl->netdev = netdev;
+       init_completion(&ctrl->complete);
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
+                                   OPCODE_NIC_PORT_STATS, 0, 0, 0);
+
+       sc->callback = octnet_nic_stats_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = 500;    /* in milliseconds */
+
+       retval = octeon_send_soft_command(oct_dev, sc);
+       if (retval == IQ_SEND_FAILED) {
+               octeon_free_soft_command(oct_dev, sc);
+               return -EINVAL;
+       }
+
+       wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000));
+
+       if (resp->status != 1) {
+               octeon_free_soft_command(oct_dev, sc);
+
+               return -EINVAL;
+       }
+
+       octeon_free_soft_command(oct_dev, sc);
+
+       return 0;
+}
+
+static void liquidio_nic_seapi_ctl_callback(struct octeon_device *oct,
+                                           u32 status,
+                                           void *buf)
+{
+       struct liquidio_nic_seapi_ctl_context *ctx;
+       struct octeon_soft_command *sc = buf;
+
+       ctx = sc->ctxptr;
+
+       oct = lio_get_device(ctx->octeon_id);
+       if (status) {
+               dev_err(&oct->pci_dev->dev, "%s: instruction failed. Status: %llx\n",
+                       __func__,
+                       CVM_CAST64(status));
+       }
+       ctx->status = status;
+       complete(&ctx->complete);
+}
+
+int liquidio_set_speed(struct lio *lio, int speed)
+{
+       struct liquidio_nic_seapi_ctl_context *ctx;
+       struct octeon_device *oct = lio->oct_dev;
+       struct oct_nic_seapi_resp *resp;
+       struct octeon_soft_command *sc;
+       union octnet_cmd *ncmd;
+       u32 ctx_size;
+       int retval;
+       u32 var;
+
+       if (oct->speed_setting == speed)
+               return 0;
+
+       if (!OCTEON_CN23XX_PF(oct)) {
+               dev_err(&oct->pci_dev->dev, "%s: SET SPEED only for PF\n",
+                       __func__);
+               return -EOPNOTSUPP;
+       }
+
+       ctx_size = sizeof(struct liquidio_nic_seapi_ctl_context);
+       sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+                                      sizeof(struct oct_nic_seapi_resp),
+                                      ctx_size);
+       if (!sc)
+               return -ENOMEM;
+
+       ncmd = sc->virtdptr;
+       ctx  = sc->ctxptr;
+       resp = sc->virtrptr;
+       memset(resp, 0, sizeof(struct oct_nic_seapi_resp));
+
+       ctx->octeon_id = lio_get_device_id(oct);
+       ctx->status = 0;
+       init_completion(&ctx->complete);
+
+       ncmd->u64 = 0;
+       ncmd->s.cmd = SEAPI_CMD_SPEED_SET;
+       ncmd->s.param1 = speed;
+
+       octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+                                   OPCODE_NIC_UBOOT_CTL, 0, 0, 0);
+
+       sc->callback = liquidio_nic_seapi_ctl_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = 5000;
+
+       retval = octeon_send_soft_command(oct, sc);
+       if (retval == IQ_SEND_FAILED) {
+               dev_info(&oct->pci_dev->dev, "Failed to send soft command\n");
+               retval = -EBUSY;
+       } else {
+               /* Wait for response or timeout */
+               if (wait_for_completion_timeout(&ctx->complete,
+                                               msecs_to_jiffies(10000)) == 0) {
+                       dev_err(&oct->pci_dev->dev, "%s: sc timeout\n",
+                               __func__);
+                       octeon_free_soft_command(oct, sc);
+                       return -EINTR;
+               }
+
+               retval = resp->status;
+
+               if (retval) {
+                       dev_err(&oct->pci_dev->dev, "%s failed, retval=%d\n",
+                               __func__, retval);
+                       octeon_free_soft_command(oct, sc);
+                       return -EIO;
+               }
+
+               var = be32_to_cpu((__force __be32)resp->speed);
+               if (var != speed) {
+                       dev_err(&oct->pci_dev->dev,
+                               "%s: setting failed speed= %x, expect %x\n",
+                               __func__, var, speed);
+               }
+
+               oct->speed_setting = var;
+       }
+
+       octeon_free_soft_command(oct, sc);
+
+       return retval;
+}
+
+int liquidio_get_speed(struct lio *lio)
+{
+       struct liquidio_nic_seapi_ctl_context *ctx;
+       struct octeon_device *oct = lio->oct_dev;
+       struct oct_nic_seapi_resp *resp;
+       struct octeon_soft_command *sc;
+       union octnet_cmd *ncmd;
+       u32 ctx_size;
+       int retval;
+
+       ctx_size = sizeof(struct liquidio_nic_seapi_ctl_context);
+       sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+                                      sizeof(struct oct_nic_seapi_resp),
+                                      ctx_size);
+       if (!sc)
+               return -ENOMEM;
+
+       ncmd = sc->virtdptr;
+       ctx  = sc->ctxptr;
+       resp = sc->virtrptr;
+       memset(resp, 0, sizeof(struct oct_nic_seapi_resp));
+
+       ctx->octeon_id = lio_get_device_id(oct);
+       ctx->status = 0;
+       init_completion(&ctx->complete);
+
+       ncmd->u64 = 0;
+       ncmd->s.cmd = SEAPI_CMD_SPEED_GET;
+
+       octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+                                   OPCODE_NIC_UBOOT_CTL, 0, 0, 0);
+
+       sc->callback = liquidio_nic_seapi_ctl_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = 5000;
+
+       retval = octeon_send_soft_command(oct, sc);
+       if (retval == IQ_SEND_FAILED) {
+               dev_info(&oct->pci_dev->dev, "Failed to send soft command\n");
+               oct->no_speed_setting = 1;
+               oct->speed_setting = 25;
+
+               retval = -EBUSY;
+       } else {
+               if (wait_for_completion_timeout(&ctx->complete,
+                                               msecs_to_jiffies(10000)) == 0) {
+                       dev_err(&oct->pci_dev->dev, "%s: sc timeout\n",
+                               __func__);
+
+                       oct->speed_setting = 25;
+                       oct->no_speed_setting = 1;
+
+                       octeon_free_soft_command(oct, sc);
+
+                       return -EINTR;
+               }
+               retval = resp->status;
+               if (retval) {
+                       dev_err(&oct->pci_dev->dev,
+                               "%s failed retval=%d\n", __func__, retval);
+                       oct->no_speed_setting = 1;
+                       oct->speed_setting = 25;
+                       octeon_free_soft_command(oct, sc);
+                       retval = -EIO;
+               } else {
+                       u32 var;
+
+                       var = be32_to_cpu((__force __be32)resp->speed);
+                       oct->speed_setting = var;
+                       if (var == 0xffff) {
+                               oct->no_speed_setting = 1;
+                               /* Unable to access boot variables;
+                                * use the default value for this NIC type.
+                                */
+                               oct->speed_setting = 25;
+                       }
+               }
+       }
+
+       octeon_free_soft_command(oct, sc);
+
+       return retval;
+}
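
A condensed sketch of the fallback policy above, for orientation: on a send
failure, a timeout, a non-zero response status, or a 0xffff speed (boot
variables unreadable), the driver pins speed_setting to the hard-coded
25 Gb/s default and flags no_speed_setting. A hypothetical probe-time
consumer:

        liquidio_get_speed(lio);

        if (oct->no_speed_setting)
                dev_info(&oct->pci_dev->dev,
                         "boot variables unavailable, assuming %d Gb/s\n",
                         oct->speed_setting);   /* 25 by default */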
index 9926a12dd80512fa0d3b66d8b28f8473444eb9f5..06f7449c569d5cdc03051b1f8e5617643a910406 100644 (file)
@@ -32,7 +32,6 @@
 #include "cn23xx_vf_device.h"
 
 static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs);
-static int octnet_get_link_stats(struct net_device *netdev);
 
 struct oct_intrmod_context {
        int octeon_id;
@@ -113,6 +112,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
        "tx_tso_err",
        "tx_vxlan",
 
+       "tx_mcast",
+       "tx_bcast",
+
        "mac_tx_total_pkts",
        "mac_tx_total_bytes",
        "mac_tx_mcast_pkts",
@@ -120,7 +122,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
        "mac_tx_ctl_packets",
        "mac_tx_total_collisions",
        "mac_tx_one_collision",
-       "mac_tx_multi_collison",
+       "mac_tx_multi_collision",
        "mac_tx_max_collision_fail",
        "mac_tx_max_deferal_fail",
        "mac_tx_fifo_err",
@@ -128,6 +130,8 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 
        "rx_total_rcvd",
        "rx_total_fwd",
+       "rx_mcast",
+       "rx_bcast",
        "rx_jabber_err",
        "rx_l2_err",
        "rx_frame_err",
@@ -172,6 +176,10 @@ static const char oct_vf_stats_strings[][ETH_GSTRING_LEN] = {
        "tx_errors",
        "rx_dropped",
        "tx_dropped",
+       "rx_mcast",
+       "tx_mcast",
+       "rx_bcast",
+       "tx_bcast",
        "link_state_changes",
 };
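
The string tables here and the lio_get_ethtool_stats() /
lio_vf_get_ethtool_stats() changes later in this patch are matched purely by
array position: every name added above must gain a data[i++] at the same
offset below. A minimal sketch of that ethtool contract, with hypothetical
driver names:

        struct foo_priv {                       /* hypothetical private state */
                u64 rx_packets;
                u64 tx_packets;
        };

        static const char foo_stats_strings[][ETH_GSTRING_LEN] = {
                "rx_packets",
                "tx_packets",
        };

        static void foo_get_ethtool_stats(struct net_device *netdev,
                                          struct ethtool_stats *stats,
                                          u64 *data)
        {
                struct foo_priv *priv = netdev_priv(netdev);

                /* order must mirror foo_stats_strings[] exactly */
                data[0] = priv->rx_packets;
                data[1] = priv->tx_packets;
        }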
 
@@ -222,46 +230,147 @@ static int lio_get_link_ksettings(struct net_device *netdev,
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
        struct oct_link_info *linfo;
-       u32 supported = 0, advertising = 0;
 
        linfo = &lio->linfo;
 
+       ethtool_link_ksettings_zero_link_mode(ecmd, supported);
+       ethtool_link_ksettings_zero_link_mode(ecmd, advertising);
+
        switch (linfo->link.s.phy_type) {
        case LIO_PHY_PORT_TP:
                ecmd->base.port = PORT_TP;
-               supported = (SUPPORTED_10000baseT_Full |
-                            SUPPORTED_TP | SUPPORTED_Pause);
-               advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
                ecmd->base.autoneg = AUTONEG_DISABLE;
+               ethtool_link_ksettings_add_link_mode(ecmd, supported, TP);
+               ethtool_link_ksettings_add_link_mode(ecmd, supported, Pause);
+               ethtool_link_ksettings_add_link_mode(ecmd, supported,
+                                                    10000baseT_Full);
+
+               ethtool_link_ksettings_add_link_mode(ecmd, advertising, Pause);
+               ethtool_link_ksettings_add_link_mode(ecmd, advertising,
+                                                    10000baseT_Full);
+
                break;
 
        case LIO_PHY_PORT_FIBRE:
-               ecmd->base.port = PORT_FIBRE;
-
-               if (linfo->link.s.speed == SPEED_10000) {
-                       supported = SUPPORTED_10000baseT_Full;
-                       advertising = ADVERTISED_10000baseT_Full;
+               if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
+                   linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
+                   linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
+                   linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
+                       dev_dbg(&oct->pci_dev->dev, "ecmd->base.transceiver is XCVR_EXTERNAL\n");
+               } else {
+                       dev_err(&oct->pci_dev->dev, "Unknown link interface mode: %d\n",
+                               linfo->link.s.if_mode);
                }
 
-               supported |= SUPPORTED_FIBRE | SUPPORTED_Pause;
-               advertising |= ADVERTISED_Pause;
+               ecmd->base.port = PORT_FIBRE;
                ecmd->base.autoneg = AUTONEG_DISABLE;
+               ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE);
+
+               ethtool_link_ksettings_add_link_mode(ecmd, supported, Pause);
+               ethtool_link_ksettings_add_link_mode(ecmd, advertising, Pause);
+               if (oct->subsystem_id == OCTEON_CN2350_25GB_SUBSYS_ID ||
+                   oct->subsystem_id == OCTEON_CN2360_25GB_SUBSYS_ID) {
+                       if (OCTEON_CN23XX_PF(oct)) {
+                               ethtool_link_ksettings_add_link_mode
+                                       (ecmd, supported, 25000baseSR_Full);
+                               ethtool_link_ksettings_add_link_mode
+                                       (ecmd, supported, 25000baseKR_Full);
+                               ethtool_link_ksettings_add_link_mode
+                                       (ecmd, supported, 25000baseCR_Full);
+
+                               if (oct->no_speed_setting == 0)  {
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                10000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                10000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                10000baseCR_Full);
+                               }
+
+                               if (oct->no_speed_setting == 0)
+                                       liquidio_get_speed(lio);
+                               else
+                                       oct->speed_setting = 25;
+
+                               if (oct->speed_setting == 10) {
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                10000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                10000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                10000baseCR_Full);
+                               }
+                               if (oct->speed_setting == 25) {
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                25000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                25000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                25000baseCR_Full);
+                               }
+                       } else { /* VF */
+                               if (linfo->link.s.speed == 10000) {
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                10000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                10000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                10000baseCR_Full);
+
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                10000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                10000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                10000baseCR_Full);
+                               }
+
+                               if (linfo->link.s.speed == 25000) {
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                25000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                25000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, supported,
+                                                25000baseCR_Full);
+
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                25000baseSR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                25000baseKR_Full);
+                                       ethtool_link_ksettings_add_link_mode
+                                               (ecmd, advertising,
+                                                25000baseCR_Full);
+                               }
+                       }
+               } else {
+                       ethtool_link_ksettings_add_link_mode(ecmd, supported,
+                                                            10000baseT_Full);
+                       ethtool_link_ksettings_add_link_mode(ecmd, advertising,
+                                                            10000baseT_Full);
+               }
                break;
        }
 
-       if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
-           linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
-           linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
-           linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
-               ethtool_convert_legacy_u32_to_link_mode(
-                       ecmd->link_modes.supported, supported);
-               ethtool_convert_legacy_u32_to_link_mode(
-                       ecmd->link_modes.advertising, advertising);
-       } else {
-               dev_err(&oct->pci_dev->dev, "Unknown link interface reported %d\n",
-                       linfo->link.s.if_mode);
-       }
-
        if (linfo->link.s.link_up) {
                ecmd->base.speed = linfo->link.s.speed;
                ecmd->base.duplex = linfo->link.s.duplex;
@@ -273,6 +382,51 @@ static int lio_get_link_ksettings(struct net_device *netdev,
        return 0;
 }
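
The move away from the u32 masks is not cosmetic: the 25G link modes
(25000baseSR/KR/CR_Full) have no legacy SUPPORTED_*/ADVERTISED_*
equivalents, so they cannot pass through
ethtool_convert_legacy_u32_to_link_mode(); the bitmap helpers are the only
way to report them. A minimal sketch of the new-style pattern (ecmd is a
struct ethtool_link_ksettings *, as in the handler above):

        ethtool_link_ksettings_zero_link_mode(ecmd, supported);
        ethtool_link_ksettings_add_link_mode(ecmd, supported, 25000baseSR_Full);

        if (ethtool_link_ksettings_test_link_mode(ecmd, supported,
                                                  25000baseSR_Full))
                dev_dbg(&oct->pci_dev->dev, "25G-SR reported as supported\n");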
 
+static int lio_set_link_ksettings(struct net_device *netdev,
+                                 const struct ethtool_link_ksettings *ecmd)
+{
+       const int speed = ecmd->base.speed;
+       struct lio *lio = GET_LIO(netdev);
+       struct oct_link_info *linfo;
+       struct octeon_device *oct;
+       u32 is25G = 0;
+
+       oct = lio->oct_dev;
+
+       linfo = &lio->linfo;
+
+       if (oct->subsystem_id == OCTEON_CN2350_25GB_SUBSYS_ID ||
+           oct->subsystem_id == OCTEON_CN2360_25GB_SUBSYS_ID) {
+               is25G = 1;
+       } else {
+               return -EOPNOTSUPP;
+       }
+
+       if (oct->no_speed_setting) {
+               dev_err(&oct->pci_dev->dev, "%s: Changing speed is not supported\n",
+                       __func__);
+               return -EOPNOTSUPP;
+       }
+
+       if ((ecmd->base.duplex != DUPLEX_UNKNOWN &&
+            ecmd->base.duplex != linfo->link.s.duplex) ||
+            ecmd->base.autoneg != AUTONEG_DISABLE ||
+           (ecmd->base.speed != 10000 && ecmd->base.speed != 25000 &&
+            ecmd->base.speed != SPEED_UNKNOWN))
+               return -EOPNOTSUPP;
+
+       if ((oct->speed_boot == speed / 1000) &&
+           oct->speed_boot == oct->speed_setting)
+               return 0;
+
+       liquidio_set_speed(lio, speed / 1000);
+
+       dev_dbg(&oct->pci_dev->dev, "Port speed is set to %dG\n",
+               oct->speed_setting);
+
+       return 0;
+}
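
From userspace the new handler is reached through the standard ethtool speed
controls; a typical invocation on a 25G CN23XX PF (interface name
hypothetical) is:

        ethtool -s eth0 speed 10000 duplex full autoneg off

As coded above, anything else, autoneg on, a duplex change, or a speed other
than 10000/25000, is rejected with -EOPNOTSUPP.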
+
 static void
 lio_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 {
@@ -353,7 +507,14 @@ lio_ethtool_get_channels(struct net_device *dev,
                rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
                tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
        } else if (OCTEON_CN23XX_PF(oct)) {
-               max_combined = lio->linfo.num_txpciq;
+               if (oct->sriov_info.sriov_enabled) {
+                       max_combined = lio->linfo.num_txpciq;
+               } else {
+                       struct octeon_config *conf23_pf =
+                               CHIP_CONF(oct, cn23xx_pf);
+
+                       max_combined = CFG_GET_IQ_MAX_Q(conf23_pf);
+               }
                combined_count = oct->num_iqs;
        } else if (OCTEON_CN23XX_VF(oct)) {
                u64 reg_val = 0ULL;
@@ -417,9 +578,15 @@ lio_irq_reallocate_irqs(struct octeon_device *oct, uint32_t num_ioqs)
 
        kfree(oct->irq_name_storage);
        oct->irq_name_storage = NULL;
+
+       if (octeon_allocate_ioq_vector(oct, num_ioqs)) {
+               dev_err(&oct->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
+               return -1;
+       }
+
        if (octeon_setup_interrupt(oct, num_ioqs)) {
                dev_info(&oct->pci_dev->dev, "Setup interrupt failed\n");
-               return 1;
+               return -1;
        }
 
        /* Enable Octeon device interrupts */
@@ -449,7 +616,16 @@ lio_ethtool_set_channels(struct net_device *dev,
        combined_count = channel->combined_count;
 
        if (OCTEON_CN23XX_PF(oct)) {
-               max_combined = channel->max_combined;
+               if (oct->sriov_info.sriov_enabled) {
+                       max_combined = lio->linfo.num_txpciq;
+               } else {
+                       struct octeon_config *conf23_pf =
+                               CHIP_CONF(oct,
+                                         cn23xx_pf);
+
+                       max_combined =
+                               CFG_GET_IQ_MAX_Q(conf23_pf);
+               }
        } else if (OCTEON_CN23XX_VF(oct)) {
                u64 reg_val = 0ULL;
                u64 ctrl = CN23XX_VF_SLI_IQ_PKT_CONTROL64(0);
@@ -477,7 +653,6 @@ lio_ethtool_set_channels(struct net_device *dev,
        if (lio_reset_queues(dev, combined_count))
                return -EINVAL;
 
-       lio_irq_reallocate_irqs(oct, combined_count);
        if (stopped)
                dev->netdev_ops->ndo_open(dev);
 
@@ -816,12 +991,120 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
        ering->rx_jumbo_max_pending = 0;
 }
 
+static int lio_23xx_reconfigure_queue_count(struct lio *lio)
+{
+       struct octeon_device *oct = lio->oct_dev;
+       struct liquidio_if_cfg_context *ctx;
+       u32 resp_size, ctx_size, data_size;
+       struct liquidio_if_cfg_resp *resp;
+       struct octeon_soft_command *sc;
+       union oct_nic_if_cfg if_cfg;
+       struct lio_version *vdata;
+       u32 ifidx_or_pfnum;
+       int retval;
+       int j;
+
+       resp_size = sizeof(struct liquidio_if_cfg_resp);
+       ctx_size = sizeof(struct liquidio_if_cfg_context);
+       data_size = sizeof(struct lio_version);
+       sc = (struct octeon_soft_command *)
+               octeon_alloc_soft_command(oct, data_size,
+                                         resp_size, ctx_size);
+       if (!sc) {
+               dev_err(&oct->pci_dev->dev, "%s: Failed to allocate soft command\n",
+                       __func__);
+               return -1;
+       }
+
+       resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+       ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+       vdata = (struct lio_version *)sc->virtdptr;
+
+       vdata->major = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+       vdata->minor = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+       vdata->micro = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
+
+       ifidx_or_pfnum = oct->pf_num;
+       WRITE_ONCE(ctx->cond, 0);
+       ctx->octeon_id = lio_get_device_id(oct);
+       init_waitqueue_head(&ctx->wc);
+
+       if_cfg.u64 = 0;
+       if_cfg.s.num_iqueues = oct->sriov_info.num_pf_rings;
+       if_cfg.s.num_oqueues = oct->sriov_info.num_pf_rings;
+       if_cfg.s.base_queue = oct->sriov_info.pf_srn;
+       if_cfg.s.gmx_port_id = oct->pf_num;
+
+       sc->iq_no = 0;
+       octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+                                   OPCODE_NIC_QCOUNT_UPDATE, 0,
+                                   if_cfg.u64, 0);
+       sc->callback = lio_if_cfg_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = LIO_IFCFG_WAIT_TIME;
+
+       retval = octeon_send_soft_command(oct, sc);
+       if (retval == IQ_SEND_FAILED) {
+               dev_err(&oct->pci_dev->dev,
+                       "iq/oq config failed status: %x\n",
+                       retval);
+               goto qcount_update_fail;
+       }
+
+       if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+               dev_err(&oct->pci_dev->dev, "Wait interrupted\n");
+               return -1;
+       }
+
+       retval = resp->status;
+       if (retval) {
+               dev_err(&oct->pci_dev->dev, "iq/oq config failed\n");
+               goto qcount_update_fail;
+       }
+
+       octeon_swap_8B_data((u64 *)(&resp->cfg_info),
+                           (sizeof(struct liquidio_if_cfg_info)) >> 3);
+
+       lio->ifidx = ifidx_or_pfnum;
+       lio->linfo.num_rxpciq = hweight64(resp->cfg_info.iqmask);
+       lio->linfo.num_txpciq = hweight64(resp->cfg_info.iqmask);
+       for (j = 0; j < lio->linfo.num_rxpciq; j++) {
+               lio->linfo.rxpciq[j].u64 =
+                       resp->cfg_info.linfo.rxpciq[j].u64;
+       }
+
+       for (j = 0; j < lio->linfo.num_txpciq; j++) {
+               lio->linfo.txpciq[j].u64 =
+                       resp->cfg_info.linfo.txpciq[j].u64;
+       }
+
+       lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
+       lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
+       lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
+       lio->txq = lio->linfo.txpciq[0].s.q_no;
+       lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+
+       octeon_free_soft_command(oct, sc);
+       dev_info(&oct->pci_dev->dev, "Queue count updated to %d\n",
+                lio->linfo.num_rxpciq);
+
+       return 0;
+
+qcount_update_fail:
+       octeon_free_soft_command(oct, sc);
+
+       return -1;
+}
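
The iqmask/oqmask decoded above are 64-bit bitmaps of active queues;
hweight64() (a population count) turns a mask into a queue count. A worked
micro-example with illustrative values:

        u64 iqmask = 0xffULL;                   /* firmware reports queues 0-7 */
        u32 num_txpciq = hweight64(iqmask);     /* popcount of 0xff -> 8 */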
+
 static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
+       int i, queue_count_update = 0;
        struct napi_struct *napi, *n;
-       int i, update = 0;
+       int ret;
+
+       schedule_timeout_uninterruptible(msecs_to_jiffies(100));
 
        if (wait_for_pending_requests(oct))
                dev_err(&oct->pci_dev->dev, "There were pending requests\n");
@@ -830,7 +1113,7 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
                dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
 
        if (octeon_set_io_queues_off(oct)) {
-               dev_err(&oct->pci_dev->dev, "setting io queues off failed\n");
+               dev_err(&oct->pci_dev->dev, "Setting io queues off failed\n");
                return -1;
        }
 
@@ -843,9 +1126,40 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
                netif_napi_del(napi);
 
        if (num_qs != oct->num_iqs) {
-               netif_set_real_num_rx_queues(netdev, num_qs);
-               netif_set_real_num_tx_queues(netdev, num_qs);
-               update = 1;
+               ret = netif_set_real_num_rx_queues(netdev, num_qs);
+               if (ret) {
+                       dev_err(&oct->pci_dev->dev,
+                               "Setting real number rx failed\n");
+                       return ret;
+               }
+
+               ret = netif_set_real_num_tx_queues(netdev, num_qs);
+               if (ret) {
+                       dev_err(&oct->pci_dev->dev,
+                               "Setting real number tx failed\n");
+                       return ret;
+               }
+
+               /* queue_count_update distinguishes a queue-count
+                * re-configuration from a descriptor-count (ring size)
+                * re-configuration.
+                */
+               queue_count_update = 1;
+       }
+
+       /* Queues can be re-configured in two scenarios: SRIOV enabled and
+        * SRIOV disabled. A few steps, such as recreating queue zero and
+        * resetting the glists and IRQs, are required in both cases. The
+        * SRIOV-disabled case needs extra steps, such as updating
+        * sriov_info for the octeon device.
+        */
+       if (queue_count_update) {
+               lio_delete_glists(lio);
+
+               /* Delete the mbox for a PF with SRIOV disabled, because
+                * sriov_info is about to change.
+                */
+               if ((OCTEON_CN23XX_PF(oct)) && !oct->sriov_info.sriov_enabled)
+                       oct->fn_list.free_mbox(oct);
        }
 
        for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
@@ -860,24 +1174,91 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
                octeon_delete_instr_queue(oct, i);
        }
 
+       if (queue_count_update) {
+               /* For PF re-configure sriov related information */
+               if ((OCTEON_CN23XX_PF(oct)) &&
+                   !oct->sriov_info.sriov_enabled) {
+                       oct->sriov_info.num_pf_rings = num_qs;
+                       if (cn23xx_sriov_config(oct)) {
+                               dev_err(&oct->pci_dev->dev,
+                                       "Queue reset aborted: SRIOV config failed\n");
+                               return -1;
+                       }
+
+                       num_qs = oct->sriov_info.num_pf_rings;
+               }
+       }
+
        if (oct->fn_list.setup_device_regs(oct)) {
                dev_err(&oct->pci_dev->dev, "Failed to configure device registers\n");
                return -1;
        }
 
-       if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
-               dev_err(&oct->pci_dev->dev, "IO queues initialization failed\n");
-               return -1;
+       /* The following steps are needed only for queue-count
+        * re-configuration, not for descriptor-count re-configuration.
+        */
+       if (queue_count_update) {
+               if (octeon_setup_instr_queues(oct))
+                       return -1;
+
+               if (octeon_setup_output_queues(oct))
+                       return -1;
+
+               /* Recreating mbox for PF that is SRIOV disabled */
+               if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) {
+                       if (oct->fn_list.setup_mbox(oct)) {
+                               dev_err(&oct->pci_dev->dev, "Mailbox setup failed\n");
+                               return -1;
+                       }
+               }
+
+               /* Delete and recreate the IRQs, whether or not SRIOV is
+                * enabled on the interface.
+                */
+               if (lio_irq_reallocate_irqs(oct, num_qs)) {
+                       dev_err(&oct->pci_dev->dev, "IRQs could not be allocated\n");
+                       return -1;
+               }
+
+               /* Enable the input and output queues for this Octeon device */
+               if (oct->fn_list.enable_io_queues(oct)) {
+                       dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues\n");
+                       return -1;
+               }
+
+               for (i = 0; i < oct->num_oqs; i++)
+                       writel(oct->droq[i]->max_count,
+                              oct->droq[i]->pkts_credit_reg);
+
+               /* Inform the firmware of the new queue count. This is
+                * required for the firmware to allocate more queues than
+                * it did at load time.
+                */
+               if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) {
+                       if (lio_23xx_reconfigure_queue_count(lio))
+                               return -1;
+               }
        }
 
-       /* Enable the input and output queues for this Octeon device */
-       if (oct->fn_list.enable_io_queues(oct)) {
-               dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues");
+       /* Once firmware is aware of the new value, queues can be recreated */
+       if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
+               dev_err(&oct->pci_dev->dev, "I/O queues creation failed\n");
                return -1;
        }
 
-       if (update && lio_send_queue_count_update(netdev, num_qs))
-               return -1;
+       if (queue_count_update) {
+               if (lio_setup_glists(oct, lio, num_qs)) {
+                       dev_err(&oct->pci_dev->dev, "Gather list allocation failed\n");
+                       return -1;
+               }
+
+               /* Tell the firmware the new number of queues if the
+                * interface is a VF or an SRIOV-enabled PF.
+                */
+               if (oct->sriov_info.sriov_enabled || OCTEON_CN23XX_VF(oct))
+                       if (lio_send_queue_count_update(netdev, num_qs))
+                               return -1;
+       }
 
        return 0;
 }
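
lio_reset_queues() now backs two distinct ethtool operations, separated by
queue_count_update above. From userspace, with a hypothetical interface
name:

        ethtool -L eth0 combined 8              # channel count (queue_count_update = 1)
        ethtool -G eth0 rx 1024 tx 512          # ring sizes (descriptor count only)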
@@ -922,7 +1303,7 @@ static int lio_ethtool_set_ringparam(struct net_device *netdev,
                CFG_SET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
                                            rx_count);
 
-       if (lio_reset_queues(netdev, lio->linfo.num_txpciq))
+       if (lio_reset_queues(netdev, oct->num_iqs))
                goto err_lio_reset_queues;
 
        if (stopped)
@@ -1057,50 +1438,48 @@ lio_get_ethtool_stats(struct net_device *netdev,
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct_dev = lio->oct_dev;
-       struct net_device_stats *netstats = &netdev->stats;
+       struct rtnl_link_stats64 lstats;
        int i = 0, j;
 
        if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
                return;
 
-       netdev->netdev_ops->ndo_get_stats(netdev);
-       octnet_get_link_stats(netdev);
-
+       netdev->netdev_ops->ndo_get_stats64(netdev, &lstats);
        /*sum of oct->droq[oq_no]->stats->rx_pkts_received */
-       data[i++] = CVM_CAST64(netstats->rx_packets);
+       data[i++] = lstats.rx_packets;
        /*sum of oct->instr_queue[iq_no]->stats.tx_done */
-       data[i++] = CVM_CAST64(netstats->tx_packets);
+       data[i++] = lstats.tx_packets;
        /*sum of oct->droq[oq_no]->stats->rx_bytes_received */
-       data[i++] = CVM_CAST64(netstats->rx_bytes);
+       data[i++] = lstats.rx_bytes;
        /*sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */
-       data[i++] = CVM_CAST64(netstats->tx_bytes);
-       data[i++] = CVM_CAST64(netstats->rx_errors +
-                              oct_dev->link_stats.fromwire.fcs_err +
-                              oct_dev->link_stats.fromwire.jabber_err +
-                              oct_dev->link_stats.fromwire.l2_err +
-                              oct_dev->link_stats.fromwire.frame_err);
-       data[i++] = CVM_CAST64(netstats->tx_errors);
+       data[i++] = lstats.tx_bytes;
+       data[i++] = lstats.rx_errors +
+                       oct_dev->link_stats.fromwire.fcs_err +
+                       oct_dev->link_stats.fromwire.jabber_err +
+                       oct_dev->link_stats.fromwire.l2_err +
+                       oct_dev->link_stats.fromwire.frame_err;
+       data[i++] = lstats.tx_errors;
        /*sum of oct->droq[oq_no]->stats->rx_dropped +
         *oct->droq[oq_no]->stats->dropped_nodispatch +
         *oct->droq[oq_no]->stats->dropped_toomany +
         *oct->droq[oq_no]->stats->dropped_nomem
         */
-       data[i++] = CVM_CAST64(netstats->rx_dropped +
-                              oct_dev->link_stats.fromwire.fifo_err +
-                              oct_dev->link_stats.fromwire.dmac_drop +
-                              oct_dev->link_stats.fromwire.red_drops +
-                              oct_dev->link_stats.fromwire.fw_err_pko +
-                              oct_dev->link_stats.fromwire.fw_err_link +
-                              oct_dev->link_stats.fromwire.fw_err_drop);
+       data[i++] = lstats.rx_dropped +
+                       oct_dev->link_stats.fromwire.fifo_err +
+                       oct_dev->link_stats.fromwire.dmac_drop +
+                       oct_dev->link_stats.fromwire.red_drops +
+                       oct_dev->link_stats.fromwire.fw_err_pko +
+                       oct_dev->link_stats.fromwire.fw_err_link +
+                       oct_dev->link_stats.fromwire.fw_err_drop;
        /*sum of oct->instr_queue[iq_no]->stats.tx_dropped */
-       data[i++] = CVM_CAST64(netstats->tx_dropped +
-                              oct_dev->link_stats.fromhost.max_collision_fail +
-                              oct_dev->link_stats.fromhost.max_deferral_fail +
-                              oct_dev->link_stats.fromhost.total_collisions +
-                              oct_dev->link_stats.fromhost.fw_err_pko +
-                              oct_dev->link_stats.fromhost.fw_err_link +
-                              oct_dev->link_stats.fromhost.fw_err_drop +
-                              oct_dev->link_stats.fromhost.fw_err_pki);
+       data[i++] = lstats.tx_dropped +
+                       oct_dev->link_stats.fromhost.max_collision_fail +
+                       oct_dev->link_stats.fromhost.max_deferral_fail +
+                       oct_dev->link_stats.fromhost.total_collisions +
+                       oct_dev->link_stats.fromhost.fw_err_pko +
+                       oct_dev->link_stats.fromhost.fw_err_link +
+                       oct_dev->link_stats.fromhost.fw_err_drop +
+                       oct_dev->link_stats.fromhost.fw_err_pki;
 
        /* firmware tx stats */
        /*per_core_stats[cvmx_get_core_num()].link_stats[mdata->from_ifidx].
@@ -1135,6 +1514,10 @@ lio_get_ethtool_stats(struct net_device *netdev,
         */
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_tx_vxlan);
 
+       /* Multicast and broadcast packets sent by this port */
+       data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent;
+       data[i++] = oct_dev->link_stats.fromhost.fw_total_bcast_sent;
+
        /* mac tx statistics */
        /*CVMX_BGXX_CMRX_TX_STAT5 */
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.total_pkts_sent);
@@ -1171,6 +1554,9 @@ lio_get_ethtool_stats(struct net_device *netdev,
         *fw_total_fwd
         */
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_total_fwd);
+       /* Multicast and broadcast packets received on this port */
+       data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast;
+       data[i++] = oct_dev->link_stats.fromwire.fw_total_bcast;
        /*per_core_stats[core_id].link_stats[ifidx].fromwire.jabber_err */
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.jabber_err);
        /*per_core_stats[core_id].link_stats[ifidx].fromwire.l2_err */
@@ -1339,7 +1725,7 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
                                     __attribute__((unused)),
                                     u64 *data)
 {
-       struct net_device_stats *netstats = &netdev->stats;
+       struct rtnl_link_stats64 lstats;
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct_dev = lio->oct_dev;
        int i = 0, j, vj;
@@ -1347,25 +1733,31 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
        if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
                return;
 
-       netdev->netdev_ops->ndo_get_stats(netdev);
+       netdev->netdev_ops->ndo_get_stats64(netdev, &lstats);
        /* sum of oct->droq[oq_no]->stats->rx_pkts_received */
-       data[i++] = CVM_CAST64(netstats->rx_packets);
+       data[i++] = lstats.rx_packets;
        /* sum of oct->instr_queue[iq_no]->stats.tx_done */
-       data[i++] = CVM_CAST64(netstats->tx_packets);
+       data[i++] = lstats.tx_packets;
        /* sum of oct->droq[oq_no]->stats->rx_bytes_received */
-       data[i++] = CVM_CAST64(netstats->rx_bytes);
+       data[i++] = lstats.rx_bytes;
        /* sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */
-       data[i++] = CVM_CAST64(netstats->tx_bytes);
-       data[i++] = CVM_CAST64(netstats->rx_errors);
-       data[i++] = CVM_CAST64(netstats->tx_errors);
+       data[i++] = lstats.tx_bytes;
+       data[i++] = lstats.rx_errors;
+       data[i++] = lstats.tx_errors;
         /* sum of oct->droq[oq_no]->stats->rx_dropped +
          * oct->droq[oq_no]->stats->dropped_nodispatch +
          * oct->droq[oq_no]->stats->dropped_toomany +
          * oct->droq[oq_no]->stats->dropped_nomem
          */
-       data[i++] = CVM_CAST64(netstats->rx_dropped);
+       data[i++] = lstats.rx_dropped;
        /* sum of oct->instr_queue[iq_no]->stats.tx_dropped */
-       data[i++] = CVM_CAST64(netstats->tx_dropped);
+       data[i++] = lstats.tx_dropped;
+
+       data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast;
+       data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent;
+       data[i++] = oct_dev->link_stats.fromwire.fw_total_bcast;
+       data[i++] = oct_dev->link_stats.fromhost.fw_total_bcast_sent;
+
        /* lio->link_changes */
        data[i++] = CVM_CAST64(lio->link_changes);
 
@@ -1776,162 +2168,6 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
        return -EINTR;
 }
 
-static void
-octnet_nic_stats_callback(struct octeon_device *oct_dev,
-                         u32 status, void *ptr)
-{
-       struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
-       struct oct_nic_stats_resp *resp =
-           (struct oct_nic_stats_resp *)sc->virtrptr;
-       struct oct_nic_stats_ctrl *ctrl =
-           (struct oct_nic_stats_ctrl *)sc->ctxptr;
-       struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
-       struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
-
-       struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire;
-       struct nic_tx_stats *tstats = &oct_dev->link_stats.fromhost;
-
-       if ((status != OCTEON_REQUEST_TIMEOUT) && !resp->status) {
-               octeon_swap_8B_data((u64 *)&resp->stats,
-                                   (sizeof(struct oct_link_stats)) >> 3);
-
-               /* RX link-level stats */
-               rstats->total_rcvd = rsp_rstats->total_rcvd;
-               rstats->bytes_rcvd = rsp_rstats->bytes_rcvd;
-               rstats->total_bcst = rsp_rstats->total_bcst;
-               rstats->total_mcst = rsp_rstats->total_mcst;
-               rstats->runts      = rsp_rstats->runts;
-               rstats->ctl_rcvd   = rsp_rstats->ctl_rcvd;
-               /* Accounts for over/under-run of buffers */
-               rstats->fifo_err  = rsp_rstats->fifo_err;
-               rstats->dmac_drop = rsp_rstats->dmac_drop;
-               rstats->fcs_err   = rsp_rstats->fcs_err;
-               rstats->jabber_err = rsp_rstats->jabber_err;
-               rstats->l2_err    = rsp_rstats->l2_err;
-               rstats->frame_err = rsp_rstats->frame_err;
-               rstats->red_drops = rsp_rstats->red_drops;
-
-               /* RX firmware stats */
-               rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd;
-               rstats->fw_total_fwd = rsp_rstats->fw_total_fwd;
-               rstats->fw_err_pko = rsp_rstats->fw_err_pko;
-               rstats->fw_err_link = rsp_rstats->fw_err_link;
-               rstats->fw_err_drop = rsp_rstats->fw_err_drop;
-               rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan;
-               rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err;
-
-               /* Number of packets that are LROed      */
-               rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts;
-               /* Number of octets that are LROed       */
-               rstats->fw_lro_octs = rsp_rstats->fw_lro_octs;
-               /* Number of LRO packets formed          */
-               rstats->fw_total_lro = rsp_rstats->fw_total_lro;
-               /* Number of times lRO of packet aborted */
-               rstats->fw_lro_aborts = rsp_rstats->fw_lro_aborts;
-               rstats->fw_lro_aborts_port = rsp_rstats->fw_lro_aborts_port;
-               rstats->fw_lro_aborts_seq = rsp_rstats->fw_lro_aborts_seq;
-               rstats->fw_lro_aborts_tsval = rsp_rstats->fw_lro_aborts_tsval;
-               rstats->fw_lro_aborts_timer = rsp_rstats->fw_lro_aborts_timer;
-               /* intrmod: packet forward rate */
-               rstats->fwd_rate = rsp_rstats->fwd_rate;
-
-               /* TX link-level stats */
-               tstats->total_pkts_sent = rsp_tstats->total_pkts_sent;
-               tstats->total_bytes_sent = rsp_tstats->total_bytes_sent;
-               tstats->mcast_pkts_sent = rsp_tstats->mcast_pkts_sent;
-               tstats->bcast_pkts_sent = rsp_tstats->bcast_pkts_sent;
-               tstats->ctl_sent = rsp_tstats->ctl_sent;
-               /* Packets sent after one collision*/
-               tstats->one_collision_sent = rsp_tstats->one_collision_sent;
-               /* Packets sent after multiple collision*/
-               tstats->multi_collision_sent = rsp_tstats->multi_collision_sent;
-               /* Packets not sent due to max collisions */
-               tstats->max_collision_fail = rsp_tstats->max_collision_fail;
-               /* Packets not sent due to max deferrals */
-               tstats->max_deferral_fail = rsp_tstats->max_deferral_fail;
-               /* Accounts for over/under-run of buffers */
-               tstats->fifo_err = rsp_tstats->fifo_err;
-               tstats->runts = rsp_tstats->runts;
-               /* Total number of collisions detected */
-               tstats->total_collisions = rsp_tstats->total_collisions;
-
-               /* firmware stats */
-               tstats->fw_total_sent = rsp_tstats->fw_total_sent;
-               tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
-               tstats->fw_err_pko = rsp_tstats->fw_err_pko;
-               tstats->fw_err_pki = rsp_tstats->fw_err_pki;
-               tstats->fw_err_link = rsp_tstats->fw_err_link;
-               tstats->fw_err_drop = rsp_tstats->fw_err_drop;
-               tstats->fw_tso = rsp_tstats->fw_tso;
-               tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd;
-               tstats->fw_err_tso = rsp_tstats->fw_err_tso;
-               tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan;
-
-               resp->status = 1;
-       } else {
-               resp->status = -1;
-       }
-       complete(&ctrl->complete);
-}
-
-/*  Configure interrupt moderation parameters */
-static int octnet_get_link_stats(struct net_device *netdev)
-{
-       struct lio *lio = GET_LIO(netdev);
-       struct octeon_device *oct_dev = lio->oct_dev;
-
-       struct octeon_soft_command *sc;
-       struct oct_nic_stats_ctrl *ctrl;
-       struct oct_nic_stats_resp *resp;
-
-       int retval;
-
-       /* Alloc soft command */
-       sc = (struct octeon_soft_command *)
-               octeon_alloc_soft_command(oct_dev,
-                                         0,
-                                         sizeof(struct oct_nic_stats_resp),
-                                         sizeof(struct octnic_ctrl_pkt));
-
-       if (!sc)
-               return -ENOMEM;
-
-       resp = (struct oct_nic_stats_resp *)sc->virtrptr;
-       memset(resp, 0, sizeof(struct oct_nic_stats_resp));
-
-       ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr;
-       memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl));
-       ctrl->netdev = netdev;
-       init_completion(&ctrl->complete);
-
-       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
-
-       octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
-                                   OPCODE_NIC_PORT_STATS, 0, 0, 0);
-
-       sc->callback = octnet_nic_stats_callback;
-       sc->callback_arg = sc;
-       sc->wait_time = 500;    /*in milli seconds*/
-
-       retval = octeon_send_soft_command(oct_dev, sc);
-       if (retval == IQ_SEND_FAILED) {
-               octeon_free_soft_command(oct_dev, sc);
-               return -EINVAL;
-       }
-
-       wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000));
-
-       if (resp->status != 1) {
-               octeon_free_soft_command(oct_dev, sc);
-
-               return -EINVAL;
-       }
-
-       octeon_free_soft_command(oct_dev, sc);
-
-       return 0;
-}
-
 static int lio_get_intr_coalesce(struct net_device *netdev,
                                 struct ethtool_coalesce *intr_coal)
 {
@@ -2876,6 +3112,7 @@ static int lio_set_priv_flags(struct net_device *netdev, u32 flags)
 
 static const struct ethtool_ops lio_ethtool_ops = {
        .get_link_ksettings     = lio_get_link_ksettings,
+       .set_link_ksettings     = lio_set_link_ksettings,
        .get_link               = ethtool_op_get_link,
        .get_drvinfo            = lio_get_drvinfo,
        .get_ringparam          = lio_ethtool_get_ringparam,
index f3891ae11b02cfd9b37844f64bf2e98504007d82..e500528ad75109954a41ce2a60cd6fe9f7591315 100644 (file)
@@ -138,33 +138,10 @@ union tx_info {
  * by this structure in the NIC module.
  */
 
-#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
-
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
 #define OCTNIC_GSO_MAX_SIZE                                                    \
        (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
-/** Structure of a node in list of gather components maintained by
- * NIC driver for each network device.
- */
-struct octnic_gather {
-       /** List manipulation. Next and prev pointers. */
-       struct list_head list;
-
-       /** Size of the gather component at sg in bytes. */
-       int sg_size;
-
-       /** Number of bytes that sg was adjusted to make it 8B-aligned. */
-       int adjust;
-
-       /** Gather component that can accommodate max sized fragment list
-        *  received from the IP layer.
-        */
-       struct octeon_sg_entry *sg;
-
-       dma_addr_t sg_dma_ptr;
-};
-
 struct handshake {
        struct completion init;
        struct completion started;
@@ -520,7 +497,7 @@ static void liquidio_deinit_pci(void)
  */
 static inline int check_txq_status(struct lio *lio)
 {
-       int numqs = lio->netdev->num_tx_queues;
+       int numqs = lio->netdev->real_num_tx_queues;
        int ret_val = 0;
        int q, iq;
 
@@ -541,148 +518,6 @@ static inline int check_txq_status(struct lio *lio)
        return ret_val;
 }
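
The switch to real_num_tx_queues matters once queue counts can shrink at
runtime: num_tx_queues is the allocation-time maximum fixed when the netdev
is allocated, while real_num_tx_queues tracks
netif_set_real_num_tx_queues(). A sketch with hypothetical values:

        /* probed with 16 queues, then "ethtool -L ... combined 4" */
        netif_set_real_num_tx_queues(netdev, 4);

        /* netdev->num_tx_queues      == 16  (allocation-time maximum)
         * netdev->real_num_tx_queues == 4   (currently active)
         * so looping over num_tx_queues would poll 12 idle queues
         */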
 
-/**
- * Remove the node at the head of the list. The list would be empty at
- * the end of this call if there are no more nodes in the list.
- */
-static inline struct list_head *list_delete_head(struct list_head *root)
-{
-       struct list_head *node;
-
-       if ((root->prev == root) && (root->next == root))
-               node = NULL;
-       else
-               node = root->next;
-
-       if (node)
-               list_del(node);
-
-       return node;
-}
-
-/**
- * \brief Delete gather lists
- * @param lio per-network private data
- */
-static void delete_glists(struct lio *lio)
-{
-       struct octnic_gather *g;
-       int i;
-
-       kfree(lio->glist_lock);
-       lio->glist_lock = NULL;
-
-       if (!lio->glist)
-               return;
-
-       for (i = 0; i < lio->linfo.num_txpciq; i++) {
-               do {
-                       g = (struct octnic_gather *)
-                               list_delete_head(&lio->glist[i]);
-                       if (g)
-                               kfree(g);
-               } while (g);
-
-               if (lio->glists_virt_base && lio->glists_virt_base[i] &&
-                   lio->glists_dma_base && lio->glists_dma_base[i]) {
-                       lio_dma_free(lio->oct_dev,
-                                    lio->glist_entry_size * lio->tx_qsize,
-                                    lio->glists_virt_base[i],
-                                    lio->glists_dma_base[i]);
-               }
-       }
-
-       kfree(lio->glists_virt_base);
-       lio->glists_virt_base = NULL;
-
-       kfree(lio->glists_dma_base);
-       lio->glists_dma_base = NULL;
-
-       kfree(lio->glist);
-       lio->glist = NULL;
-}
-
-/**
- * \brief Setup gather lists
- * @param lio per-network private data
- */
-static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
-{
-       int i, j;
-       struct octnic_gather *g;
-
-       lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
-                                 GFP_KERNEL);
-       if (!lio->glist_lock)
-               return -ENOMEM;
-
-       lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
-                            GFP_KERNEL);
-       if (!lio->glist) {
-               kfree(lio->glist_lock);
-               lio->glist_lock = NULL;
-               return -ENOMEM;
-       }
-
-       lio->glist_entry_size =
-               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
-       /* allocate memory to store virtual and dma base address of
-        * per glist consistent memory
-        */
-       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
-                                       GFP_KERNEL);
-       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
-                                      GFP_KERNEL);
-
-       if (!lio->glists_virt_base || !lio->glists_dma_base) {
-               delete_glists(lio);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < num_iqs; i++) {
-               int numa_node = dev_to_node(&oct->pci_dev->dev);
-
-               spin_lock_init(&lio->glist_lock[i]);
-
-               INIT_LIST_HEAD(&lio->glist[i]);
-
-               lio->glists_virt_base[i] =
-                       lio_dma_alloc(oct,
-                                     lio->glist_entry_size * lio->tx_qsize,
-                                     &lio->glists_dma_base[i]);
-
-               if (!lio->glists_virt_base[i]) {
-                       delete_glists(lio);
-                       return -ENOMEM;
-               }
-
-               for (j = 0; j < lio->tx_qsize; j++) {
-                       g = kzalloc_node(sizeof(*g), GFP_KERNEL,
-                                        numa_node);
-                       if (!g)
-                               g = kzalloc(sizeof(*g), GFP_KERNEL);
-                       if (!g)
-                               break;
-
-                       g->sg = lio->glists_virt_base[i] +
-                               (j * lio->glist_entry_size);
-
-                       g->sg_dma_ptr = lio->glists_dma_base[i] +
-                                       (j * lio->glist_entry_size);
-
-                       list_add_tail(&g->list, &lio->glist[i]);
-               }
-
-               if (j != lio->tx_qsize) {
-                       delete_glists(lio);
-                       return -ENOMEM;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * \brief Print link information
  * @param netdev network device
@@ -1077,6 +912,9 @@ liquidio_probe(struct pci_dev *pdev,
        /* set linux specific device pointer */
        oct_dev->pci_dev = (void *)pdev;
 
+       oct_dev->subsystem_id = pdev->subsystem_vendor |
+               (pdev->subsystem_device << 16);
+
        hs = &handshake[oct_dev->octeon_id];
        init_completion(&hs->init);
        init_completion(&hs->started);
@@ -1471,7 +1309,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
        cleanup_rx_oom_poll_fn(netdev);
 
-       delete_glists(lio);
+       lio_delete_glists(lio);
 
        free_netdev(netdev);
 
@@ -1686,7 +1524,7 @@ static void free_netsgbuf(void *buf)
                i++;
        }
 
-       iq = skb_iq(lio, skb);
+       iq = skb_iq(lio->oct_dev, skb);
        spin_lock(&lio->glist_lock[iq]);
        list_add_tail(&g->list, &lio->glist[iq]);
        spin_unlock(&lio->glist_lock[iq]);
@@ -1729,7 +1567,7 @@ static void free_netsgbuf_with_resp(void *buf)
                i++;
        }
 
-       iq = skb_iq(lio, skb);
+       iq = skb_iq(lio->oct_dev, skb);
 
        spin_lock(&lio->glist_lock[iq]);
        list_add_tail(&g->list, &lio->glist[iq]);
@@ -1941,39 +1779,6 @@ static int load_firmware(struct octeon_device *oct)
        return ret;
 }
 
-/**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void if_cfg_callback(struct octeon_device *oct,
-                           u32 status __attribute__((unused)),
-                           void *buf)
-{
-       struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-       struct liquidio_if_cfg_resp *resp;
-       struct liquidio_if_cfg_context *ctx;
-
-       resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-       ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
-       oct = lio_get_device(ctx->octeon_id);
-       if (resp->status)
-               dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: 0x%llx (0x%08x)\n",
-                       CVM_CAST64(resp->status), status);
-       WRITE_ONCE(ctx->cond, 1);
-
-       snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
-                resp->cfg_info.liquidio_firmware_version);
-
-       /* This barrier is required to be sure that the response has been
-        * written fully before waking up the handler
-        */
-       wmb();
-
-       wake_up_interruptible(&ctx->wc);
-}
-
 /**
  * \brief Poll routine for checking transmit queue status
  * @param work work_struct data structure
@@ -2049,11 +1854,6 @@ static int liquidio_open(struct net_device *netdev)
 
        ifstate_set(lio, LIO_IFSTATE_RUNNING);
 
-       /* Ready for link status updates */
-       lio->intf_open = 1;
-
-       netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-
        if (OCTEON_CN23XX_PF(oct)) {
                if (!oct->msix_on)
                        if (setup_tx_poll_fn(netdev))
@@ -2063,7 +1863,12 @@ static int liquidio_open(struct net_device *netdev)
                        return -1;
        }
 
-       start_txqs(netdev);
+       netif_tx_start_all_queues(netdev);
+
+       /* Ready for link status updates */
+       lio->intf_open = 1;
+
+       netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 
        /* tell Octeon to start forwarding packets to host */
        send_rx_ctrl_cmd(lio, 1);
@@ -2086,11 +1891,15 @@ static int liquidio_stop(struct net_device *netdev)
 
        ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
-       netif_tx_disable(netdev);
+       /* Stop any link updates */
+       lio->intf_open = 0;
+
+       stop_txqs(netdev);
 
        /* Inform that netif carrier is down */
        netif_carrier_off(netdev);
-       lio->intf_open = 0;
+       netif_tx_disable(netdev);
+
        lio->linfo.link.s.link_up = 0;
        lio->link_changes++;
 
@@ -2252,14 +2061,11 @@ static int liquidio_set_mac(struct net_device *netdev, void *p)
        return 0;
 }
 
-/**
- * \brief Net device get_stats
- * @param netdev network device
- */
-static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
+static void
+liquidio_get_stats64(struct net_device *netdev,
+                    struct rtnl_link_stats64 *lstats)
 {
        struct lio *lio = GET_LIO(netdev);
-       struct net_device_stats *stats = &netdev->stats;
        struct octeon_device *oct;
        u64 pkts = 0, drop = 0, bytes = 0;
        struct oct_droq_stats *oq_stats;
@@ -2269,7 +2075,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
        oct = lio->oct_dev;
 
        if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
-               return stats;
+               return;
 
        for (i = 0; i < oct->num_iqs; i++) {
                iq_no = lio->linfo.txpciq[i].s.q_no;
@@ -2279,9 +2085,9 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
                bytes += iq_stats->tx_tot_bytes;
        }
 
-       stats->tx_packets = pkts;
-       stats->tx_bytes = bytes;
-       stats->tx_dropped = drop;
+       lstats->tx_packets = pkts;
+       lstats->tx_bytes = bytes;
+       lstats->tx_dropped = drop;
 
        pkts = 0;
        drop = 0;
@@ -2298,11 +2104,34 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
                bytes += oq_stats->rx_bytes_received;
        }
 
-       stats->rx_bytes = bytes;
-       stats->rx_packets = pkts;
-       stats->rx_dropped = drop;
-
-       return stats;
+       lstats->rx_bytes = bytes;
+       lstats->rx_packets = pkts;
+       lstats->rx_dropped = drop;
+
+       octnet_get_link_stats(netdev);
+       lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
+       lstats->collisions = oct->link_stats.fromhost.total_collisions;
+
+       /* detailed rx_errors: */
+       lstats->rx_length_errors = oct->link_stats.fromwire.l2_err;
+       /* received pkt with CRC error */
+       lstats->rx_crc_errors = oct->link_stats.fromwire.fcs_err;
+       /* received frame alignment error */
+       lstats->rx_frame_errors = oct->link_stats.fromwire.frame_err;
+       /* receiver fifo overrun */
+       lstats->rx_fifo_errors = oct->link_stats.fromwire.fifo_err;
+
+       lstats->rx_errors = lstats->rx_length_errors + lstats->rx_crc_errors +
+               lstats->rx_frame_errors + lstats->rx_fifo_errors;
+
+       /* detailed tx_errors */
+       lstats->tx_aborted_errors = oct->link_stats.fromhost.fw_err_pko;
+       lstats->tx_carrier_errors = oct->link_stats.fromhost.fw_err_link;
+       lstats->tx_fifo_errors = oct->link_stats.fromhost.fifo_err;
+
+       lstats->tx_errors = lstats->tx_aborted_errors +
+               lstats->tx_carrier_errors +
+               lstats->tx_fifo_errors;
 }
 
 /**
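The hunk above converts the driver from .ndo_get_stats to .ndo_get_stats64: instead of returning a pointer to netdev->stats, the handler fills a caller-provided rtnl_link_stats64 and returns void, summing per-queue counters into full 64-bit fields. A minimal standalone sketch of the same aggregation pattern (the structs are hypothetical stand-ins, not the kernel types):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for struct rtnl_link_stats64 and per-queue stats. */
struct link_stats64 { uint64_t tx_packets, tx_bytes, rx_packets, rx_bytes; };
struct queue_stats  { uint64_t pkts, bytes; };

/* Sum per-queue counters into a caller-provided 64-bit structure,
 * mirroring the void-returning ndo_get_stats64 style used above. */
static void get_stats64(struct link_stats64 *out,
                        const struct queue_stats *txq, int ntx,
                        const struct queue_stats *rxq, int nrx)
{
        for (int i = 0; i < ntx; i++) {
                out->tx_packets += txq[i].pkts;
                out->tx_bytes   += txq[i].bytes;
        }
        for (int i = 0; i < nrx; i++) {
                out->rx_packets += rxq[i].pkts;
                out->rx_bytes   += rxq[i].bytes;
        }
}

int main(void)
{
        struct queue_stats tx[2] = { { 10, 1500 }, { 5, 700 } };
        struct queue_stats rx[1] = { { 7, 900 } };
        struct link_stats64 s = { 0 };

        get_stats64(&s, tx, 2, rx, 1);
        printf("tx %llu pkts / rx %llu pkts\n",
               (unsigned long long)s.tx_packets,
               (unsigned long long)s.rx_packets);
        return 0;
}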
@@ -2510,7 +2339,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        lio = GET_LIO(netdev);
        oct = lio->oct_dev;
 
-       q_idx = skb_iq(lio, skb);
+       q_idx = skb_iq(oct, skb);
        tag = q_idx;
        iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
@@ -2603,7 +2432,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
                spin_lock(&lio->glist_lock[q_idx]);
                g = (struct octnic_gather *)
-                       list_delete_head(&lio->glist[q_idx]);
+                       lio_list_delete_head(&lio->glist[q_idx]);
                spin_unlock(&lio->glist_lock[q_idx]);
 
                if (!g) {
@@ -3355,7 +3184,7 @@ static const struct net_device_ops lionetdevops = {
        .ndo_open               = liquidio_open,
        .ndo_stop               = liquidio_stop,
        .ndo_start_xmit         = liquidio_xmit,
-       .ndo_get_stats          = liquidio_get_stats,
+       .ndo_get_stats64        = liquidio_get_stats64,
        .ndo_set_mac_address    = liquidio_set_mac,
        .ndo_set_rx_mode        = liquidio_set_mcast_list,
        .ndo_tx_timeout         = liquidio_tx_timeout,
@@ -3476,6 +3305,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        struct liquidio_if_cfg_resp *resp;
        struct octdev_props *props;
        int retval, num_iqueues, num_oqueues;
+       int max_num_queues = 0;
        union oct_nic_if_cfg if_cfg;
        unsigned int base_queue;
        unsigned int gmx_port_id;
@@ -3556,9 +3386,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                                            OPCODE_NIC_IF_CFG, 0,
                                            if_cfg.u64, 0);
 
-               sc->callback = if_cfg_callback;
+               sc->callback = lio_if_cfg_callback;
                sc->callback_arg = sc;
-               sc->wait_time = 3000;
+               sc->wait_time = LIO_IFCFG_WAIT_TIME;
 
                retval = octeon_send_soft_command(octeon_dev, sc);
                if (retval == IQ_SEND_FAILED) {
@@ -3612,11 +3442,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                                resp->cfg_info.oqmask);
                        goto setup_nic_dev_fail;
                }
+
+               if (OCTEON_CN6XXX(octeon_dev)) {
+                       max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev,
+                                                                   cn6xxx));
+               } else if (OCTEON_CN23XX_PF(octeon_dev)) {
+                       max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev,
+                                                                   cn23xx_pf));
+               }
+
                dev_dbg(&octeon_dev->pci_dev->dev,
-                       "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n",
+                       "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d max_num_queues: %d\n",
                        i, resp->cfg_info.iqmask, resp->cfg_info.oqmask,
-                       num_iqueues, num_oqueues);
-               netdev = alloc_etherdev_mq(LIO_SIZE, num_iqueues);
+                       num_iqueues, num_oqueues, max_num_queues);
+               netdev = alloc_etherdev_mq(LIO_SIZE, max_num_queues);
 
                if (!netdev) {
                        dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
@@ -3631,6 +3470,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                netdev->netdev_ops = &lionetdevops;
                SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops);
 
+               retval = netif_set_real_num_rx_queues(netdev, num_oqueues);
+               if (retval) {
+                       dev_err(&octeon_dev->pci_dev->dev,
+                               "setting real number rx failed\n");
+                       goto setup_nic_dev_fail;
+               }
+
+               retval = netif_set_real_num_tx_queues(netdev, num_iqueues);
+               if (retval) {
+                       dev_err(&octeon_dev->pci_dev->dev,
+                               "setting real number tx failed\n");
+                       goto setup_nic_dev_fail;
+               }
+
                lio = GET_LIO(netdev);
 
                memset(lio, 0, sizeof(struct lio));
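Note the allocation strategy this hunk introduces: the netdev is sized with alloc_etherdev_mq() for the chip's maximum queue count, then trimmed with netif_set_real_num_rx_queues()/netif_set_real_num_tx_queues() to what the firmware actually granted, and the txq helpers later iterate real_num_tx_queues rather than num_tx_queues. A small sketch of the allocate-max-then-trim idea (illustrative types only, not the kernel API):

#include <stdio.h>

#define MAX_QUEUES 64

/* Allocate for the hardware maximum, then trim to the active count --
 * the alloc_etherdev_mq() + netif_set_real_num_*_queues() pattern. */
struct netdev {
        int num_tx_queues;       /* sized at allocation time */
        int real_num_tx_queues;  /* trimmed to the active count */
};

static int set_real_num_tx_queues(struct netdev *dev, int n)
{
        if (n < 1 || n > dev->num_tx_queues)
                return -1;
        dev->real_num_tx_queues = n;
        return 0;
}

int main(void)
{
        struct netdev dev = { .num_tx_queues = MAX_QUEUES };

        if (set_real_num_tx_queues(&dev, 8))
                return 1;
        printf("allocated %d, using %d\n",
               dev.num_tx_queues, dev.real_num_tx_queues);
        return 0;
}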
@@ -3752,7 +3605,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
                lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
 
-               if (setup_glists(octeon_dev, lio, num_iqueues)) {
+               if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
                        dev_err(&octeon_dev->pci_dev->dev,
                                "Gather list allocation failed\n");
                        goto setup_nic_dev_fail;
@@ -3814,6 +3667,23 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                        "NIC ifidx:%d Setup successful\n", i);
 
                octeon_free_soft_command(octeon_dev, sc);
+
+               if (octeon_dev->subsystem_id ==
+                       OCTEON_CN2350_25GB_SUBSYS_ID ||
+                   octeon_dev->subsystem_id ==
+                       OCTEON_CN2360_25GB_SUBSYS_ID) {
+                       liquidio_get_speed(lio);
+
+                       if (octeon_dev->speed_setting == 0) {
+                               octeon_dev->speed_setting = 25;
+                               octeon_dev->no_speed_setting = 1;
+                       }
+               } else {
+                       octeon_dev->no_speed_setting = 1;
+                       octeon_dev->speed_setting = 10;
+               }
+               octeon_dev->speed_boot = octeon_dev->speed_setting;
+
        }
 
        devlink = devlink_alloc(&liquidio_devlink_ops,
@@ -4251,7 +4121,9 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
                }
                atomic_set(&octeon_dev->status, OCT_DEV_MBOX_SETUP_DONE);
 
-               if (octeon_allocate_ioq_vector(octeon_dev)) {
+               if (octeon_allocate_ioq_vector
+                               (octeon_dev,
+                                octeon_dev->sriov_info.num_pf_rings)) {
                        dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
                        return 1;
                }
index f92dfa411de68627ccb5a59d189d5f21e976ed8c..7fa0212873aceaf876dd144a2d5e06dd199659b6 100644 (file)
@@ -69,30 +69,10 @@ union tx_info {
        } s;
 };
 
-#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
-
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
 #define OCTNIC_GSO_MAX_SIZE \
                (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
-struct octnic_gather {
-       /* List manipulation. Next and prev pointers. */
-       struct list_head list;
-
-       /* Size of the gather component at sg in bytes. */
-       int sg_size;
-
-       /* Number of bytes that sg was adjusted to make it 8B-aligned. */
-       int adjust;
-
-       /* Gather component that can accommodate max sized fragment list
-        * received from the IP layer.
-        */
-       struct octeon_sg_entry *sg;
-
-       dma_addr_t sg_dma_ptr;
-};
-
 static int
 liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 static void liquidio_vf_remove(struct pci_dev *pdev);
@@ -284,142 +264,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
        .err_handler    = &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * Remove the node at the head of the list. The list would be empty at
- * the end of this call if there are no more nodes in the list.
- */
-static struct list_head *list_delete_head(struct list_head *root)
-{
-       struct list_head *node;
-
-       if ((root->prev == root) && (root->next == root))
-               node = NULL;
-       else
-               node = root->next;
-
-       if (node)
-               list_del(node);
-
-       return node;
-}
-
-/**
- * \brief Delete gather lists
- * @param lio per-network private data
- */
-static void delete_glists(struct lio *lio)
-{
-       struct octnic_gather *g;
-       int i;
-
-       kfree(lio->glist_lock);
-       lio->glist_lock = NULL;
-
-       if (!lio->glist)
-               return;
-
-       for (i = 0; i < lio->linfo.num_txpciq; i++) {
-               do {
-                       g = (struct octnic_gather *)
-                           list_delete_head(&lio->glist[i]);
-                       kfree(g);
-               } while (g);
-
-               if (lio->glists_virt_base && lio->glists_virt_base[i] &&
-                   lio->glists_dma_base && lio->glists_dma_base[i]) {
-                       lio_dma_free(lio->oct_dev,
-                                    lio->glist_entry_size * lio->tx_qsize,
-                                    lio->glists_virt_base[i],
-                                    lio->glists_dma_base[i]);
-               }
-       }
-
-       kfree(lio->glists_virt_base);
-       lio->glists_virt_base = NULL;
-
-       kfree(lio->glists_dma_base);
-       lio->glists_dma_base = NULL;
-
-       kfree(lio->glist);
-       lio->glist = NULL;
-}
-
-/**
- * \brief Setup gather lists
- * @param lio per-network private data
- */
-static int setup_glists(struct lio *lio, int num_iqs)
-{
-       struct octnic_gather *g;
-       int i, j;
-
-       lio->glist_lock =
-           kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
-       if (!lio->glist_lock)
-               return -ENOMEM;
-
-       lio->glist =
-           kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
-       if (!lio->glist) {
-               kfree(lio->glist_lock);
-               lio->glist_lock = NULL;
-               return -ENOMEM;
-       }
-
-       lio->glist_entry_size =
-               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
-       /* allocate memory to store virtual and dma base address of
-        * per glist consistent memory
-        */
-       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
-                                       GFP_KERNEL);
-       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
-                                      GFP_KERNEL);
-
-       if (!lio->glists_virt_base || !lio->glists_dma_base) {
-               delete_glists(lio);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < num_iqs; i++) {
-               spin_lock_init(&lio->glist_lock[i]);
-
-               INIT_LIST_HEAD(&lio->glist[i]);
-
-               lio->glists_virt_base[i] =
-                       lio_dma_alloc(lio->oct_dev,
-                                     lio->glist_entry_size * lio->tx_qsize,
-                                     &lio->glists_dma_base[i]);
-
-               if (!lio->glists_virt_base[i]) {
-                       delete_glists(lio);
-                       return -ENOMEM;
-               }
-
-               for (j = 0; j < lio->tx_qsize; j++) {
-                       g = kzalloc(sizeof(*g), GFP_KERNEL);
-                       if (!g)
-                               break;
-
-                       g->sg = lio->glists_virt_base[i] +
-                               (j * lio->glist_entry_size);
-
-                       g->sg_dma_ptr = lio->glists_dma_base[i] +
-                                       (j * lio->glist_entry_size);
-
-                       list_add_tail(&g->list, &lio->glist[i]);
-               }
-
-               if (j != lio->tx_qsize) {
-                       delete_glists(lio);
-                       return -ENOMEM;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * \brief Print link information
  * @param netdev network device
@@ -567,6 +411,9 @@ liquidio_vf_probe(struct pci_dev *pdev,
        /* set linux specific device pointer */
        oct_dev->pci_dev = pdev;
 
+       oct_dev->subsystem_id = pdev->subsystem_vendor |
+               (pdev->subsystem_device << 16);
+
        if (octeon_device_init(oct_dev)) {
                liquidio_vf_remove(pdev);
                return -ENOMEM;
@@ -856,7 +703,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
        cleanup_link_status_change_wq(netdev);
 
-       delete_glists(lio);
+       lio_delete_glists(lio);
 
        free_netdev(netdev);
 
@@ -1005,7 +852,7 @@ static void free_netsgbuf(void *buf)
                i++;
        }
 
-       iq = skb_iq(lio, skb);
+       iq = skb_iq(lio->oct_dev, skb);
 
        spin_lock(&lio->glist_lock[iq]);
        list_add_tail(&g->list, &lio->glist[iq]);
@@ -1049,7 +896,7 @@ static void free_netsgbuf_with_resp(void *buf)
                i++;
        }
 
-       iq = skb_iq(lio, skb);
+       iq = skb_iq(lio->oct_dev, skb);
 
        spin_lock(&lio->glist_lock[iq]);
        list_add_tail(&g->list, &lio->glist[iq]);
@@ -1058,38 +905,6 @@ static void free_netsgbuf_with_resp(void *buf)
        /* Don't free the skb yet */
 }
 
-/**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void if_cfg_callback(struct octeon_device *oct,
-                           u32 status __attribute__((unused)), void *buf)
-{
-       struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-       struct liquidio_if_cfg_context *ctx;
-       struct liquidio_if_cfg_resp *resp;
-
-       resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-       ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
-       oct = lio_get_device(ctx->octeon_id);
-       if (resp->status)
-               dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
-                       CVM_CAST64(resp->status));
-       WRITE_ONCE(ctx->cond, 1);
-
-       snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
-                resp->cfg_info.liquidio_firmware_version);
-
-       /* This barrier is required to be sure that the response has been
-        * written fully before waking up the handler
-        */
-       wmb();
-
-       wake_up_interruptible(&ctx->wc);
-}
-
 /**
  * \brief Net device open for LiquidIO
  * @param netdev network device
@@ -1336,24 +1151,21 @@ static int liquidio_set_mac(struct net_device *netdev, void *p)
        return 0;
 }
 
-/**
- * \brief Net device get_stats
- * @param netdev network device
- */
-static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
+static void
+liquidio_get_stats64(struct net_device *netdev,
+                    struct rtnl_link_stats64 *lstats)
 {
        struct lio *lio = GET_LIO(netdev);
-       struct net_device_stats *stats = &netdev->stats;
+       struct octeon_device *oct;
        u64 pkts = 0, drop = 0, bytes = 0;
        struct oct_droq_stats *oq_stats;
        struct oct_iq_stats *iq_stats;
-       struct octeon_device *oct;
        int i, iq_no, oq_no;
 
        oct = lio->oct_dev;
 
        if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
-               return stats;
+               return;
 
        for (i = 0; i < oct->num_iqs; i++) {
                iq_no = lio->linfo.txpciq[i].s.q_no;
@@ -1363,9 +1175,9 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
                bytes += iq_stats->tx_tot_bytes;
        }
 
-       stats->tx_packets = pkts;
-       stats->tx_bytes = bytes;
-       stats->tx_dropped = drop;
+       lstats->tx_packets = pkts;
+       lstats->tx_bytes = bytes;
+       lstats->tx_dropped = drop;
 
        pkts = 0;
        drop = 0;
@@ -1382,11 +1194,29 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
                bytes += oq_stats->rx_bytes_received;
        }
 
-       stats->rx_bytes = bytes;
-       stats->rx_packets = pkts;
-       stats->rx_dropped = drop;
+       lstats->rx_bytes = bytes;
+       lstats->rx_packets = pkts;
+       lstats->rx_dropped = drop;
+
+       octnet_get_link_stats(netdev);
+       lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
+
+       /* detailed rx_errors: */
+       lstats->rx_length_errors = oct->link_stats.fromwire.l2_err;
+       /* received pkt with CRC error */
+       lstats->rx_crc_errors = oct->link_stats.fromwire.fcs_err;
+       /* received frame alignment error */
+       lstats->rx_frame_errors = oct->link_stats.fromwire.frame_err;
 
-       return stats;
+       lstats->rx_errors = lstats->rx_length_errors + lstats->rx_crc_errors +
+                           lstats->rx_frame_errors;
+
+       /* detailed tx_errors */
+       lstats->tx_aborted_errors = oct->link_stats.fromhost.fw_err_pko;
+       lstats->tx_carrier_errors = oct->link_stats.fromhost.fw_err_link;
+
+       lstats->tx_errors = lstats->tx_aborted_errors +
+               lstats->tx_carrier_errors;
 }
 
 /**
@@ -1580,7 +1410,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        lio = GET_LIO(netdev);
        oct = lio->oct_dev;
 
-       q_idx = skb_iq(lio, skb);
+       q_idx = skb_iq(lio->oct_dev, skb);
        tag = q_idx;
        iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
@@ -1661,8 +1491,8 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                int i, frags;
 
                spin_lock(&lio->glist_lock[q_idx]);
-               g = (struct octnic_gather *)list_delete_head(
-                   &lio->glist[q_idx]);
+               g = (struct octnic_gather *)
+                       lio_list_delete_head(&lio->glist[q_idx]);
                spin_unlock(&lio->glist_lock[q_idx]);
 
                if (!g) {
@@ -2034,7 +1864,7 @@ static const struct net_device_ops lionetdevops = {
        .ndo_open               = liquidio_open,
        .ndo_stop               = liquidio_stop,
        .ndo_start_xmit         = liquidio_xmit,
-       .ndo_get_stats          = liquidio_get_stats,
+       .ndo_get_stats64        = liquidio_get_stats64,
        .ndo_set_mac_address    = liquidio_set_mac,
        .ndo_set_rx_mode        = liquidio_set_mcast_list,
        .ndo_tx_timeout         = liquidio_tx_timeout,
@@ -2156,7 +1986,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                                            OPCODE_NIC_IF_CFG, 0, if_cfg.u64,
                                            0);
 
-               sc->callback = if_cfg_callback;
+               sc->callback = lio_if_cfg_callback;
                sc->callback_arg = sc;
                sc->wait_time = 5000;
 
@@ -2273,6 +2103,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                netdev->features = (lio->dev_capability & ~NETIF_F_LRO);
 
                netdev->hw_features = lio->dev_capability;
+               netdev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
 
                /* MTU range: 68 - 16000 */
                netdev->min_mtu = LIO_MIN_MTU_SIZE;
@@ -2321,7 +2152,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
                lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
 
-               if (setup_glists(lio, num_iqueues)) {
+               if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
                        dev_err(&octeon_dev->pci_dev->dev,
                                "Gather list allocation failed\n");
                        goto setup_nic_dev_fail;
@@ -2371,6 +2202,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                        "NIC ifidx:%d Setup successful\n", i);
 
                octeon_free_soft_command(octeon_dev, sc);
+
+               octeon_dev->no_speed_setting = 1;
        }
 
        return 0;
@@ -2512,7 +2345,7 @@ static int octeon_device_init(struct octeon_device *oct)
        }
        atomic_set(&oct->status, OCT_DEV_MBOX_SETUP_DONE);
 
-       if (octeon_allocate_ioq_vector(oct)) {
+       if (octeon_allocate_ioq_vector(oct, oct->sriov_info.rings_per_vf)) {
                dev_err(&oct->pci_dev->dev, "ioq vector allocation failed\n");
                return 1;
        }
index 2adafa366d3fb07886404c93f8b0f1afc6d8fa60..ddd7431579f4e6d51a335d9336074268f62fce8f 100644 (file)
@@ -201,13 +201,14 @@ lio_vf_rep_get_stats64(struct net_device *dev,
 {
        struct lio_vf_rep_desc *vf_rep = netdev_priv(dev);
 
-       stats64->tx_packets = vf_rep->stats.tx_packets;
-       stats64->tx_bytes   = vf_rep->stats.tx_bytes;
-       stats64->tx_dropped = vf_rep->stats.tx_dropped;
-
-       stats64->rx_packets = vf_rep->stats.rx_packets;
-       stats64->rx_bytes   = vf_rep->stats.rx_bytes;
-       stats64->rx_dropped = vf_rep->stats.rx_dropped;
+       /* Swap tx and rx stats as VF rep is a switch port */
+       stats64->tx_packets = vf_rep->stats.rx_packets;
+       stats64->tx_bytes   = vf_rep->stats.rx_bytes;
+       stats64->tx_dropped = vf_rep->stats.rx_dropped;
+
+       stats64->rx_packets = vf_rep->stats.tx_packets;
+       stats64->rx_bytes   = vf_rep->stats.tx_bytes;
+       stats64->rx_dropped = vf_rep->stats.tx_dropped;
 }
 
 static int
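The fix above follows from the representor model: a VF representor is the switch-side peer of the VF, so its TX is the VF's RX and vice versa. A tiny sketch of the mirrored accounting (hypothetical types):

#include <stdio.h>

struct dir_stats { unsigned long packets, bytes, dropped; };

/* A switchdev representor mirrors the VF's point of view: what the VF
 * transmitted is what the representor received, and vice versa. */
static void rep_fill_stats(struct dir_stats *rep_tx, struct dir_stats *rep_rx,
                           const struct dir_stats *vf_tx,
                           const struct dir_stats *vf_rx)
{
        *rep_tx = *vf_rx;  /* rep TX <- VF RX */
        *rep_rx = *vf_tx;  /* rep RX <- VF TX */
}

int main(void)
{
        struct dir_stats vf_tx = { 100, 64000, 0 }, vf_rx = { 42, 9000, 1 };
        struct dir_stats rtx, rrx;

        rep_fill_stats(&rtx, &rrx, &vf_tx, &vf_rx);
        printf("rep tx_packets=%lu rx_packets=%lu\n", rtx.packets, rrx.packets);
        return 0;
}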
index 34a94daca590da91b2fcb0bb4a670f4cbfc802d5..690424b6781ad4378a6dfdeac73325b53ec9b5ee 100644 (file)
@@ -28,7 +28,7 @@
 #define LIQUIDIO_PACKAGE ""
 #define LIQUIDIO_BASE_MAJOR_VERSION 1
 #define LIQUIDIO_BASE_MINOR_VERSION 7
-#define LIQUIDIO_BASE_MICRO_VERSION 0
+#define LIQUIDIO_BASE_MICRO_VERSION 2
 #define LIQUIDIO_BASE_VERSION   __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
                                __stringify(LIQUIDIO_BASE_MINOR_VERSION)
 #define LIQUIDIO_MICRO_VERSION  "." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
@@ -84,6 +84,7 @@ enum octeon_tag_type {
 #define OPCODE_NIC_IF_CFG              0x09
 #define OPCODE_NIC_VF_DRV_NOTICE       0x0A
 #define OPCODE_NIC_INTRMOD_PARAMS      0x0B
+#define OPCODE_NIC_QCOUNT_UPDATE       0x12
 #define OPCODE_NIC_SET_TRUSTED_VF      0x13
 #define OPCODE_NIC_SYNC_OCTEON_TIME    0x14
 #define VF_DRV_LOADED                  1
@@ -92,6 +93,7 @@ enum octeon_tag_type {
 
 #define OPCODE_NIC_VF_REP_PKT          0x15
 #define OPCODE_NIC_VF_REP_CMD          0x16
+#define OPCODE_NIC_UBOOT_CTL           0x17
 
 #define CORE_DRV_TEST_SCATTER_OP    0xFFF5
 
@@ -248,6 +250,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 #define   OCTNET_CMD_VLAN_FILTER_ENABLE 0x1
 #define   OCTNET_CMD_VLAN_FILTER_DISABLE 0x0
 
+#define   SEAPI_CMD_SPEED_SET           0x2
+#define   SEAPI_CMD_SPEED_GET           0x3
+
 #define   LIO_CMD_WAIT_TM 100
 
 /* RX(packets coming from wire) Checksum verification flags */
@@ -802,6 +807,9 @@ struct nic_rx_stats {
        u64 fw_total_rcvd;
        u64 fw_total_fwd;
        u64 fw_total_fwd_bytes;
+       u64 fw_total_mcast;
+       u64 fw_total_bcast;
+
        u64 fw_err_pko;
        u64 fw_err_link;
        u64 fw_err_drop;
@@ -858,6 +866,8 @@ struct nic_tx_stats {
        u64 fw_total_sent;
        u64 fw_total_fwd;
        u64 fw_total_fwd_bytes;
+       u64 fw_total_mcast_sent;
+       u64 fw_total_bcast_sent;
        u64 fw_err_pko;
        u64 fw_err_link;
        u64 fw_err_drop;
index f38abf6264122016ff704520400a38618477a114..f878a552fef3b36fcf98ee822d6e7a635401b92b 100644 (file)
@@ -824,23 +824,18 @@ int octeon_deregister_device(struct octeon_device *oct)
 }
 
 int
-octeon_allocate_ioq_vector(struct octeon_device  *oct)
+octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs)
 {
-       int i, num_ioqs = 0;
        struct octeon_ioq_vector *ioq_vector;
        int cpu_num;
        int size;
-
-       if (OCTEON_CN23XX_PF(oct))
-               num_ioqs = oct->sriov_info.num_pf_rings;
-       else if (OCTEON_CN23XX_VF(oct))
-               num_ioqs = oct->sriov_info.rings_per_vf;
+       int i;
 
        size = sizeof(struct octeon_ioq_vector) * num_ioqs;
 
        oct->ioq_vector = vzalloc(size);
        if (!oct->ioq_vector)
-               return 1;
+               return -1;
        for (i = 0; i < num_ioqs; i++) {
                ioq_vector              = &oct->ioq_vector[i];
                ioq_vector->oct_dev     = oct;
@@ -856,6 +851,7 @@ octeon_allocate_ioq_vector(struct octeon_device  *oct)
                else
                        ioq_vector->ioq_num     = i;
        }
+
        return 0;
 }
 
index 91937cc5c1d7447021057dec9e1e7b4c709b1684..94a4ed88d6188ca4ed44ab5ebd2673832d33c254 100644 (file)
 #define  OCTEON_CN23XX_REV_1_1        0x01
 #define  OCTEON_CN23XX_REV_2_0        0x80
 
+/** Subsystem IDs for the chips */
+#define         OCTEON_CN2350_10GB_SUBSYS_ID_1 0X3177d
+#define         OCTEON_CN2350_10GB_SUBSYS_ID_2 0X4177d
+#define         OCTEON_CN2360_10GB_SUBSYS_ID   0X5177d
+#define         OCTEON_CN2350_25GB_SUBSYS_ID   0X7177d
+#define         OCTEON_CN2360_25GB_SUBSYS_ID   0X6177d
+
 /** Endian-swap modes supported by Octeon. */
 enum octeon_pci_swap_mode {
        OCTEON_PCI_PASSTHROUGH = 0,
@@ -430,6 +437,8 @@ struct octeon_device {
 
        u16 rev_id;
 
+       u32 subsystem_id;
+
        u16 pf_num;
 
        u16 vf_num;
@@ -584,6 +593,11 @@ struct octeon_device {
        struct lio_vf_rep_list vf_rep_list;
        struct devlink *devlink;
        enum devlink_eswitch_mode eswitch_mode;
+
+       /* for 25G NIC speed change */
+       u8  speed_boot;
+       u8  speed_setting;
+       u8  no_speed_setting;
 };
 
 #define  OCT_DRV_ONLINE 1
@@ -867,7 +881,7 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type);
 struct octeon_config *octeon_get_conf(struct octeon_device *oct);
 
 void octeon_free_ioq_vector(struct octeon_device *oct);
-int octeon_allocate_ioq_vector(struct octeon_device  *oct);
+int octeon_allocate_ioq_vector(struct octeon_device  *oct, u32 num_ioqs);
 void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq);
 
 /* LiquidIO driver private flags */
index 4069710796a84e2aad17d5ccd6bfeda0867b4d3b..d7a3916fe877c3714c7b66f454be89e93ad8e3e6 100644 (file)
@@ -47,6 +47,29 @@ struct liquidio_if_cfg_resp {
        u64 status;
 };
 
+#define LIO_IFCFG_WAIT_TIME    3000 /* In milliseconds */
+
+/* Structure of a node in the list of gather components maintained by
+ * the NIC driver for each network device.
+ */
+struct octnic_gather {
+       /* List manipulation. Next and prev pointers. */
+       struct list_head list;
+
+       /* Size of the gather component at sg in bytes. */
+       int sg_size;
+
+       /* Number of bytes that sg was adjusted to make it 8B-aligned. */
+       int adjust;
+
+       /* Gather component that can accommodate max sized fragment list
+        * received from the IP layer.
+        */
+       struct octeon_sg_entry *sg;
+
+       dma_addr_t sg_dma_ptr;
+};
+
 struct oct_nic_stats_resp {
        u64     rh;
        struct oct_link_stats stats;
@@ -58,6 +81,18 @@ struct oct_nic_stats_ctrl {
        struct net_device *netdev;
 };
 
+struct oct_nic_seapi_resp {
+       u64 rh;
+       u32 speed;
+       u64 status;
+};
+
+struct liquidio_nic_seapi_ctl_context {
+       int octeon_id;
+       u32 status;
+       struct completion complete;
+};
+
 /** LiquidIO per-interface network private data */
 struct lio {
        /** State of the interface. Rx/Tx happens only in the RUNNING state.  */
@@ -157,7 +192,7 @@ struct lio {
 #define LIO_SIZE         (sizeof(struct lio))
 #define GET_LIO(netdev)  ((struct lio *)netdev_priv(netdev))
 
-#define LIO_MAX_CORES                12
+#define LIO_MAX_CORES                16
 
 /**
  * \brief Enable or disable feature
@@ -190,6 +225,8 @@ irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)),
 
 int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
 
+int octnet_get_link_stats(struct net_device *netdev);
+
 int lio_wait_for_clean_oq(struct octeon_device *oct);
 /**
  * \brief Register ethtool operations
@@ -197,6 +234,17 @@ int lio_wait_for_clean_oq(struct octeon_device *oct);
  */
 void liquidio_set_ethtool_ops(struct net_device *netdev);
 
+void lio_if_cfg_callback(struct octeon_device *oct,
+                        u32 status __attribute__((unused)),
+                        void *buf);
+
+void lio_delete_glists(struct lio *lio);
+
+int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_qs);
+
+int liquidio_get_speed(struct lio *lio);
+int liquidio_set_speed(struct lio *lio, int speed);
+
 /**
  * \brief Net device change_mtu
  * @param netdev network device
@@ -515,7 +563,7 @@ static inline void stop_txqs(struct net_device *netdev)
 {
        int i;
 
-       for (i = 0; i < netdev->num_tx_queues; i++)
+       for (i = 0; i < netdev->real_num_tx_queues; i++)
                netif_stop_subqueue(netdev, i);
 }
 
@@ -528,7 +576,7 @@ static inline void wake_txqs(struct net_device *netdev)
        struct lio *lio = GET_LIO(netdev);
        int i, qno;
 
-       for (i = 0; i < netdev->num_tx_queues; i++) {
+       for (i = 0; i < netdev->real_num_tx_queues; i++) {
                qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs].s.q_no;
 
                if (__netif_subqueue_stopped(netdev, i)) {
@@ -549,14 +597,33 @@ static inline void start_txqs(struct net_device *netdev)
        int i;
 
        if (lio->linfo.link.s.link_up) {
-               for (i = 0; i < netdev->num_tx_queues; i++)
+               for (i = 0; i < netdev->real_num_tx_queues; i++)
                        netif_start_subqueue(netdev, i);
        }
 }
 
-static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+static inline int skb_iq(struct octeon_device *oct, struct sk_buff *skb)
 {
-       return skb->queue_mapping % lio->linfo.num_txpciq;
+       return skb->queue_mapping % oct->num_iqs;
+}
+
+/**
+ * Remove the node at the head of the list. The list would be empty at
+ * the end of this call if there are no more nodes in the list.
+ */
+static inline struct list_head *lio_list_delete_head(struct list_head *root)
+{
+       struct list_head *node;
+
+       if (root->prev == root && root->next == root)
+               node = NULL;
+       else
+               node = root->next;
+
+       if (node)
+               list_del(node);
+
+       return node;
 }
 
 #endif
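lio_list_delete_head() above pops the first node of a kernel-style circular doubly-linked list, returning NULL once only the head remains. A self-contained userspace model of the same operation (the list_head helpers are re-implemented here for illustration; an empty circular list is one whose head points at itself):

#include <stdio.h>

/* Minimal circular doubly-linked list, modeled on the kernel's list_head. */
struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
        n->prev = h->prev; n->next = h;
        h->prev->next = n; h->prev = n;
}

static void list_del(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

/* Same shape as lio_list_delete_head(): return NULL on an empty list,
 * otherwise unlink and return the first node. */
static struct list_head *list_delete_head(struct list_head *root)
{
        struct list_head *node = NULL;

        if (root->next != root)   /* non-empty */
                node = root->next;
        if (node)
                list_del(node);
        return node;
}

int main(void)
{
        struct list_head head, a, b;

        list_init(&head);
        list_add_tail(&a, &head);
        list_add_tail(&b, &head);

        printf("%s\n", list_delete_head(&head) == &a ? "got a" : "?");
        printf("%s\n", list_delete_head(&head) == &b ? "got b" : "?");
        printf("%s\n", list_delete_head(&head) == NULL ? "empty" : "?");
        return 0;
}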
index b57acb8dc35bd1325d2e1932b9280d49c61d243f..3c5057868ab3a94758529375b4125da02a47fb8f 100644 (file)
@@ -62,6 +62,18 @@ struct cudbg_hw_sched {
        u32 map;
 };
 
+#define SGE_QBASE_DATA_REG_NUM 4
+
+struct sge_qbase_reg_field {
+       u32 reg_addr;
+       u32 reg_data[SGE_QBASE_DATA_REG_NUM];
+       /* Max supported PFs */
+       u32 pf_data_value[PCIE_FW_MASTER_M + 1][SGE_QBASE_DATA_REG_NUM];
+       /* Max supported VFs */
+       u32 vf_data_value[T6_VF_M + 1][SGE_QBASE_DATA_REG_NUM];
+       u32 vfcount; /* Actual number of max vfs in current configuration */
+};
+
 struct ireg_field {
        u32 ireg_addr;
        u32 ireg_data;
@@ -235,6 +247,9 @@ struct cudbg_vpd_data {
 };
 
 #define CUDBG_MAX_TCAM_TID 0x800
+#define CUDBG_T6_CLIP 1536
+#define CUDBG_MAX_TID_COMP_EN 6144
+#define CUDBG_MAX_TID_COMP_DIS 3072
 
 enum cudbg_le_entry_types {
        LE_ET_UNKNOWN = 0,
@@ -354,6 +369,11 @@ static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = {
        {0x10cc, 0x10d4, 0x0, 16},
 };
 
+static const u32 t6_sge_qbase_index_array[] = {
+       /* 1 addr reg SGE_QBASE_INDEX and 4 data reg SGE_QBASE_MAP[0-3] */
+       0x1250, 0x1240, 0x1244, 0x1248, 0x124c,
+};
+
 static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = {
        {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */
        {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */
@@ -419,15 +439,15 @@ static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
        {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
        {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
        {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
-       {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
-       {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
-       {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
-       {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
-       {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
-       {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */
-       {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */
-       {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
-       {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
+       {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */
+       {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */
+       {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */
+       {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */
+       {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */
+       {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */
+       {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */
+       {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */
+       {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */
 };
 
 static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
@@ -444,16 +464,6 @@ static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
        {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
        {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
        {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
-       {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
-       {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
-       {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
-       {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
-       {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
-       {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */
-       {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */
-       {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */
-       {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
-       {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
 };
 
 static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = {
index 8568a51f641483ac1ec352ef0d4b0f28dac35102..215fe6260fd75d348b93558e8f354e6a7359bdc8 100644 (file)
@@ -24,6 +24,7 @@
 #define CUDBG_STATUS_NOT_IMPLEMENTED -28
 #define CUDBG_SYSTEM_ERROR -29
 #define CUDBG_STATUS_CCLK_NOT_DEFINED -32
+#define CUDBG_STATUS_PARTIAL_DATA -41
 
 #define CUDBG_MAJOR_VERSION 1
 #define CUDBG_MINOR_VERSION 14
index 9da6f57901a9ae8317d0a8fe74066772a971d0fa..0afcfe99bff304acaf2a701d2e61db40e6629074 100644 (file)
@@ -1339,16 +1339,39 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init,
        return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
 }
 
+static void cudbg_read_sge_qbase_indirect_reg(struct adapter *padap,
+                                             struct sge_qbase_reg_field *qbase,
+                                             u32 func, bool is_pf)
+{
+       u32 *buff, i;
+
+       if (is_pf) {
+               buff = qbase->pf_data_value[func];
+       } else {
+               buff = qbase->vf_data_value[func];
+               /* In SGE_QBASE_INDEX,
+                * Entries 0->7 are PF0->7, Entries 8->263 are VFID0->256.
+                */
+               func += 8;
+       }
+
+       t4_write_reg(padap, qbase->reg_addr, func);
+       for (i = 0; i < SGE_QBASE_DATA_REG_NUM; i++, buff++)
+               *buff = t4_read_reg(padap, qbase->reg_data[i]);
+}
+
 int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init,
                               struct cudbg_buffer *dbg_buff,
                               struct cudbg_error *cudbg_err)
 {
        struct adapter *padap = pdbg_init->adap;
        struct cudbg_buffer temp_buff = { 0 };
+       struct sge_qbase_reg_field *sge_qbase;
        struct ireg_buf *ch_sge_dbg;
        int i, rc;
 
-       rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(*ch_sge_dbg) * 2,
+       rc = cudbg_get_buff(pdbg_init, dbg_buff,
+                           sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase),
                            &temp_buff);
        if (rc)
                return rc;
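The new cudbg_read_sge_qbase_indirect_reg() uses the classic indirect-register window named by t6_sge_qbase_index_array: write a function number into the index register (0x1250), then read the four data registers (0x1240-0x124c) that now reflect that function; per the comment, VFs are offset by 8 in the index space. A simplified userspace model of the write-index/read-data pattern (the fake MMIO behavior is invented for the demo and omits the VF offset):

#include <stdint.h>
#include <stdio.h>

#define QBASE_DATA_REGS 4

/* Fake MMIO space standing in for t4_write_reg()/t4_read_reg(). */
static uint32_t mmio[0x2000 / 4];
static uint32_t selected_func;

static void write_reg(uint32_t addr, uint32_t val)
{
        if (addr == 0x1250)            /* SGE_QBASE_INDEX, per the array above */
                selected_func = val;
        mmio[addr / 4] = val;
}

static uint32_t read_reg(uint32_t addr)
{
        /* Model the window: data regs reflect the selected function. */
        if (addr >= 0x1240 && addr < 0x1240 + 4 * QBASE_DATA_REGS)
                return (selected_func << 8) | ((addr - 0x1240) / 4);
        return mmio[addr / 4];
}

/* Indirect read pattern: select a function via the index register,
 * then read back each data register in the window. */
static void read_qbase(uint32_t func, uint32_t out[QBASE_DATA_REGS])
{
        static const uint32_t data_addr[QBASE_DATA_REGS] =
                { 0x1240, 0x1244, 0x1248, 0x124c };

        write_reg(0x1250, func);
        for (int i = 0; i < QBASE_DATA_REGS; i++)
                out[i] = read_reg(data_addr[i]);
}

int main(void)
{
        uint32_t v[QBASE_DATA_REGS];

        read_qbase(3, v);
        for (int i = 0; i < QBASE_DATA_REGS; i++)
                printf("data[%d] = 0x%x\n", i, v[i]);
        return 0;
}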
@@ -1370,6 +1393,28 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init,
                                 sge_pio->ireg_local_offset);
                ch_sge_dbg++;
        }
+
+       if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) {
+               sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg;
+               /* 1 addr reg SGE_QBASE_INDEX and 4 data reg
+                * SGE_QBASE_MAP[0-3]
+                */
+               sge_qbase->reg_addr = t6_sge_qbase_index_array[0];
+               for (i = 0; i < SGE_QBASE_DATA_REG_NUM; i++)
+                       sge_qbase->reg_data[i] =
+                               t6_sge_qbase_index_array[i + 1];
+
+               for (i = 0; i <= PCIE_FW_MASTER_M; i++)
+                       cudbg_read_sge_qbase_indirect_reg(padap, sge_qbase,
+                                                         i, true);
+
+               for (i = 0; i < padap->params.arch.vfcount; i++)
+                       cudbg_read_sge_qbase_indirect_reg(padap, sge_qbase,
+                                                         i, false);
+
+               sge_qbase->vfcount = padap->params.arch.vfcount;
+       }
+
        return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
 }
 
@@ -2366,8 +2411,11 @@ void cudbg_fill_le_tcam_info(struct adapter *padap,
        value = t4_read_reg(padap, LE_DB_ROUTING_TABLE_INDEX_A);
        tcam_region->routing_start = value;
 
-       /*Get clip table index */
-       value = t4_read_reg(padap, LE_DB_CLIP_TABLE_INDEX_A);
+       /* Get clip table index. For T6 there is a separate CLIP TCAM */
+       if (is_t6(padap->params.chip))
+               value = t4_read_reg(padap, LE_DB_CLCAM_TID_BASE_A);
+       else
+               value = t4_read_reg(padap, LE_DB_CLIP_TABLE_INDEX_A);
        tcam_region->clip_start = value;
 
        /* Get filter table index */
@@ -2392,8 +2440,16 @@ void cudbg_fill_le_tcam_info(struct adapter *padap,
                                               tcam_region->tid_hash_base;
                }
        } else { /* hash not enabled */
-               tcam_region->max_tid = CUDBG_MAX_TCAM_TID;
+               if (is_t6(padap->params.chip))
+                       tcam_region->max_tid = (value & ASLIPCOMPEN_F) ?
+                                              CUDBG_MAX_TID_COMP_EN :
+                                              CUDBG_MAX_TID_COMP_DIS;
+               else
+                       tcam_region->max_tid = CUDBG_MAX_TCAM_TID;
        }
+
+       if (is_t6(padap->params.chip))
+               tcam_region->max_tid += CUDBG_T6_CLIP;
 }
 
 int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init,
@@ -2423,18 +2479,31 @@ int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init,
        for (i = 0; i < tcam_region.max_tid; ) {
                rc = cudbg_read_tid(pdbg_init, i, tid_data);
                if (rc) {
-                       cudbg_err->sys_err = rc;
-                       cudbg_put_buff(pdbg_init, &temp_buff);
-                       return rc;
+                       cudbg_err->sys_warn = CUDBG_STATUS_PARTIAL_DATA;
+                       /* Update tcam header and exit */
+                       tcam_region.max_tid = i;
+                       memcpy(temp_buff.data, &tcam_region,
+                              sizeof(struct cudbg_tcam));
+                       goto out;
                }
 
-               /* ipv6 takes two tids */
-               cudbg_is_ipv6_entry(tid_data, tcam_region) ? i += 2 : i++;
+               if (cudbg_is_ipv6_entry(tid_data, tcam_region)) {
+                       /* T6 CLIP TCAM: ipv6 takes 4 entries */
+                       if (is_t6(padap->params.chip) &&
+                           i >= tcam_region.clip_start &&
+                           i < tcam_region.clip_start + CUDBG_T6_CLIP)
+                               i += 4;
+                       else /* Main TCAM: ipv6 takes two tids */
+                               i += 2;
+               } else {
+                       i++;
+               }
 
                tid_data++;
                bytes += sizeof(struct cudbg_tid_data);
        }
 
+out:
        return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
 }
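The walk above advances by a per-entry stride: in the T6 CLIP region (CUDBG_T6_CLIP = 1536 entries) an IPv6 entry occupies four TIDs, in the main TCAM it occupies two, and IPv4 always occupies one; on a read error the header is patched with the partial count instead of aborting. A sketch of just the stride logic (region bounds and the entry mix are made up for the demo):

#include <stdio.h>
#include <stdbool.h>

#define CLIP_START 1024
#define CLIP_LEN   1536   /* CUDBG_T6_CLIP in the hunk above */

/* IPv6 entries occupy 4 slots inside the T6 CLIP region and 2 slots in
 * the main TCAM; IPv4 entries occupy 1. */
static int entry_stride(bool is_ipv6, bool is_t6, unsigned int idx)
{
        if (!is_ipv6)
                return 1;
        if (is_t6 && idx >= CLIP_START && idx < CLIP_START + CLIP_LEN)
                return 4;
        return 2;
}

int main(void)
{
        unsigned int i = 0, max = CLIP_START + CLIP_LEN, entries = 0;

        while (i < max) {
                bool ipv6 = (i % 3 == 0);          /* made-up entry mix */
                i += entry_stride(ipv6, true, i);
                entries++;
        }
        printf("walked %u entries over %u tids\n", entries, max);
        return 0;
}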
 
index 688f95440af26c48d84d146ed98409e5e0856015..211086bdbe20d0a894a3acffdbc067c8c0acc85f 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/ptp_classify.h>
+#include <linux/crash_dump.h>
 #include <asm/io.h>
 #include "t4_chip_type.h"
 #include "cxgb4_uld.h"
@@ -964,6 +965,9 @@ struct adapter {
        struct hma_data hma;
 
        struct srq_data *srq;
+
+       /* Dump buffer for collecting logs in kdump kernel */
+       struct vmcoredd_data vmcoredd;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1034,6 +1038,7 @@ struct ch_sched_queue {
 #define VF_BITWIDTH 8
 #define IVLAN_BITWIDTH 16
 #define OVLAN_BITWIDTH 16
+#define ENCAP_VNI_BITWIDTH 24
 
 /* Filter matching rules.  These consist of a set of ingress packet field
  * (value, mask) tuples.  The associated ingress packet field matches the
@@ -1064,6 +1069,7 @@ struct ch_filter_tuple {
        uint32_t ivlan_vld:1;                   /* inner VLAN valid */
        uint32_t ovlan_vld:1;                   /* outer VLAN valid */
        uint32_t pfvf_vld:1;                    /* PF/VF valid */
+       uint32_t encap_vld:1;                   /* Encapsulation valid */
        uint32_t macidx:MACIDX_BITWIDTH;        /* exact match MAC index */
        uint32_t fcoe:FCOE_BITWIDTH;            /* FCoE packet */
        uint32_t iport:IPORT_BITWIDTH;          /* ingress port */
@@ -1074,6 +1080,7 @@ struct ch_filter_tuple {
        uint32_t vf:VF_BITWIDTH;                /* PCI-E VF ID */
        uint32_t ivlan:IVLAN_BITWIDTH;          /* inner VLAN */
        uint32_t ovlan:OVLAN_BITWIDTH;          /* outer VLAN */
+       uint32_t vni:ENCAP_VNI_BITWIDTH;        /* VNI of tunnel */
 
        /* Uncompressed header matching field rules.  These are always
         * available for field rules.
@@ -1690,6 +1697,12 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid,
 int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid,
                         const u8 *addr, const u8 *mask, unsigned int idx,
                         u8 lookup_type, u8 port_id, bool sleep_ok);
+int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, int idx,
+                          bool sleep_ok);
+int t4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid,
+                           const u8 *addr, const u8 *mask, unsigned int vni,
+                           unsigned int vni_mask, u8 dip_hit, u8 lookup_type,
+                           bool sleep_ok);
 int t4_alloc_raw_mac_filt(struct adapter *adap, unsigned int viid,
                          const u8 *addr, const u8 *mask, unsigned int idx,
                          u8 lookup_type, u8 port_id, bool sleep_ok);
index 143686c60234ebfc23fe84762f40dbe4bdf98d18..8d751efcb90e58b5161a491921d204ce197870c2 100644 (file)
@@ -214,7 +214,8 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
                len = sizeof(struct ireg_buf) * n;
                break;
        case CUDBG_SGE_INDIRECT:
-               len = sizeof(struct ireg_buf) * 2;
+               len = sizeof(struct ireg_buf) * 2 +
+                     sizeof(struct sge_qbase_reg_field);
                break;
        case CUDBG_ULPRX_LA:
                len = sizeof(struct cudbg_ulprx_la);
@@ -488,3 +489,28 @@ void cxgb4_init_ethtool_dump(struct adapter *adapter)
        adapter->eth_dump.version = adapter->params.fw_vers;
        adapter->eth_dump.len = 0;
 }
+
+static int cxgb4_cudbg_vmcoredd_collect(struct vmcoredd_data *data, void *buf)
+{
+       struct adapter *adap = container_of(data, struct adapter, vmcoredd);
+       u32 len = data->size;
+
+       return cxgb4_cudbg_collect(adap, buf, &len, CXGB4_ETH_DUMP_ALL);
+}
+
+int cxgb4_cudbg_vmcore_add_dump(struct adapter *adap)
+{
+       struct vmcoredd_data *data = &adap->vmcoredd;
+       u32 len;
+
+       len = sizeof(struct cudbg_hdr) +
+             sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
+       len += CUDBG_DUMP_BUFF_SIZE;
+
+       data->size = len;
+       snprintf(data->dump_name, sizeof(data->dump_name), "%s_%s",
+                cxgb4_driver_name, adap->name);
+       data->vmcoredd_callback = cxgb4_cudbg_vmcoredd_collect;
+
+       return vmcore_add_device_dump(data);
+}
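cxgb4_cudbg_vmcore_add_dump() above sizes the dump buffer up front (entity headers plus CUDBG_DUMP_BUFF_SIZE), names it, and registers a collect callback that the vmcore layer invokes in the kdump kernel. A userspace model of that register-then-collect flow (struct and function names are stand-ins, not the kernel API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for struct vmcoredd_data: a named buffer plus a collect hook. */
struct dd_data {
        char name[32];
        unsigned int size;
        int (*collect)(struct dd_data *data, void *buf);
};

static int my_collect(struct dd_data *data, void *buf)
{
        /* A real driver would snapshot hardware state here. */
        memset(buf, 0xab, data->size);
        return 0;
}

/* Model of the registration side: size the buffer up front, then
 * invoke the driver callback to fill it. */
static int add_device_dump(struct dd_data *data)
{
        void *buf = malloc(data->size);

        if (!buf)
                return -1;
        if (data->collect(data, buf) == 0)
                printf("captured %u bytes as \"%s\"\n", data->size, data->name);
        free(buf);
        return 0;
}

int main(void)
{
        struct dd_data d = { .size = 4096, .collect = my_collect };

        snprintf(d.name, sizeof(d.name), "%s_%s", "cxgb4", "adap0");
        return add_device_dump(&d);
}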
index ce1ac9a1c8781c843e9e453bceb85f56efe3268a..ef59ba1ed96850d9b59b812bd00b07944e5e572f 100644 (file)
@@ -41,8 +41,11 @@ enum CXGB4_ETHTOOL_DUMP_FLAGS {
        CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */
 };
 
+#define CXGB4_ETH_DUMP_ALL (CXGB4_ETH_DUMP_MEM | CXGB4_ETH_DUMP_HW)
+
 u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag);
 int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
                        u32 flag);
 void cxgb4_init_ethtool_dump(struct adapter *adapter);
+int cxgb4_cudbg_vmcore_add_dump(struct adapter *adap);
 #endif /* __CXGB4_CUDBG_H__ */
index db92f1858060ec685d7b59740ada8422097f178f..00fc5f1afb1d0024b6477d6c63408afcd9858ad9 100644 (file)
@@ -64,8 +64,7 @@ static int set_tcb_field(struct adapter *adap, struct filter_entry *f,
        if (!skb)
                return -ENOMEM;
 
-       req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req));
-       memset(req, 0, sizeof(*req));
+       req = (struct cpl_set_tcb_field *)__skb_put_zero(skb, sizeof(*req));
        INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid);
        req->reply_ctrl = htons(REPLY_CHAN_V(0) |
                                QUEUENO_V(adap->sge.fw_evtq.abs_id) |
@@ -266,6 +265,8 @@ static int validate_filter(struct net_device *dev,
                        fs->mask.pfvf_vld) ||
            unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld,
                        fs->mask.ovlan_vld) ||
+           unsupported(fconf, VNIC_ID_F, fs->val.encap_vld,
+                       fs->mask.encap_vld) ||
            unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld))
                return -EOPNOTSUPP;
 
@@ -276,8 +277,12 @@ static int validate_filter(struct net_device *dev,
         * carries that overlap, we need to translate any PF/VF
         * specification into that internal format below.
         */
-       if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) &&
-           is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld))
+       if ((is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) &&
+            is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld)) ||
+           (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) &&
+            is_field_set(fs->val.encap_vld, fs->mask.encap_vld)) ||
+           (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) &&
+            is_field_set(fs->val.encap_vld, fs->mask.encap_vld)))
                return -EOPNOTSUPP;
        if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) ||
            (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) &&
@@ -307,6 +312,9 @@ static int validate_filter(struct net_device *dev,
             fs->newvlan == VLAN_REWRITE))
                return -EOPNOTSUPP;
 
+       if (fs->val.encap_vld &&
+           CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
+               return -EOPNOTSUPP;
        return 0;
 }
 
@@ -706,6 +714,8 @@ int delete_filter(struct adapter *adapter, unsigned int fidx)
  */
 void clear_filter(struct adapter *adap, struct filter_entry *f)
 {
+       struct port_info *pi = netdev_priv(f->dev);
+
        /* If the new or old filter has loopback rewriting rules then we'll
         * need to free any existing L2T, SMT, or CLIP entries for the
         * filter rule.
@@ -716,6 +726,12 @@ void clear_filter(struct adapter *adap, struct filter_entry *f)
        if (f->smt)
                cxgb4_smt_release(f->smt);
 
+       if (f->fs.val.encap_vld && f->fs.val.ovlan_vld)
+               if (atomic_dec_and_test(&adap->mps_encap[f->fs.val.ovlan &
+                                                        0x1ff].refcnt))
+                       t4_free_encap_mac_filt(adap, pi->viid,
+                                              f->fs.val.ovlan & 0x1ff, 0);
+
        if ((f->fs.hash || is_t6(adap->params.chip)) && f->fs.type)
                cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
 
@@ -836,11 +852,15 @@ bool is_filter_exact_match(struct adapter *adap,
 {
        struct tp_params *tp = &adap->params.tp;
        u64 hash_filter_mask = tp->hash_filter_mask;
-       u32 mask;
+       u64 ntuple_mask = 0;
 
        if (!is_hashfilter(adap))
                return false;
 
+       /* Keep tunnel VNI match disabled for hash-filters for now */
+       if (fs->mask.encap_vld)
+               return false;
+
        if (fs->type) {
                if (is_inaddr_any(fs->val.fip, AF_INET6) ||
                    !is_addr_all_mask(fs->mask.fip, AF_INET6))
@@ -865,73 +885,45 @@ bool is_filter_exact_match(struct adapter *adap,
        if (!fs->val.fport || fs->mask.fport != 0xffff)
                return false;
 
-       if (tp->fcoe_shift >= 0) {
-               mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W;
-               if (mask && !fs->mask.fcoe)
-                       return false;
-       }
+       /* calculate tuple mask and compare with mask configured in hw */
+       if (tp->fcoe_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift;
 
-       if (tp->port_shift >= 0) {
-               mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W;
-               if (mask && !fs->mask.iport)
-                       return false;
-       }
+       if (tp->port_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.iport << tp->port_shift;
 
        if (tp->vnic_shift >= 0) {
-               mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W;
-
-               if ((adap->params.tp.ingress_config & VNIC_F)) {
-                       if (mask && !fs->mask.pfvf_vld)
-                               return false;
-               } else {
-                       if (mask && !fs->mask.ovlan_vld)
-                               return false;
-               }
+               if ((adap->params.tp.ingress_config & VNIC_F))
+                       ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift;
+               else
+                       ntuple_mask |= (u64)fs->mask.ovlan_vld <<
+                               tp->vnic_shift;
        }
 
-       if (tp->vlan_shift >= 0) {
-               mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W;
-               if (mask && !fs->mask.ivlan)
-                       return false;
-       }
+       if (tp->vlan_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift;
 
-       if (tp->tos_shift >= 0) {
-               mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W;
-               if (mask && !fs->mask.tos)
-                       return false;
-       }
+       if (tp->tos_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift;
 
-       if (tp->protocol_shift >= 0) {
-               mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W;
-               if (mask && !fs->mask.proto)
-                       return false;
-       }
+       if (tp->protocol_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift;
 
-       if (tp->ethertype_shift >= 0) {
-               mask = (hash_filter_mask >> tp->ethertype_shift) &
-                       FT_ETHERTYPE_W;
-               if (mask && !fs->mask.ethtype)
-                       return false;
-       }
+       if (tp->ethertype_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift;
 
-       if (tp->macmatch_shift >= 0) {
-               mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W;
-               if (mask && !fs->mask.macidx)
-                       return false;
-       }
+       if (tp->macmatch_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift;
+
+       if (tp->matchtype_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift;
+
+       if (tp->frag_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift;
+
+       if (ntuple_mask != hash_filter_mask)
+               return false;
 
-       if (tp->matchtype_shift >= 0) {
-               mask = (hash_filter_mask >> tp->matchtype_shift) &
-                       FT_MPSHITTYPE_W;
-               if (mask && !fs->mask.matchtype)
-                       return false;
-       }
-       if (tp->frag_shift >= 0) {
-               mask = (hash_filter_mask >> tp->frag_shift) &
-                       FT_FRAGMENTATION_W;
-               if (mask && !fs->mask.frag)
-                       return false;
-       }
        return true;
 }
 
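The rewrite above replaces the per-field "mask present but filter mask empty" checks with a single reconstruction: shift each field mask into its configured bit position, OR them into ntuple_mask, and require an exact match with the mask programmed in hardware. A compact sketch with three hypothetical fields (the shift layout is invented for the demo):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Hypothetical field shifts, standing in for adapter->params.tp.*_shift;
 * a shift of -1 means the field is not part of the hardware tuple. */
struct tp_shifts { int port, vlan, proto; };

/* Rebuild the tuple mask from the filter's per-field masks and require
 * an exact match with the mask programmed in hardware -- the same idea
 * as the is_filter_exact_match() rewrite above. */
static bool masks_match(const struct tp_shifts *tp,
                        uint8_t iport_mask, uint16_t ivlan_mask,
                        uint8_t proto_mask, uint64_t hw_mask)
{
        uint64_t ntuple_mask = 0;

        if (tp->port >= 0)
                ntuple_mask |= (uint64_t)iport_mask << tp->port;
        if (tp->vlan >= 0)
                ntuple_mask |= (uint64_t)ivlan_mask << tp->vlan;
        if (tp->proto >= 0)
                ntuple_mask |= (uint64_t)proto_mask << tp->proto;

        return ntuple_mask == hw_mask;
}

int main(void)
{
        struct tp_shifts tp = { .port = 0, .vlan = 4, .proto = 20 };
        uint64_t hw = ((uint64_t)0x7 << 0) | ((uint64_t)0xfff << 4) |
                      ((uint64_t)0xff << 20);

        printf("exact: %d\n", masks_match(&tp, 0x7, 0xfff, 0xff, hw));
        printf("loose: %d\n", masks_match(&tp, 0x7, 0x000, 0xff, hw));
        return 0;
}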
@@ -962,8 +954,12 @@ static u64 hash_filter_ntuple(struct ch_filter_specification *fs,
                ntuple |= (u64)(fs->val.tos) << tp->tos_shift;
 
        if (tp->vnic_shift >= 0) {
-               if ((adap->params.tp.ingress_config & VNIC_F) &&
-                   fs->mask.pfvf_vld)
+               if ((adap->params.tp.ingress_config & USE_ENC_IDX_F) &&
+                   fs->mask.encap_vld)
+                       ntuple |= (u64)((fs->val.encap_vld << 16) |
+                                       (fs->val.ovlan)) << tp->vnic_shift;
+               else if ((adap->params.tp.ingress_config & VNIC_F) &&
+                        fs->mask.pfvf_vld)
                        ntuple |= (u64)((fs->val.pfvf_vld << 16) |
                                        (fs->val.pf << 13) |
                                        (fs->val.vf)) << tp->vnic_shift;
@@ -1077,6 +1073,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
                                 struct filter_ctx *ctx)
 {
        struct adapter *adapter = netdev2adap(dev);
+       struct port_info *pi = netdev_priv(dev);
        struct tid_info *t = &adapter->tids;
        struct filter_entry *f;
        struct sk_buff *skb;
@@ -1143,13 +1140,34 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
                f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf;
                f->fs.val.ovlan_vld = fs->val.pfvf_vld;
                f->fs.mask.ovlan_vld = fs->mask.pfvf_vld;
+       } else if (iconf & USE_ENC_IDX_F) {
+               if (f->fs.val.encap_vld) {
+                       struct port_info *pi = netdev_priv(f->dev);
+                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+
+                       /* allocate MPS TCAM entry */
+                       ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
+                                                     match_all_mac,
+                                                     match_all_mac,
+                                                     f->fs.val.vni,
+                                                     f->fs.mask.vni,
+                                                     0, 1, 1);
+                       if (ret < 0)
+                               goto free_atid;
+
+                       atomic_inc(&adapter->mps_encap[ret].refcnt);
+                       f->fs.val.ovlan = ret;
+                       f->fs.mask.ovlan = 0xffff;
+                       f->fs.val.ovlan_vld = 1;
+                       f->fs.mask.ovlan_vld = 1;
+               }
        }
 
        size = sizeof(struct cpl_t6_act_open_req);
        if (f->fs.type) {
                ret = cxgb4_clip_get(f->dev, (const u32 *)&f->fs.val.lip, 1);
                if (ret)
-                       goto free_atid;
+                       goto free_mps;
 
                skb = alloc_skb(size, GFP_KERNEL);
                if (!skb) {
@@ -1164,7 +1182,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
                skb = alloc_skb(size, GFP_KERNEL);
                if (!skb) {
                        ret = -ENOMEM;
-                       goto free_atid;
+                       goto free_mps;
                }
 
                mk_act_open_req(f, skb,
@@ -1180,6 +1198,10 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
 free_clip:
        cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
 
+free_mps:
+       if (f->fs.val.encap_vld && f->fs.val.ovlan_vld)
+               t4_free_encap_mac_filt(adapter, pi->viid, f->fs.val.ovlan, 1);
+
 free_atid:
        cxgb4_free_atid(t, atid);
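
The new free_mps label keeps the error unwind in strict reverse order of acquisition (ATID, then MPS TCAM entry, then CLIP entry), so a failure at any step releases exactly what was taken so far. A minimal sketch of the idiom, with hypothetical stubs standing in for the driver calls:

    static int get_a(void) { return 0; }
    static int get_b(void) { return 0; }
    static int get_c(void) { return -1; }  /* pretend the last step fails */
    static void put_b(void) { }
    static void put_a(void) { }

    static int setup(void)
    {
            int ret;

            ret = get_a();                  /* acquired first ... */
            if (ret < 0)
                    return ret;
            ret = get_b();
            if (ret < 0)
                    goto undo_a;
            ret = get_c();
            if (ret < 0)
                    goto undo_b;
            return 0;

    undo_b:
            put_b();                        /* ... released in reverse order */
    undo_a:
            put_a();
            return ret;
    }

    int main(void) { return setup() ? 1 : 0; }
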
 
@@ -1361,6 +1383,27 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
                f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf;
                f->fs.val.ovlan_vld = fs->val.pfvf_vld;
                f->fs.mask.ovlan_vld = fs->mask.pfvf_vld;
+       } else if (iconf & USE_ENC_IDX_F) {
+               if (f->fs.val.encap_vld) {
+                       struct port_info *pi = netdev_priv(f->dev);
+                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+
+                       /* allocate MPS TCAM entry */
+                       ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
+                                                     match_all_mac,
+                                                     match_all_mac,
+                                                     f->fs.val.vni,
+                                                     f->fs.mask.vni,
+                                                     0, 1, 1);
+                       if (ret < 0)
+                               goto free_clip;
+
+                       atomic_inc(&adapter->mps_encap[ret].refcnt);
+                       f->fs.val.ovlan = ret;
+                       f->fs.mask.ovlan = 0x1ff;
+                       f->fs.val.ovlan_vld = 1;
+                       f->fs.mask.ovlan_vld = 1;
+               }
        }
 
        /* Attempt to set the filter.  If we don't succeed, we clear
@@ -1377,6 +1420,13 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
        }
 
        return ret;
+
+free_clip:
+       if (is_t6(adapter->params.chip) && f->fs.type)
+               cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
+       cxgb4_clear_ftid(&adapter->tids, filter_id,
+                        fs->type ? PF_INET6 : PF_INET, chip_ver);
+       return ret;
 }
 
 static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id,
index 24d2865b880665910c4a4a53d9e613124724432b..130d1eed7993ad7bef7f76094cc9e6eb5c37d293 100644
@@ -2886,13 +2886,13 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
        }
 
        /* Convert from Mbps to Kbps */
-       req_rate = rate << 10;
+       req_rate = rate * 1000;
 
        /* Max rate is 100 Gbps */
-       if (req_rate >= SCHED_MAX_RATE_KBPS) {
+       if (req_rate > SCHED_MAX_RATE_KBPS) {
                dev_err(adap->pdev_dev,
                        "Invalid rate %u Mbps, Max rate is %u Mbps\n",
-                       rate, SCHED_MAX_RATE_KBPS >> 10);
+                       rate, SCHED_MAX_RATE_KBPS / 1000);
                return -ERANGE;
        }
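
The conversion change matters because a left shift by 10 multiplies by 1024 rather than 1000: a 100,000 Mbps (100 Gbps) request became 102,400,000 Kbps and tripped the cap, and the old >= comparison rejected exactly 100 Gbps as well. A small worked example (the cap value mirrors SCHED_MAX_RATE_KBPS):

    #include <stdio.h>

    int main(void)
    {
            unsigned int rate = 100000;         /* 100 Gbps, in Mbps */
            unsigned int max_kbps = 100000000;  /* 100 Gbps, in Kbps */

            printf("%u\n", rate << 10);    /* 102400000: 2.4% over the cap */
            printf("%u\n", rate * 1000);   /* 100000000: exactly the cap   */

            /* old: (rate << 10) >= max_kbps  rejects a legal 100 Gbps
             * new: (rate * 1000) > max_kbps  accepts it
             */
            return (rate * 1000 > max_kbps);
    }
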
 
@@ -3081,7 +3081,7 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev,
                                           match_all_mac, match_all_mac,
                                           adapter->rawf_start +
                                            pi->port_id,
-                                          1, pi->port_id, true);
+                                          1, pi->port_id, false);
                if (ret < 0) {
                        netdev_info(netdev, "Failed to free mac filter entry, for port %d\n",
                                    i);
@@ -3169,7 +3169,7 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev,
                                            match_all_mac,
                                            adapter->rawf_start +
                                            pi->port_id,
-                                           1, pi->port_id, true);
+                                           1, pi->port_id, false);
                if (ret < 0) {
                        netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n",
                                    be16_to_cpu(ti->port));
@@ -3433,8 +3433,8 @@ static int adap_config_hma(struct adapter *adapter)
        sgl = adapter->hma.sgt->sgl;
        node = dev_to_node(adapter->pdev_dev);
        for_each_sg(sgl, iter, sgt->orig_nents, i) {
-               newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
-                                          page_order);
+               newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL |
+                                          __GFP_ZERO, page_order);
                if (!newpage) {
                        dev_err(adapter->pdev_dev,
                                "Not enough memory for HMA page allocation\n");
@@ -4276,6 +4276,20 @@ static int adap_init0(struct adapter *adap)
        adap->tids.nftids = val[4] - val[3] + 1;
        adap->sge.ingr_start = val[5];
 
+       if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
+               /* Read the raw mps entries. In T6, the last 2 tcam entries
+                * are reserved for raw mac addresses (rawf = 2, one per port).
+                */
+               params[0] = FW_PARAM_PFVF(RAWF_START);
+               params[1] = FW_PARAM_PFVF(RAWF_END);
+               ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+                                     params, val);
+               if (ret == 0) {
+                       adap->rawf_start = val[0];
+                       adap->rawf_cnt = val[1] - val[0] + 1;
+               }
+       }
+
        /* qids (ingress/egress) returned from firmware can be anywhere
         * in the range from EQ(IQFLINT)_START to EQ(IQFLINT)_END.
         * Hence driver needs to allocate memory for this range to
@@ -5181,6 +5195,7 @@ static void free_some_resources(struct adapter *adapter)
 {
        unsigned int i;
 
+       kvfree(adapter->mps_encap);
        kvfree(adapter->smt);
        kvfree(adapter->l2t);
        kvfree(adapter->srq);
@@ -5261,13 +5276,9 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
        u32 pcie_fw;
 
        pcie_fw = readl(adap->regs + PCIE_FW_A);
-       /* Check if cxgb4 is the MASTER and fw is initialized */
-       if (num_vfs &&
-           (!(pcie_fw & PCIE_FW_INIT_F) ||
-           !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
-           PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF)) {
-               dev_warn(&pdev->dev,
-                        "cxgb4 driver needs to be MASTER to support SRIOV\n");
+       /* Check if fw is initialized */
+       if (!(pcie_fw & PCIE_FW_INIT_F)) {
+               dev_warn(&pdev->dev, "Device not initialized\n");
                return -EOPNOTSUPP;
        }
 
@@ -5474,6 +5485,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
        spin_lock_init(&adapter->mbox_lock);
        INIT_LIST_HEAD(&adapter->mlist.list);
+       adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS;
        pci_set_drvdata(pdev, adapter);
 
        if (func != ent->driver_data) {
@@ -5508,8 +5520,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out_free_adapter;
        }
 
-       adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS;
-
        /* PCI device has been enabled */
        adapter->flags |= DEV_ENABLED;
        memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
@@ -5544,6 +5554,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                goto out_free_adapter;
 
+       if (is_kdump_kernel()) {
+               /* Collect hardware state and append to /proc/vmcore */
+               err = cxgb4_cudbg_vmcore_add_dump(adapter);
+               if (err) {
+                       dev_warn(adapter->pdev_dev,
+                                "Failed to collect vmcore device dump, err: %d. Continuing\n",
+                                err);
+                       err = 0;
+               }
+       }
 
        if (!is_t4(adapter->params.chip)) {
                s_qpp = (QUEUESPERPAGEPF0_S +
@@ -5611,8 +5631,15 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                        NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                        NETIF_F_HW_TC;
 
-               if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5)
+               if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5) {
+                       netdev->hw_enc_features |= NETIF_F_IP_CSUM |
+                                                  NETIF_F_IPV6_CSUM |
+                                                  NETIF_F_RXCSUM |
+                                                  NETIF_F_GSO_UDP_TUNNEL |
+                                                  NETIF_F_TSO | NETIF_F_TSO6;
+
                        netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+               }
 
                if (highdma)
                        netdev->hw_features |= NETIF_F_HIGHDMA;
@@ -5677,6 +5704,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                adapter->params.offload = 0;
        }
 
+       adapter->mps_encap = kvzalloc(sizeof(struct mps_encap_entry) *
+                                         adapter->params.arch.mps_tcam_size,
+                                     GFP_KERNEL);
+       if (!adapter->mps_encap)
+               dev_warn(&pdev->dev, "could not allocate MPS Encap entries, continuing\n");
+
 #if IS_ENABLED(CONFIG_IPV6)
        if ((CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) &&
            (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) {
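
The table above is sized as sizeof(entry) * mps_tcam_size; an overflow-safe spelling of the same allocation would be kvmalloc_array (available since 4.12). Shown only as a possible alternative, not what this patch does:

    adapter->mps_encap = kvmalloc_array(adapter->params.arch.mps_tcam_size,
                                        sizeof(struct mps_encap_entry),
                                        GFP_KERNEL | __GFP_ZERO);
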
index 36563364bae7cf9fbd7361c418838b671fc7fdd1..3ddd2c4acf6846e38697fde2f09f1b3aa300dbe6 100644
@@ -194,6 +194,23 @@ static void cxgb4_process_flow_match(struct net_device *dev,
                fs->mask.tos = mask->tos;
        }
 
+       if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+               struct flow_dissector_key_keyid *key, *mask;
+
+               key = skb_flow_dissector_target(cls->dissector,
+                                               FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                               cls->key);
+               mask = skb_flow_dissector_target(cls->dissector,
+                                                FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                                cls->mask);
+               fs->val.vni = be32_to_cpu(key->keyid);
+               fs->mask.vni = be32_to_cpu(mask->keyid);
+               if (fs->mask.vni) {
+                       fs->val.encap_vld = 1;
+                       fs->mask.encap_vld = 1;
+               }
+       }
+
        if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
                struct flow_dissector_key_vlan *key, *mask;
                u16 vlan_tci, vlan_tci_mask;
@@ -247,6 +264,7 @@ static int cxgb4_validate_flow_match(struct net_device *dev,
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
              BIT(FLOW_DISSECTOR_KEY_VLAN) |
              BIT(FLOW_DISSECTOR_KEY_IP))) {
                netdev_warn(dev, "Unsupported key used: 0x%x\n",
index 1817a0307d26372fcbf79c6960fd5efadb4041b4..77c2c538b1fde422606bcf712b8764cd6770f276 100644
@@ -491,7 +491,7 @@ u64 cxgb4_select_ntuple(struct net_device *dev,
        if (tp->protocol_shift >= 0)
                ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift;
 
-       if (tp->vnic_shift >= 0) {
+       if (tp->vnic_shift >= 0 && (tp->ingress_config & VNIC_F)) {
                u32 viid = cxgb4_port_viid(dev);
                u32 vf = FW_VIID_VIN_G(viid);
                u32 pf = FW_VIID_PFN_G(viid);
index 1a28df137e1fc2649d1ca7e2520efeed9566857f..276f22357f81acf48aba9600b3800878a4a44861 100644
@@ -1072,12 +1072,27 @@ static void *inline_tx_skb_header(const struct sk_buff *skb,
 static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
 {
        int csum_type;
-       const struct iphdr *iph = ip_hdr(skb);
+       bool inner_hdr_csum = false;
+       u16 proto, ver;
 
-       if (iph->version == 4) {
-               if (iph->protocol == IPPROTO_TCP)
+       if (skb->encapsulation &&
+           (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5))
+               inner_hdr_csum = true;
+
+       if (inner_hdr_csum) {
+               ver = inner_ip_hdr(skb)->version;
+               proto = (ver == 4) ? inner_ip_hdr(skb)->protocol :
+                       inner_ipv6_hdr(skb)->nexthdr;
+       } else {
+               ver = ip_hdr(skb)->version;
+               proto = (ver == 4) ? ip_hdr(skb)->protocol :
+                       ipv6_hdr(skb)->nexthdr;
+       }
+
+       if (ver == 4) {
+               if (proto == IPPROTO_TCP)
                        csum_type = TX_CSUM_TCPIP;
-               else if (iph->protocol == IPPROTO_UDP)
+               else if (proto == IPPROTO_UDP)
                        csum_type = TX_CSUM_UDPIP;
                else {
 nocsum:                        /*
@@ -1090,19 +1105,29 @@ static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
                /*
                 * this doesn't work with extension headers
                 */
-               const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;
-
-               if (ip6h->nexthdr == IPPROTO_TCP)
+               if (proto == IPPROTO_TCP)
                        csum_type = TX_CSUM_TCPIP6;
-               else if (ip6h->nexthdr == IPPROTO_UDP)
+               else if (proto == IPPROTO_UDP)
                        csum_type = TX_CSUM_UDPIP6;
                else
                        goto nocsum;
        }
 
        if (likely(csum_type >= TX_CSUM_TCPIP)) {
-               u64 hdr_len = TXPKT_IPHDR_LEN_V(skb_network_header_len(skb));
-               int eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
+               int eth_hdr_len, l4_len;
+               u64 hdr_len;
+
+               if (inner_hdr_csum) {
+                       /* This allows checksum offload for all encapsulated
+                        * packets like GRE etc.
+                        */
+                       l4_len = skb_inner_network_header_len(skb);
+                       eth_hdr_len = skb_inner_network_offset(skb) - ETH_HLEN;
+               } else {
+                       l4_len = skb_network_header_len(skb);
+                       eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
+               }
+               hdr_len = TXPKT_IPHDR_LEN_V(l4_len);
 
                if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
                        hdr_len |= TXPKT_ETHHDR_LEN_V(eth_hdr_len);
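
In the inner-header case both lengths are taken from the inner headers. For a hypothetical VXLAN-in-IPv4 frame (outer Ethernet 14, outer IPv4 20, UDP 8, VXLAN 8, inner Ethernet 14, inner IPv4 20 bytes), the arithmetic works out as below; eth_hdr_len ends up covering every byte of encapsulation beyond a plain Ethernet header:

    #include <stdio.h>

    int main(void)
    {
            int inner_net_off = 14 + 20 + 8 + 8 + 14;  /* = 64 bytes */

            printf("l4_len      = %d\n", 20);                 /* inner IP hdr  */
            printf("eth_hdr_len = %d\n", inner_net_off - 14); /* 64 - 14 = 50  */
            return 0;
    }
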
@@ -1273,7 +1298,7 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        u32 wr_mid, ctrl0, op;
-       u64 cntrl, *end;
+       u64 cntrl, *end, *sgl;
        int qidx, credits;
        unsigned int flits, ndesc;
        struct adapter *adap;
@@ -1386,8 +1411,9 @@ out_free: dev_kfree_skb_any(skb);
        end = (u64 *)wr + flits;
 
        len = immediate ? skb->len : 0;
+       len += sizeof(*cpl);
        if (ssi->gso_size) {
-               struct cpl_tx_pkt_lso *lso = (void *)wr;
+               struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
                bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
                int l3hdr_len = skb_network_header_len(skb);
                int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
@@ -1417,20 +1443,19 @@ out_free:       dev_kfree_skb_any(skb);
                        if (skb->ip_summed == CHECKSUM_PARTIAL)
                                cntrl = hwcsum(adap->params.chip, skb);
                } else {
-                       lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
-                                         LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
-                                         LSO_IPV6_V(v6) |
-                                         LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
-                                         LSO_IPHDR_LEN_V(l3hdr_len / 4) |
-                                         LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
-                       lso->c.ipid_ofst = htons(0);
-                       lso->c.mss = htons(ssi->gso_size);
-                       lso->c.seqno_offset = htonl(0);
+                       lso->lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
+                                       LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
+                                       LSO_IPV6_V(v6) |
+                                       LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
+                                       LSO_IPHDR_LEN_V(l3hdr_len / 4) |
+                                       LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
+                       lso->ipid_ofst = htons(0);
+                       lso->mss = htons(ssi->gso_size);
+                       lso->seqno_offset = htonl(0);
                        if (is_t4(adap->params.chip))
-                               lso->c.len = htonl(skb->len);
+                               lso->len = htonl(skb->len);
                        else
-                               lso->c.len =
-                                       htonl(LSO_T5_XFER_SIZE_V(skb->len));
+                               lso->len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
                        cpl = (void *)(lso + 1);
 
                        if (CHELSIO_CHIP_VERSION(adap->params.chip)
@@ -1443,10 +1468,22 @@ out_free:       dev_kfree_skb_any(skb);
                                 TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
                                 TXPKT_IPHDR_LEN_V(l3hdr_len);
                }
+               sgl = (u64 *)(cpl + 1); /* sgl start here */
+               if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
+                       /* If current position is already at the end of the
+                        * txq, reset the current to point to start of the queue
+                        * and update the end ptr as well.
+                        */
+                       if (sgl == (u64 *)q->q.stat) {
+                               int left = (u8 *)end - (u8 *)q->q.stat;
+
+                               end = (void *)q->q.desc + left;
+                               sgl = (void *)q->q.desc;
+                       }
+               }
                q->tso++;
                q->tx_cso += ssi->gso_segs;
        } else {
-               len += sizeof(*cpl);
                if (ptp_enabled)
                        op = FW_PTP_TX_PKT_WR;
                else
@@ -1454,6 +1491,7 @@ out_free: dev_kfree_skb_any(skb);
                wr->op_immdlen = htonl(FW_WR_OP_V(op) |
                                       FW_WR_IMMDLEN_V(len));
                cpl = (void *)(wr + 1);
+               sgl = (u64 *)(cpl + 1);
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        cntrl = hwcsum(adap->params.chip, skb) |
                                TXPKT_IPCSUM_DIS_F;
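
The wrap-around block above covers the case where the SGL would start exactly at the ring's status page: both the SGL pointer and the precomputed end pointer are rebased onto the start of the descriptor ring. A freestanding sketch of that pointer arithmetic (names invented for the example):

    #include <stdint.h>

    static void rewrap_sgl(uint64_t **sgl, uint64_t **end,
                           void *desc_start, void *ring_end)
    {
            if ((void *)*sgl == ring_end) {
                    long left = (char *)*end - (char *)ring_end;

                    *end = (uint64_t *)((char *)desc_start + left);
                    *sgl = desc_start;
            }
    }
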
@@ -1487,20 +1525,19 @@ out_free:       dev_kfree_skb_any(skb);
        cpl->ctrl1 = cpu_to_be64(cntrl);
 
        if (immediate) {
-               cxgb4_inline_tx_skb(skb, &q->q, cpl + 1);
+               cxgb4_inline_tx_skb(skb, &q->q, sgl);
                dev_consume_skb_any(skb);
        } else {
                int last_desc;
 
-               cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1),
-                               end, 0, addr);
+               cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, addr);
                skb_orphan(skb);
 
                last_desc = q->q.pidx + ndesc - 1;
                if (last_desc >= q->q.size)
                        last_desc -= q->q.size;
                q->q.sdesc[last_desc].skb = skb;
-               q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1);
+               q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
        }
 
        txq_advance(&q->q, ndesc);
@@ -2259,7 +2296,7 @@ static void cxgb4_sgetim_to_hwtstamp(struct adapter *adap,
 }
 
 static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
-                  const struct cpl_rx_pkt *pkt)
+                  const struct cpl_rx_pkt *pkt, unsigned long tnl_hdr_len)
 {
        struct adapter *adapter = rxq->rspq.adap;
        struct sge *s = &adapter->sge;
@@ -2275,6 +2312,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
        }
 
        copy_frags(skb, gl, s->pktshift);
+       if (tnl_hdr_len)
+               skb->csum_level = 1;
        skb->len = gl->tot_len - s->pktshift;
        skb->data_len = skb->len;
        skb->truesize += skb->data_len;
@@ -2406,7 +2445,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
        struct sge *s = &q->adap->sge;
        int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
                            CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
-       u16 err_vec;
+       u16 err_vec, tnl_hdr_len = 0;
        struct port_info *pi;
        int ret = 0;
 
@@ -2415,16 +2454,19 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 
        pkt = (const struct cpl_rx_pkt *)rsp;
        /* Compressed error vector is enabled for T6 only */
-       if (q->adap->params.tp.rx_pkt_encap)
+       if (q->adap->params.tp.rx_pkt_encap) {
                err_vec = T6_COMPR_RXERR_VEC_G(be16_to_cpu(pkt->err_vec));
-       else
+               tnl_hdr_len = T6_RX_TNLHDR_LEN_G(ntohs(pkt->err_vec));
+       } else {
                err_vec = be16_to_cpu(pkt->err_vec);
+       }
 
        csum_ok = pkt->csum_calc && !err_vec &&
                  (q->netdev->features & NETIF_F_RXCSUM);
-       if ((pkt->l2info & htonl(RXF_TCP_F)) &&
+       if (((pkt->l2info & htonl(RXF_TCP_F)) ||
+            tnl_hdr_len) &&
            (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
-               do_gro(rxq, si, pkt);
+               do_gro(rxq, si, pkt, tnl_hdr_len);
                return 0;
        }
 
@@ -2471,7 +2513,13 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
                } else if (pkt->l2info & htonl(RXF_IP_F)) {
                        __sum16 c = (__force __sum16)pkt->csum;
                        skb->csum = csum_unfold(c);
-                       skb->ip_summed = CHECKSUM_COMPLETE;
+
+                       if (tnl_hdr_len) {
+                               skb->ip_summed = CHECKSUM_UNNECESSARY;
+                               skb->csum_level = 1;
+                       } else {
+                               skb->ip_summed = CHECKSUM_COMPLETE;
+                       }
                        rxq->stats.rx_cso++;
                }
        } else {
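
The reporting choice above follows the skb checksum conventions: per the ip_summed documentation in include/linux/skbuff.h, csum_level counts how many additional consecutive checksums the hardware verified, restated here as a comment:

    /*
     * CHECKSUM_UNNECESSARY, csum_level = N
     *     the hardware validated N + 1 consecutive checksums, outermost
     *     first, so csum_level = 1 covers the outer header plus one
     *     encapsulated one.
     * CHECKSUM_COMPLETE
     *     skb->csum holds one checksum over the whole packet as seen on
     *     the wire; it cannot be split per tunnel layer, hence the
     *     UNNECESSARY path is taken whenever tnl_hdr_len != 0.
     */
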
index 6228a570830797977f70acead3e88ba793c9c277..82b70a565e248aa208a95c5d5973da42ad3711c7 100644
@@ -84,8 +84,7 @@ int cxgb4_get_srq_entry(struct net_device *dev,
        if (!skb)
                return -ENOMEM;
        req = (struct cpl_srq_table_req *)
-               __skb_put(skb, sizeof(*req));
-       memset(req, 0, sizeof(*req));
+               __skb_put_zero(skb, sizeof(*req));
        INIT_TP_WR(req, 0);
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ,
                                              TID_TID_V(srq_idx) |
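
This hunk (and the libcxgb helpers later in the diff) replaces the open-coded __skb_put plus memset pair with __skb_put_zero, which does the same work in one call. Its definition in include/linux/skbuff.h is approximately:

    static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len)
    {
            void *tmp = __skb_put(skb, len);

            memset(tmp, 0, len);
            return tmp;
    }
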
index 7cb3ef466cc7799eea7a9e7cb85465d40b5619e8..df5e7c79223bd165ba01e2b3eea1469f1d5fbf6d 100644
@@ -7512,6 +7512,43 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid,
        return t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), NULL, sleep_ok);
 }
 
+/**
+ *      t4_free_encap_mac_filt - frees MPS entry at given index
+ *      @adap: the adapter
+ *      @viid: the VI id
+ *      @idx: index of MPS entry to be freed
+ *      @sleep_ok: call is allowed to sleep
+ *
+ *      Frees the MPS entry at the supplied index
+ *
+ *      Returns a negative error number or zero on success
+ */
+int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid,
+                          int idx, bool sleep_ok)
+{
+       struct fw_vi_mac_exact *p;
+       u8 addr[] = {0, 0, 0, 0, 0, 0};
+       struct fw_vi_mac_cmd c;
+       int ret = 0;
+       u32 exact;
+
+       memset(&c, 0, sizeof(c));
+       c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
+                                  FW_CMD_REQUEST_F | FW_CMD_WRITE_F |
+                                  FW_CMD_EXEC_V(0) |
+                                  FW_VI_MAC_CMD_VIID_V(viid));
+       exact = FW_VI_MAC_CMD_ENTRY_TYPE_V(FW_VI_MAC_TYPE_EXACTMAC);
+       c.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) |
+                                         exact |
+                                         FW_CMD_LEN16_V(1));
+       p = c.u.exact;
+       p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F |
+                                     FW_VI_MAC_CMD_IDX_V(idx));
+       memcpy(p->macaddr, addr, sizeof(p->macaddr));
+       ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok);
+       return ret;
+}
+
 /**
  *     t4_free_raw_mac_filt - Frees a raw mac entry in mps tcam
  *     @adap: the adapter
@@ -7562,6 +7599,55 @@ int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid,
        return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok);
 }
 
+/**
+ *      t4_alloc_encap_mac_filt - Adds a mac entry in mps tcam with VNI support
+ *      @adap: the adapter
+ *      @viid: the VI id
+ *      @addr: the MAC address
+ *      @mask: the mask
+ *      @vni: the VNI id for the tunnel protocol
+ *      @vni_mask: mask for the VNI id
+ *      @dip_hit: to enable DIP match for the MPS entry
+ *      @lookup_type: match the MAC address on the inner (1) or outer (0) header
+ *      @sleep_ok: call is allowed to sleep
+ *
+ *      Allocates an MPS entry with the specified MAC address and VNI value.
+ *
+ *      Returns a negative error number or the allocated index for this mac.
+ */
+int t4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid,
+                           const u8 *addr, const u8 *mask, unsigned int vni,
+                           unsigned int vni_mask, u8 dip_hit, u8 lookup_type,
+                           bool sleep_ok)
+{
+       struct fw_vi_mac_cmd c;
+       struct fw_vi_mac_vni *p = c.u.exact_vni;
+       int ret = 0;
+       u32 val;
+
+       memset(&c, 0, sizeof(c));
+       c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
+                                  FW_CMD_REQUEST_F | FW_CMD_WRITE_F |
+                                  FW_VI_MAC_CMD_VIID_V(viid));
+       val = FW_CMD_LEN16_V(1) |
+             FW_VI_MAC_CMD_ENTRY_TYPE_V(FW_VI_MAC_TYPE_EXACTMAC_VNI);
+       c.freemacs_to_len16 = cpu_to_be32(val);
+       p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F |
+                                     FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_ADD_MAC));
+       memcpy(p->macaddr, addr, sizeof(p->macaddr));
+       memcpy(p->macaddr_mask, mask, sizeof(p->macaddr_mask));
+
+       p->lookup_type_to_vni =
+               cpu_to_be32(FW_VI_MAC_CMD_VNI_V(vni) |
+                           FW_VI_MAC_CMD_DIP_HIT_V(dip_hit) |
+                           FW_VI_MAC_CMD_LOOKUP_TYPE_V(lookup_type));
+       p->vni_mask_pkd = cpu_to_be32(FW_VI_MAC_CMD_VNI_MASK_V(vni_mask));
+       ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok);
+       if (ret == 0)
+               ret = FW_VI_MAC_CMD_IDX_G(be16_to_cpu(p->valid_to_idx));
+       return ret;
+}
+
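
On success t4_alloc_encap_mac_filt() returns the MPS TCAM index it claimed; the filter paths earlier in this patch store that index as the ovlan value and must hand it back to t4_free_encap_mac_filt() on teardown. A hypothetical caller mirroring that pattern (not code from the driver):

    static int try_encap_entry(struct adapter *adap, unsigned int viid,
                               unsigned int vni, unsigned int vni_mask)
    {
            u8 match_all_mac[6] = { 0 };
            int idx;

            idx = t4_alloc_encap_mac_filt(adap, viid, match_all_mac,
                                          match_all_mac, vni, vni_mask,
                                          0, 1, true);
            if (idx < 0)
                    return idx;     /* negative errno from the mailbox */

            /* ... program idx as the filter's ovlan, use the filter ... */

            return t4_free_encap_mac_filt(adap, viid, idx, true);
    }
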
 /**
  *     t4_alloc_raw_mac_filt - Adds a mac entry in mps tcam
  *     @adap: the adapter
index fe2029e993a222ac87070c8cb49b6a141300f77f..09e38f0733bdfdc39fe5968c233175e1851d972f 100644
@@ -1233,6 +1233,11 @@ struct cpl_rx_pkt {
 #define T6_COMPR_RXERR_SUM_V(x) ((x) << T6_COMPR_RXERR_SUM_S)
 #define T6_COMPR_RXERR_SUM_F    T6_COMPR_RXERR_SUM_V(1U)
 
+#define T6_RX_TNLHDR_LEN_S    8
+#define T6_RX_TNLHDR_LEN_M    0xFF
+#define T6_RX_TNLHDR_LEN_V(x) ((x) << T6_RX_TNLHDR_LEN_S)
+#define T6_RX_TNLHDR_LEN_G(x) (((x) >> T6_RX_TNLHDR_LEN_S) & T6_RX_TNLHDR_LEN_M)
+
 struct cpl_trace_pkt {
        u8 opcode;
        u8 intf;
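
The _S/_M/_V/_G quartet is the driver's standard bitfield pattern: shift, mask, value-insert and value-get. For the new tunnel-header-length field the insert and extract macros round-trip like this (self-contained example using the definitions above):

    #include <stdio.h>
    #include <stdint.h>

    #define T6_RX_TNLHDR_LEN_S    8
    #define T6_RX_TNLHDR_LEN_M    0xFF
    #define T6_RX_TNLHDR_LEN_V(x) ((x) << T6_RX_TNLHDR_LEN_S)
    #define T6_RX_TNLHDR_LEN_G(x) \
            (((x) >> T6_RX_TNLHDR_LEN_S) & T6_RX_TNLHDR_LEN_M)

    int main(void)
    {
            uint16_t err_vec = T6_RX_TNLHDR_LEN_V(50);  /* insert: 50 << 8 */

            /* extract: (err_vec >> 8) & 0xff == 50 */
            printf("tnl_hdr_len = %u\n", T6_RX_TNLHDR_LEN_G(err_vec));
            return 0;
    }
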
index 51b18035d691e68f1622c3a7d0c8dd33295bab2c..adacc63991314b798545db3de88f33b0b8c94367 100644
@@ -145,6 +145,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */
        CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */
        CH_PCI_ID_TABLE_FENTRY(0x5018), /* T540-BT */
+       CH_PCI_ID_TABLE_FENTRY(0x5019), /* T540-LP-BT */
+       CH_PCI_ID_TABLE_FENTRY(0x501a), /* T540-SO-BT */
+       CH_PCI_ID_TABLE_FENTRY(0x501b), /* T540-SO-CR */
        CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */
        CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */
        CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */
@@ -184,6 +187,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x50aa), /* Custom T580-CR */
        CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */
        CH_PCI_ID_TABLE_FENTRY(0x50ac), /* Custom T540-BT */
+       CH_PCI_ID_TABLE_FENTRY(0x50ad), /* Custom T520-CR */
 
        /* T6 adapters:
         */
index 276fdf214b75089e9f668e7130726842c3236d0c..6b55aa2eb2a5a8be6e93bd82ca6c535086eb307a 100644
 #define VNIC_V(x) ((x) << VNIC_S)
 #define VNIC_F    VNIC_V(1U)
 
+#define USE_ENC_IDX_S          13
+#define USE_ENC_IDX_V(x)       ((x) << USE_ENC_IDX_S)
+#define USE_ENC_IDX_F          USE_ENC_IDX_V(1U)
+
 #define CSUM_HAS_PSEUDO_HDR_S    10
 #define CSUM_HAS_PSEUDO_HDR_V(x) ((x) << CSUM_HAS_PSEUDO_HDR_S)
 #define CSUM_HAS_PSEUDO_HDR_F    CSUM_HAS_PSEUDO_HDR_V(1U)
 #define LE_DB_HASH_TID_BASE_A 0x19c30
 #define LE_DB_HASH_TBL_BASE_ADDR_A 0x19c30
 #define LE_DB_INT_CAUSE_A 0x19c3c
+#define LE_DB_CLCAM_TID_BASE_A 0x19df4
 #define LE_DB_TID_HASHBASE_A 0x19df8
 #define T6_LE_DB_HASH_TID_BASE_A 0x19df8
 
index e3d4751f21ac9665f5f427f0e7b82dc61fc73833..e6b2e9549d5685917159b5e9ce7509850a97a5c2 100644
@@ -1305,6 +1305,8 @@ enum fw_params_param_pfvf {
        FW_PARAMS_PARAM_PFVF_HPFILTER_END = 0x33,
        FW_PARAMS_PARAM_PFVF_TLS_START = 0x34,
        FW_PARAMS_PARAM_PFVF_TLS_END = 0x35,
+       FW_PARAMS_PARAM_PFVF_RAWF_START = 0x36,
+       FW_PARAMS_PARAM_PFVF_RAWF_END = 0x37,
        FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39,
        FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A,
 };
@@ -2156,6 +2158,14 @@ struct fw_vi_mac_cmd {
                        __be64 data0m_pkd;
                        __be32 data1m[2];
                } raw;
+               struct fw_vi_mac_vni {
+                       __be16 valid_to_idx;
+                       __u8 macaddr[6];
+                       __be16 r7;
+                       __u8 macaddr_mask[6];
+                       __be32 lookup_type_to_vni;
+                       __be32 vni_mask_pkd;
+               } exact_vni[2];
        } u;
 };
 
@@ -2203,6 +2213,32 @@ struct fw_vi_mac_cmd {
 #define FW_VI_MAC_CMD_RAW_IDX_G(x)      \
        (((x) >> FW_VI_MAC_CMD_RAW_IDX_S) & FW_VI_MAC_CMD_RAW_IDX_M)
 
+#define FW_VI_MAC_CMD_LOOKUP_TYPE_S    31
+#define FW_VI_MAC_CMD_LOOKUP_TYPE_M    0x1
+#define FW_VI_MAC_CMD_LOOKUP_TYPE_V(x) ((x) << FW_VI_MAC_CMD_LOOKUP_TYPE_S)
+#define FW_VI_MAC_CMD_LOOKUP_TYPE_G(x) \
+       (((x) >> FW_VI_MAC_CMD_LOOKUP_TYPE_S) & FW_VI_MAC_CMD_LOOKUP_TYPE_M)
+#define FW_VI_MAC_CMD_LOOKUP_TYPE_F    FW_VI_MAC_CMD_LOOKUP_TYPE_V(1U)
+
+#define FW_VI_MAC_CMD_DIP_HIT_S                30
+#define FW_VI_MAC_CMD_DIP_HIT_M                0x1
+#define FW_VI_MAC_CMD_DIP_HIT_V(x)     ((x) << FW_VI_MAC_CMD_DIP_HIT_S)
+#define FW_VI_MAC_CMD_DIP_HIT_G(x)     \
+       (((x) >> FW_VI_MAC_CMD_DIP_HIT_S) & FW_VI_MAC_CMD_DIP_HIT_M)
+#define FW_VI_MAC_CMD_DIP_HIT_F                FW_VI_MAC_CMD_DIP_HIT_V(1U)
+
+#define FW_VI_MAC_CMD_VNI_S            0
+#define FW_VI_MAC_CMD_VNI_M            0xffffff
+#define FW_VI_MAC_CMD_VNI_V(x)         ((x) << FW_VI_MAC_CMD_VNI_S)
+#define FW_VI_MAC_CMD_VNI_G(x)         \
+       (((x) >> FW_VI_MAC_CMD_VNI_S) & FW_VI_MAC_CMD_VNI_M)
+
+#define FW_VI_MAC_CMD_VNI_MASK_S       0
+#define FW_VI_MAC_CMD_VNI_MASK_M       0xffffff
+#define FW_VI_MAC_CMD_VNI_MASK_V(x)    ((x) << FW_VI_MAC_CMD_VNI_MASK_S)
+#define FW_VI_MAC_CMD_VNI_MASK_G(x)    \
+       (((x) >> FW_VI_MAC_CMD_VNI_MASK_S) & FW_VI_MAC_CMD_VNI_MASK_M)
+
 #define FW_RXMODE_MTU_NO_CHG   65535
 
 struct fw_vi_rxmode_cmd {
index 123e2c1b65f595b1d1d6e21e247f33541140717a..4eb15ceddca3889c1f57c6210a175f4d14875c19 100644
@@ -36,8 +36,8 @@
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x10
-#define T4FW_VERSION_MICRO 0x3F
+#define T4FW_VERSION_MINOR 0x13
+#define T4FW_VERSION_MICRO 0x01
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
 #define T4FW_MIN_VERSION_MICRO 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x10
-#define T5FW_VERSION_MICRO 0x3F
+#define T5FW_VERSION_MINOR 0x13
+#define T5FW_VERSION_MICRO 0x01
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
 #define T5FW_MIN_VERSION_MICRO 0x00
 
 #define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x10
-#define T6FW_VERSION_MICRO 0x3F
+#define T6FW_VERSION_MINOR 0x13
+#define T6FW_VERSION_MICRO 0x01
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
index 4b5aacc09cab7a7882aea252a3dad886b3b6abf3..240ba9d4c39991b36345ff7f9e548720e8ba2e58 100644
@@ -90,8 +90,7 @@ cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
 {
        struct cpl_tid_release *req;
 
-       req = __skb_put(skb, len);
-       memset(req, 0, len);
+       req = __skb_put_zero(skb, len);
 
        INIT_TP_WR(req, tid);
        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
@@ -104,8 +103,7 @@ cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
 {
        struct cpl_close_con_req *req;
 
-       req = __skb_put(skb, len);
-       memset(req, 0, len);
+       req = __skb_put_zero(skb, len);
 
        INIT_TP_WR(req, tid);
        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
@@ -119,8 +117,7 @@ cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
 {
        struct cpl_abort_req *req;
 
-       req = __skb_put(skb, len);
-       memset(req, 0, len);
+       req = __skb_put_zero(skb, len);
 
        INIT_TP_WR(req, tid);
        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
@@ -134,8 +131,7 @@ cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
 {
        struct cpl_abort_rpl *rpl;
 
-       rpl = __skb_put(skb, len);
-       memset(rpl, 0, len);
+       rpl = __skb_put_zero(skb, len);
 
        INIT_TP_WR(rpl, tid);
        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
@@ -149,8 +145,7 @@ cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
 {
        struct cpl_rx_data_ack *req;
 
-       req = __skb_put(skb, len);
-       memset(req, 0, len);
+       req = __skb_put_zero(skb, len);
 
        INIT_TP_WR(req, tid);
        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
index 8bb0db990c8fcf8258201f1af5fcf3fa9976b5f9..00a57273b753660f07abff48b6fc49e775710b07 100644
@@ -1246,8 +1246,7 @@ static int ethoc_probe(struct platform_device *pdev)
        mdiobus_unregister(priv->mdio);
        mdiobus_free(priv->mdio);
 free2:
-       if (priv->clk)
-               clk_disable_unprepare(priv->clk);
+       clk_disable_unprepare(priv->clk);
 free:
        free_netdev(netdev);
 out:
@@ -1271,8 +1270,7 @@ static int ethoc_remove(struct platform_device *pdev)
                        mdiobus_unregister(priv->mdio);
                        mdiobus_free(priv->mdio);
                }
-               if (priv->clk)
-                       clk_disable_unprepare(priv->clk);
+               clk_disable_unprepare(priv->clk);
                unregister_netdev(netdev);
                free_netdev(netdev);
        }
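
The dropped `if (priv->clk)` guards were redundant: the common clock framework accepts a NULL clock as a no-op, and clk_disable_unprepare() is just the obvious composition, roughly as in include/linux/clk.h:

    static inline void clk_disable_unprepare(struct clk *clk)
    {
            clk_disable(clk);       /* both helpers return early on NULL */
            clk_unprepare(clk);
    }
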
index 6e490fd2345dae47b687082101f12bd2523dbde3..a580a3dcbe5906343938094e0b477cf9bb78dc6f 100644
@@ -22,7 +22,7 @@ if NET_VENDOR_FREESCALE
 config FEC
        tristate "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
        depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \
-                  ARCH_MXC || SOC_IMX28)
+                  ARCH_MXC || SOC_IMX28 || COMPILE_TEST)
        default ARCH_MXC || SOC_IMX28 if ARM
        select PHYLIB
        imply PTP_1588_CLOCK
index e7381f8ef89d0d55ef4442d5fb286c4e7c038cc0..4778b663653e3213dab380d7345b875a227a034e 100644
@@ -21,7 +21,7 @@
 
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
     defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
-    defined(CONFIG_ARM64)
+    defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST)
 /*
  *     Just figures, Motorola would have to change the offsets for
  *     registers in the same peripheral device on different models
index d4604bc8eb5b04742534100c4c285065bda2021e..4358f586e28f792b178a8b93950b3e8b2f62ff62 100644
@@ -2052,13 +2052,9 @@ static int fec_enet_mii_init(struct platform_device *pdev)
        fep->mii_bus->parent = &pdev->dev;
 
        node = of_get_child_by_name(pdev->dev.of_node, "mdio");
-       if (node) {
-               err = of_mdiobus_register(fep->mii_bus, node);
+       err = of_mdiobus_register(fep->mii_bus, node);
+       if (node)
                of_node_put(node);
-       } else {
-               err = mdiobus_register(fep->mii_bus);
-       }
-
        if (err)
                goto err_out_free_mdiobus;
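
The two register paths collapse because of_mdiobus_register() falls back to plain mdiobus_register() when handed a NULL device node (a fallback added to drivers/of/of_mdio.c shortly before this series); its shape is roughly:

    int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
    {
            if (!np)
                    return mdiobus_register(mdio);

            /* ... otherwise register the bus and its DT-described PHYs ... */
            return 0;   /* details elided in this sketch */
    }
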
 
@@ -2111,7 +2107,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev)
 /* List of registers that can safely be read to dump them with ethtool */
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
        defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
-       defined(CONFIG_ARM64)
+       defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST)
 static u32 fec_enet_register_offset[] = {
        FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
        FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
index 6552d68ea6e1268f25177ecc3757392a4324f170..ce6e24c74978a22a1d22383f0a5b4f38ffec7c00 100644
@@ -1391,12 +1391,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params)
                /* FM_WRONG_RESET_VALUES_ERRATA_FMAN_A005127 Errata
                 * workaround
                 */
-               if (port->rev_info.major >= 6) {
-                       u32 reg;
+               u32 reg;
 
-                       reg = 0x00001013;
-                       iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp);
-               }
+               reg = 0x00001013;
+               iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp);
        }
 
        return 0;
index 4df282ed22c7962f7e10ab6d17be11b50a1ac44f..0beee2cc2ddd3164baf7211f49c3b5affef7a091 100644
@@ -61,7 +61,7 @@ static const char hw_stat_gstrings[][ETH_GSTRING_LEN] = {
 static const char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = {
        "tx-single-collision",
        "tx-multiple-collision",
-       "tx-late-collsion",
+       "tx-late-collision",
        "tx-aborted-frames",
        "tx-lost-frames",
        "tx-carrier-sense-errors",
index 02145f2de82030f8df41e5a2396820394148dce4..63d7dbfb90bf3feeaa755390b78ab1bc67f9006b 100644
@@ -50,13 +50,22 @@ static int hnae3_match_n_instantiate(struct hnae3_client *client,
        /* now, (un-)instantiate client by calling lower layer */
        if (is_reg) {
                ret = ae_dev->ops->init_client_instance(client, ae_dev);
-               if (ret)
+               if (ret) {
                        dev_err(&ae_dev->pdev->dev,
                                "fail to instantiate client\n");
-               return ret;
+                       return ret;
+               }
+
+               hnae_set_bit(ae_dev->flag, HNAE3_CLIENT_INITED_B, 1);
+               return 0;
+       }
+
+       if (hnae_get_bit(ae_dev->flag, HNAE3_CLIENT_INITED_B)) {
+               ae_dev->ops->uninit_client_instance(client, ae_dev);
+
+               hnae_set_bit(ae_dev->flag, HNAE3_CLIENT_INITED_B, 0);
        }
 
-       ae_dev->ops->uninit_client_instance(client, ae_dev);
        return 0;
 }
 
@@ -89,7 +98,7 @@ int hnae3_register_client(struct hnae3_client *client)
 exit:
        mutex_unlock(&hnae3_common_lock);
 
-       return ret;
+       return 0;
 }
 EXPORT_SYMBOL(hnae3_register_client);
 
@@ -112,7 +121,7 @@ EXPORT_SYMBOL(hnae3_unregister_client);
  * @ae_algo: AE algorithm
  * NOTE: the duplicated name will not be checked
  */
-int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
+void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
 {
        const struct pci_device_id *id;
        struct hnae3_ae_dev *ae_dev;
@@ -151,8 +160,6 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
        }
 
        mutex_unlock(&hnae3_common_lock);
-
-       return ret;
 }
 EXPORT_SYMBOL(hnae3_register_ae_algo);
 
@@ -168,6 +175,9 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
        mutex_lock(&hnae3_common_lock);
        /* Check if there are matched ae_dev */
        list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+               if (!hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
+                       continue;
+
                id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
                if (!id)
                        continue;
@@ -191,22 +201,14 @@ EXPORT_SYMBOL(hnae3_unregister_ae_algo);
  * @ae_dev: the AE device
  * NOTE: the duplicated name will not be checked
  */
-int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
+void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
        const struct pci_device_id *id;
        struct hnae3_ae_algo *ae_algo;
        struct hnae3_client *client;
-       int ret = 0, lock_acquired;
+       int ret = 0;
 
-       /* we can get deadlocked if SRIOV is being enabled in context to probe
-        * and probe gets called again in same context. This can happen when
-        * pci_enable_sriov() is called to create VFs from PF probes context.
-        * Therefore, for simplicity uniformly defering further probing in all
-        * cases where we detect contention.
-        */
-       lock_acquired = mutex_trylock(&hnae3_common_lock);
-       if (!lock_acquired)
-               return -EPROBE_DEFER;
+       mutex_lock(&hnae3_common_lock);
 
        list_add_tail(&ae_dev->node, &hnae3_ae_dev_list);
 
@@ -220,7 +222,6 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 
                if (!ae_dev->ops) {
                        dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n");
-                       ret = -EOPNOTSUPP;
                        goto out_err;
                }
 
@@ -247,8 +248,6 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 
 out_err:
        mutex_unlock(&hnae3_common_lock);
-
-       return ret;
 }
 EXPORT_SYMBOL(hnae3_register_ae_dev);
 
@@ -264,6 +263,9 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
        mutex_lock(&hnae3_common_lock);
        /* Check if there are matched ae_algo */
        list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
+               if (!hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
+                       continue;
+
                id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
                if (!id)
                        continue;
@@ -283,3 +285,4 @@ EXPORT_SYMBOL(hnae3_unregister_ae_dev);
 MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("HNAE3(Hisilicon Network Acceleration Engine) Framework");
+MODULE_VERSION(HNAE3_MOD_VERSION);
index 37ec1b3286c6d38796b36b480f8a4f670d64f3cc..45c571eea2ae530316b9f3f7bcfea85e44f723b7 100644
@@ -36,6 +36,8 @@
 #include <linux/pci.h>
 #include <linux/types.h>
 
+#define HNAE3_MOD_VERSION "1.0"
+
 /* Device IDs */
 #define HNAE3_DEV_ID_GE                                0xA220
 #define HNAE3_DEV_ID_25GE                      0xA221
@@ -52,6 +54,7 @@
 #define HNAE3_DEV_INITED_B                     0x0
 #define HNAE3_DEV_SUPPORT_ROCE_B               0x1
 #define HNAE3_DEV_SUPPORT_DCB_B                        0x2
+#define HNAE3_CLIENT_INITED_B                  0x3
 
 #define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
                BIT(HNAE3_DEV_SUPPORT_ROCE_B))
@@ -273,10 +276,6 @@ struct hnae3_ae_dev {
  *   Map rings to vector
  * unmap_ring_from_vector()
  *   Unmap rings from vector
- * add_tunnel_udp()
- *   Add tunnel information to hardware
- * del_tunnel_udp()
- *   Delete tunnel information from hardware
  * reset_queue()
  *   Reset queue
  * get_fw_version()
@@ -388,9 +387,6 @@ struct hnae3_ae_ops {
                                      int vector_num,
                                      struct hnae3_ring_chain_node *vr_chain);
 
-       int (*add_tunnel_udp)(struct hnae3_handle *handle, u16 port_num);
-       int (*del_tunnel_udp)(struct hnae3_handle *handle, u16 port_num);
-
        void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
        u32 (*get_fw_version)(struct hnae3_handle *handle);
        void (*get_mdix_mode)(struct hnae3_handle *handle,
@@ -521,11 +517,11 @@ struct hnae3_handle {
 #define hnae_get_bit(origin, shift) \
        hnae_get_field((origin), (0x1 << (shift)), (shift))
 
-int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
+void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
 void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
 
 void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo);
-int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo);
+void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo);
 
 void hnae3_unregister_client(struct hnae3_client *client);
 int hnae3_register_client(struct hnae3_client *client);
index 8c55965a66ac310d91f2b9f2d4f9a1d6da1a1237..cac51954f2cff507a97db175c39f800c98eb6608 100644
@@ -502,7 +502,7 @@ static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto,
 
        /* find outer header point */
        l3.hdr = skb_network_header(skb);
-       l4_hdr = skb_inner_transport_header(skb);
+       l4_hdr = skb_transport_header(skb);
 
        if (skb->protocol == htons(ETH_P_IPV6)) {
                exthdr = l3.hdr + sizeof(*l3.v6);
@@ -1244,93 +1244,6 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
        stats->tx_compressed = netdev->stats.tx_compressed;
 }
 
-static void hns3_add_tunnel_port(struct net_device *netdev, u16 port,
-                                enum hns3_udp_tnl_type type)
-{
-       struct hns3_nic_priv *priv = netdev_priv(netdev);
-       struct hns3_udp_tunnel *udp_tnl = &priv->udp_tnl[type];
-       struct hnae3_handle *h = priv->ae_handle;
-
-       if (udp_tnl->used && udp_tnl->dst_port == port) {
-               udp_tnl->used++;
-               return;
-       }
-
-       if (udp_tnl->used) {
-               netdev_warn(netdev,
-                           "UDP tunnel [%d], port [%d] offload\n", type, port);
-               return;
-       }
-
-       udp_tnl->dst_port = port;
-       udp_tnl->used = 1;
-       /* TBD send command to hardware to add port */
-       if (h->ae_algo->ops->add_tunnel_udp)
-               h->ae_algo->ops->add_tunnel_udp(h, port);
-}
-
-static void hns3_del_tunnel_port(struct net_device *netdev, u16 port,
-                                enum hns3_udp_tnl_type type)
-{
-       struct hns3_nic_priv *priv = netdev_priv(netdev);
-       struct hns3_udp_tunnel *udp_tnl = &priv->udp_tnl[type];
-       struct hnae3_handle *h = priv->ae_handle;
-
-       if (!udp_tnl->used || udp_tnl->dst_port != port) {
-               netdev_warn(netdev,
-                           "Invalid UDP tunnel port %d\n", port);
-               return;
-       }
-
-       udp_tnl->used--;
-       if (udp_tnl->used)
-               return;
-
-       udp_tnl->dst_port = 0;
-       /* TBD send command to hardware to del port  */
-       if (h->ae_algo->ops->del_tunnel_udp)
-               h->ae_algo->ops->del_tunnel_udp(h, port);
-}
-
-/* hns3_nic_udp_tunnel_add - Get notifiacetion about UDP tunnel ports
- * @netdev: This physical ports's netdev
- * @ti: Tunnel information
- */
-static void hns3_nic_udp_tunnel_add(struct net_device *netdev,
-                                   struct udp_tunnel_info *ti)
-{
-       u16 port_n = ntohs(ti->port);
-
-       switch (ti->type) {
-       case UDP_TUNNEL_TYPE_VXLAN:
-               hns3_add_tunnel_port(netdev, port_n, HNS3_UDP_TNL_VXLAN);
-               break;
-       case UDP_TUNNEL_TYPE_GENEVE:
-               hns3_add_tunnel_port(netdev, port_n, HNS3_UDP_TNL_GENEVE);
-               break;
-       default:
-               netdev_err(netdev, "unsupported tunnel type %d\n", ti->type);
-               break;
-       }
-}
-
-static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
-                                   struct udp_tunnel_info *ti)
-{
-       u16 port_n = ntohs(ti->port);
-
-       switch (ti->type) {
-       case UDP_TUNNEL_TYPE_VXLAN:
-               hns3_del_tunnel_port(netdev, port_n, HNS3_UDP_TNL_VXLAN);
-               break;
-       case UDP_TUNNEL_TYPE_GENEVE:
-               hns3_del_tunnel_port(netdev, port_n, HNS3_UDP_TNL_GENEVE);
-               break;
-       default:
-               break;
-       }
-}
-
 static int hns3_setup_tc(struct net_device *netdev, void *type_data)
 {
        struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
@@ -1569,13 +1482,50 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
        .ndo_get_stats64        = hns3_nic_get_stats64,
        .ndo_setup_tc           = hns3_nic_setup_tc,
        .ndo_set_rx_mode        = hns3_nic_set_rx_mode,
-       .ndo_udp_tunnel_add     = hns3_nic_udp_tunnel_add,
-       .ndo_udp_tunnel_del     = hns3_nic_udp_tunnel_del,
        .ndo_vlan_rx_add_vid    = hns3_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = hns3_vlan_rx_kill_vid,
        .ndo_set_vf_vlan        = hns3_ndo_set_vf_vlan,
 };
 
+static bool hns3_is_phys_func(struct pci_dev *pdev)
+{
+       u32 dev_id = pdev->device;
+
+       switch (dev_id) {
+       case HNAE3_DEV_ID_GE:
+       case HNAE3_DEV_ID_25GE:
+       case HNAE3_DEV_ID_25GE_RDMA:
+       case HNAE3_DEV_ID_25GE_RDMA_MACSEC:
+       case HNAE3_DEV_ID_50GE_RDMA:
+       case HNAE3_DEV_ID_50GE_RDMA_MACSEC:
+       case HNAE3_DEV_ID_100G_RDMA_MACSEC:
+               return true;
+       case HNAE3_DEV_ID_100G_VF:
+       case HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF:
+               return false;
+       default:
+               dev_warn(&pdev->dev, "un-recognized pci device-id %d",
+                        dev_id);
+       }
+
+       return false;
+}
+
+static void hns3_disable_sriov(struct pci_dev *pdev)
+{
+       /* If our VFs are assigned we cannot shut down SR-IOV
+        * without causing issues, so just leave the hardware
+        * available but disabled
+        */
+       if (pci_vfs_assigned(pdev)) {
+               dev_warn(&pdev->dev,
+                        "disabling driver while VFs are assigned\n");
+               return;
+       }
+
+       pci_disable_sriov(pdev);
+}
+
 /* hns3_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in hns3_pci_tbl
@@ -1603,7 +1553,9 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        ae_dev->dev_type = HNAE3_DEV_KNIC;
        pci_set_drvdata(pdev, ae_dev);
 
-       return hnae3_register_ae_dev(ae_dev);
+       hnae3_register_ae_dev(ae_dev);
+
+       return 0;
 }
 
 /* hns3_remove - Device removal routine
@@ -1613,21 +1565,56 @@ static void hns3_remove(struct pci_dev *pdev)
 {
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
 
+       if (hns3_is_phys_func(pdev) && IS_ENABLED(CONFIG_PCI_IOV))
+               hns3_disable_sriov(pdev);
+
        hnae3_unregister_ae_dev(ae_dev);
 }
 
+/**
+ * hns3_pci_sriov_configure
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * Enable or change the number of VFs. Called when the user updates the number
+ * of VFs in sysfs.
+ **/
+static int hns3_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+       int ret;
+
+       if (!(hns3_is_phys_func(pdev) && IS_ENABLED(CONFIG_PCI_IOV))) {
+               dev_warn(&pdev->dev, "Can not config SRIOV\n");
+               return -EINVAL;
+       }
+
+       if (num_vfs) {
+               ret = pci_enable_sriov(pdev, num_vfs);
+               if (ret)
+                       dev_err(&pdev->dev, "SRIOV enable failed %d\n", ret);
+               else
+                       return num_vfs;
+       } else if (!pci_vfs_assigned(pdev)) {
+               pci_disable_sriov(pdev);
+       } else {
+               dev_warn(&pdev->dev,
+                        "Unable to free VFs because some are assigned to VMs.\n");
+       }
+
+       return 0;
+}
+
 static struct pci_driver hns3_driver = {
        .name     = hns3_driver_name,
        .id_table = hns3_pci_tbl,
        .probe    = hns3_probe,
        .remove   = hns3_remove,
+       .sriov_configure = hns3_pci_sriov_configure,
 };
 
 /* set default feature to hns3 */
 static void hns3_set_default_feature(struct net_device *netdev)
 {
-       struct hnae3_handle *h = hns3_get_handle(netdev);
-
        netdev->priv_flags |= IFF_UNICAST_FLT;
 
        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -1656,15 +1643,11 @@ static void hns3_set_default_feature(struct net_device *netdev)
                NETIF_F_GSO_UDP_TUNNEL_CSUM;
 
        netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-               NETIF_F_HW_VLAN_CTAG_TX |
+               NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
                NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
                NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
                NETIF_F_GSO_UDP_TUNNEL_CSUM;
-
-       if (!(h->flags & HNAE3_SUPPORT_VF))
-               netdev->hw_features |=
-                       NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
 }
 
 static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
@@ -1971,106 +1954,6 @@ hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count)
        writel_relaxed(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG);
 }
 
-/* hns3_nic_get_headlen - determine size of header for LRO/GRO
- * @data: pointer to the start of the headers
- * @max: total length of section to find headers in
- *
- * This function is meant to determine the length of headers that will
- * be recognized by hardware for LRO, GRO, and RSC offloads.  The main
- * motivation of doing this is to only perform one pull for IPv4 TCP
- * packets so that we can do basic things like calculating the gso_size
- * based on the average data per packet.
- */
-static unsigned int hns3_nic_get_headlen(unsigned char *data, u32 flag,
-                                        unsigned int max_size)
-{
-       unsigned char *network;
-       u8 hlen;
-
-       /* This should never happen, but better safe than sorry */
-       if (max_size < ETH_HLEN)
-               return max_size;
-
-       /* Initialize network frame pointer */
-       network = data;
-
-       /* Set first protocol and move network header forward */
-       network += ETH_HLEN;
-
-       /* Handle any vlan tag if present */
-       if (hnae_get_field(flag, HNS3_RXD_VLAN_M, HNS3_RXD_VLAN_S)
-               == HNS3_RX_FLAG_VLAN_PRESENT) {
-               if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN))
-                       return max_size;
-
-               network += VLAN_HLEN;
-       }
-
-       /* Handle L3 protocols */
-       if (hnae_get_field(flag, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S)
-               == HNS3_RX_FLAG_L3ID_IPV4) {
-               if ((typeof(max_size))(network - data) >
-                   (max_size - sizeof(struct iphdr)))
-                       return max_size;
-
-               /* Access ihl as a u8 to avoid unaligned access on ia64 */
-               hlen = (network[0] & 0x0F) << 2;
-
-               /* Verify hlen meets minimum size requirements */
-               if (hlen < sizeof(struct iphdr))
-                       return network - data;
-
-               /* Record next protocol if header is present */
-       } else if (hnae_get_field(flag, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S)
-               == HNS3_RX_FLAG_L3ID_IPV6) {
-               if ((typeof(max_size))(network - data) >
-                   (max_size - sizeof(struct ipv6hdr)))
-                       return max_size;
-
-               /* Record next protocol */
-               hlen = sizeof(struct ipv6hdr);
-       } else {
-               return network - data;
-       }
-
-       /* Relocate pointer to start of L4 header */
-       network += hlen;
-
-       /* Finally sort out TCP/UDP */
-       if (hnae_get_field(flag, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S)
-               == HNS3_RX_FLAG_L4ID_TCP) {
-               if ((typeof(max_size))(network - data) >
-                   (max_size - sizeof(struct tcphdr)))
-                       return max_size;
-
-               /* Access doff as a u8 to avoid unaligned access on ia64 */
-               hlen = (network[12] & 0xF0) >> 2;
-
-               /* Verify hlen meets minimum size requirements */
-               if (hlen < sizeof(struct tcphdr))
-                       return network - data;
-
-               network += hlen;
-       } else if (hnae_get_field(flag, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S)
-               == HNS3_RX_FLAG_L4ID_UDP) {
-               if ((typeof(max_size))(network - data) >
-                   (max_size - sizeof(struct udphdr)))
-                       return max_size;
-
-               network += sizeof(struct udphdr);
-       }
-
-       /* If everything has gone correctly network should be the
-        * data section of the packet and will be the end of the header.
-        * If not then it probably represents the end of the last recognized
-        * header.
-        */
-       if ((typeof(max_size))(network - data) < max_size)
-               return network - data;
-       else
-               return max_size;
-}
-
 static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
                                struct hns3_enet_ring *ring, int pull_len,
                                struct hns3_desc_cb *desc_cb)
@@ -2270,8 +2153,8 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
                ring->stats.seg_pkt_cnt++;
                u64_stats_update_end(&ring->syncp);
 
-               pull_len = hns3_nic_get_headlen(va, l234info,
-                                               HNS3_RX_HEAD_SIZE);
+               pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE);
+
                memcpy(__skb_put(skb, pull_len), va,
                       ALIGN(pull_len, sizeof(long)));
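
Editor's note: this hunk drops the driver-private header parser (removed above) in favor of the generic eth_get_headlen() helper from net/ethernet/eth.c, which in this kernel series takes the start of the frame plus a byte budget, runs the flow dissector, and returns how many bytes of recognizable headers it found, capped at the budget. A minimal sketch of the resulting pull pattern follows; everything except eth_get_headlen(), __skb_put(), and ALIGN() is an illustrative name, not driver API.

/* Pull only the parsed headers into the skb linear area; the payload
 * stays in the page fragment.  RX_PULL_BUDGET stands in for the
 * driver's HNS3_RX_HEAD_SIZE.
 */
#include <linux/etherdevice.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

#define RX_PULL_BUDGET 256

static void rx_pull_headers(struct sk_buff *skb, unsigned char *va)
{
	unsigned int pull_len = eth_get_headlen(va, RX_PULL_BUDGET);

	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
}
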
 
@@ -3052,13 +2935,13 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 }
 
 /* Set mac addr if it is configured. or leave it to the AE driver */
-static void hns3_init_mac_addr(struct net_device *netdev)
+static void hns3_init_mac_addr(struct net_device *netdev, bool init)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hnae3_handle *h = priv->ae_handle;
        u8 mac_addr_temp[ETH_ALEN];
 
-       if (h->ae_algo->ops->get_mac_addr) {
+       if (h->ae_algo->ops->get_mac_addr && init) {
                h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
                ether_addr_copy(netdev->dev_addr, mac_addr_temp);
        }
@@ -3112,7 +2995,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
        handle->kinfo.netdev = netdev;
        handle->priv = (void *)priv;
 
-       hns3_init_mac_addr(netdev);
+       hns3_init_mac_addr(netdev, true);
 
        hns3_set_default_feature(netdev);
 
@@ -3298,9 +3181,35 @@ static void hns3_recover_hw_addr(struct net_device *ndev)
                hns3_nic_mc_sync(ndev, ha->addr);
 }
 
-static void hns3_drop_skb_data(struct hns3_enet_ring *ring, struct sk_buff *skb)
+static void hns3_clear_tx_ring(struct hns3_enet_ring *ring)
 {
-       dev_kfree_skb_any(skb);
+       if (!HNAE3_IS_TX_RING(ring))
+               return;
+
+       while (ring->next_to_clean != ring->next_to_use) {
+               hns3_free_buffer_detach(ring, ring->next_to_clean);
+               ring_ptr_move_fw(ring, next_to_clean);
+       }
+}
+
+static void hns3_clear_rx_ring(struct hns3_enet_ring *ring)
+{
+       if (HNAE3_IS_TX_RING(ring))
+               return;
+
+       while (ring->next_to_use != ring->next_to_clean) {
+               /* When a buffer is not reused, its memory has been
+                * freed in hns3_handle_rx_bd or will be freed by the
+                * stack, so we only need to unmap the buffer here.
+                */
+               if (!ring->desc_cb[ring->next_to_use].reuse_flag) {
+                       hns3_unmap_buffer(ring,
+                                         &ring->desc_cb[ring->next_to_use]);
+                       ring->desc_cb[ring->next_to_use].dma = 0;
+               }
+
+               ring_ptr_move_fw(ring, next_to_use);
+       }
 }
 
 static void hns3_clear_all_ring(struct hnae3_handle *h)
@@ -3314,13 +3223,13 @@ static void hns3_clear_all_ring(struct hnae3_handle *h)
                struct hns3_enet_ring *ring;
 
                ring = priv->ring_data[i].ring;
-               hns3_clean_tx_ring(ring, ring->desc_num);
+               hns3_clear_tx_ring(ring);
                dev_queue = netdev_get_tx_queue(ndev,
                                                priv->ring_data[i].queue_index);
                netdev_tx_reset_queue(dev_queue);
 
                ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
-               hns3_clean_rx_ring(ring, ring->desc_num, hns3_drop_skb_data);
+               hns3_clear_rx_ring(ring);
        }
 }
 
@@ -3359,7 +3268,7 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        int ret;
 
-       hns3_init_mac_addr(netdev);
+       hns3_init_mac_addr(netdev, false);
        hns3_nic_set_rx_mode(netdev);
        hns3_recover_hw_addr(netdev);
 
@@ -3600,6 +3509,8 @@ static int __init hns3_init_module(void)
 
        client.ops = &client_ops;
 
+       INIT_LIST_HEAD(&client.node);
+
        ret = hnae3_register_client(&client);
        if (ret)
                return ret;
@@ -3627,3 +3538,4 @@ MODULE_DESCRIPTION("HNS3: Hisilicon Ethernet Driver");
 MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("pci:hns-nic");
+MODULE_VERSION(HNS3_MOD_VERSION);
index 98cdbd3a1163d3793b7cf3028fea807e7ab5a792..5b40f5a537611b94cd1c1ed7cfb065544c52f1f5 100644 (file)
@@ -14,6 +14,8 @@
 
 #include "hnae3.h"
 
+#define HNS3_MOD_VERSION "1.0"
+
 extern const char hns3_driver_version[];
 
 enum hns3_nic_state {
index eb3c34f3cf87b93d80f5a28cfc6cca2b29672c75..c16bb6cb0564d65f3461336a7d672ba7dce6e45d 100644 (file)
@@ -74,7 +74,7 @@ struct hns3_link_mode_mapping {
        u32 ethtool_link_mode;
 };
 
-static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
+static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
 {
        struct hnae3_handle *h = hns3_get_handle(ndev);
        int ret;
@@ -85,11 +85,7 @@ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
 
        switch (loop) {
        case HNAE3_MAC_INTER_LOOP_MAC:
-               ret = h->ae_algo->ops->set_loopback(h, loop, true);
-               break;
-       case HNAE3_MAC_LOOP_NONE:
-               ret = h->ae_algo->ops->set_loopback(h,
-                       HNAE3_MAC_INTER_LOOP_MAC, false);
+               ret = h->ae_algo->ops->set_loopback(h, loop, en);
                break;
        default:
                ret = -ENOTSUPP;
@@ -99,10 +95,7 @@ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
        if (ret)
                return ret;
 
-       if (loop == HNAE3_MAC_LOOP_NONE)
-               h->ae_algo->ops->set_promisc_mode(h, ndev->flags & IFF_PROMISC);
-       else
-               h->ae_algo->ops->set_promisc_mode(h, 1);
+       h->ae_algo->ops->set_promisc_mode(h, en);
 
        return ret;
 }
@@ -122,13 +115,13 @@ static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode)
                return ret;
        }
 
-       ret = hns3_lp_setup(ndev, loop_mode);
+       ret = hns3_lp_setup(ndev, loop_mode, true);
        usleep_range(10000, 20000);
 
        return ret;
 }
 
-static int hns3_lp_down(struct net_device *ndev)
+static int hns3_lp_down(struct net_device *ndev, enum hnae3_loop loop_mode)
 {
        struct hnae3_handle *h = hns3_get_handle(ndev);
        int ret;
@@ -136,7 +129,7 @@ static int hns3_lp_down(struct net_device *ndev)
        if (!h->ae_algo->ops->stop)
                return -EOPNOTSUPP;
 
-       ret = hns3_lp_setup(ndev, HNAE3_MAC_LOOP_NONE);
+       ret = hns3_lp_setup(ndev, loop_mode, false);
        if (ret) {
                netdev_err(ndev, "lb_setup return error: %d\n", ret);
                return ret;
@@ -332,7 +325,7 @@ static void hns3_self_test(struct net_device *ndev,
                data[test_index] = hns3_lp_up(ndev, loop_type);
                if (!data[test_index]) {
                        data[test_index] = hns3_lp_run_test(ndev, loop_type);
-                       hns3_lp_down(ndev);
+                       hns3_lp_down(ndev, loop_type);
                }
 
                if (data[test_index])
index ff13d1876d9efb987ae3dba1817893be1fc7d5e1..c36d64710fa69a94d911046a136cb231740f4c41 100644 (file)
@@ -31,6 +31,17 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring)
        return ring->desc_num - used - 1;
 }
 
+static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h)
+{
+       int u = ring->next_to_use;
+       int c = ring->next_to_clean;
+
+       if (unlikely(h >= ring->desc_num))
+               return 0;
+
+       return u > c ? (h > c && h <= u) : (h > c || h <= u);
+}
+
 static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
 {
        int size  = ring->desc_num * sizeof(struct hclge_desc);
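
Editor's note: the head-pointer sanity check added above treats a hardware-reported head as valid only when it falls in the interval (next_to_clean, next_to_use] on the circular command queue, handling both the unwrapped and wrapped cases. A standalone, compilable illustration of that predicate (userspace C, ring size is an assumed demo value):

#include <stdio.h>

#define DESC_NUM 8

static int is_valid_head(int u, int c, int h)
{
	if (h >= DESC_NUM)
		return 0;
	/* unwrapped (u > c): valid heads are c < h <= u;
	 * wrapped (u < c):   valid heads are h > c or h <= u
	 */
	return u > c ? (h > c && h <= u) : (h > c || h <= u);
}

int main(void)
{
	/* unwrapped: clean=2, use=5 -> heads 3..5 valid */
	printf("%d %d\n", is_valid_head(5, 2, 4), is_valid_head(5, 2, 6));
	/* wrapped: clean=6, use=1 -> heads 7, 0, 1 valid */
	printf("%d %d\n", is_valid_head(1, 6, 0), is_valid_head(1, 6, 3));
	return 0;
}
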
@@ -141,6 +152,7 @@ static void hclge_cmd_init_regs(struct hclge_hw *hw)
 
 static int hclge_cmd_csq_clean(struct hclge_hw *hw)
 {
+       struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
        struct hclge_cmq_ring *csq = &hw->cmq.csq;
        u16 ntc = csq->next_to_clean;
        struct hclge_desc *desc;
@@ -149,6 +161,13 @@ static int hclge_cmd_csq_clean(struct hclge_hw *hw)
 
        desc = &csq->desc[ntc];
        head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
+       rmb(); /* Make sure head is ready before touching any data */
+
+       if (!is_valid_csq_clean_head(csq, head)) {
+               dev_warn(&hdev->pdev->dev, "wrong head (%d, %d-%d)\n", head,
+                          csq->next_to_use, csq->next_to_clean);
+               return 0;
+       }
 
        while (head != ntc) {
                memset(desc, 0, sizeof(*desc));
@@ -171,7 +190,11 @@ static int hclge_cmd_csq_done(struct hclge_hw *hw)
 
 static bool hclge_is_special_opcode(u16 opcode)
 {
-       u16 spec_opcode[3] = {0x0030, 0x0031, 0x0032};
+       /* These commands span several descriptors; the first one
+        * carries the opcode and the return value.
+        */
+       u16 spec_opcode[3] = {HCLGE_OPC_STATS_64_BIT,
+               HCLGE_OPC_STATS_32_BIT, HCLGE_OPC_STATS_MAC};
        int i;
 
        for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -362,9 +385,9 @@ int hclge_cmd_init(struct hclge_dev *hdev)
 
 static void hclge_destroy_queue(struct hclge_cmq_ring *ring)
 {
-       spin_lock_bh(&ring->lock);
+       spin_lock(&ring->lock);
        hclge_free_cmd_desc(ring);
-       spin_unlock_bh(&ring->lock);
+       spin_unlock(&ring->lock);
 }
 
 void hclge_destroy_cmd_queue(struct hclge_hw *hw)
index 2066dd734444468ec644179194898193953548be..2f0bbb6708b98e675aa928cd3ec7c134836d7f6d 100644 (file)
@@ -304,8 +304,6 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = {
                HCLGE_MAC_STATS_FIELD_OFF(mac_tx_2048_4095_oct_pkt_num)},
        {"mac_tx_4096_8191_oct_pkt_num",
                HCLGE_MAC_STATS_FIELD_OFF(mac_tx_4096_8191_oct_pkt_num)},
-       {"mac_tx_8192_12287_oct_pkt_num",
-               HCLGE_MAC_STATS_FIELD_OFF(mac_tx_8192_12287_oct_pkt_num)},
        {"mac_tx_8192_9216_oct_pkt_num",
                HCLGE_MAC_STATS_FIELD_OFF(mac_tx_8192_9216_oct_pkt_num)},
        {"mac_tx_9217_12287_oct_pkt_num",
@@ -356,8 +354,6 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = {
                HCLGE_MAC_STATS_FIELD_OFF(mac_rx_2048_4095_oct_pkt_num)},
        {"mac_rx_4096_8191_oct_pkt_num",
                HCLGE_MAC_STATS_FIELD_OFF(mac_rx_4096_8191_oct_pkt_num)},
-       {"mac_rx_8192_12287_oct_pkt_num",
-               HCLGE_MAC_STATS_FIELD_OFF(mac_rx_8192_12287_oct_pkt_num)},
        {"mac_rx_8192_9216_oct_pkt_num",
                HCLGE_MAC_STATS_FIELD_OFF(mac_rx_8192_9216_oct_pkt_num)},
        {"mac_rx_9217_12287_oct_pkt_num",
@@ -1459,8 +1455,11 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
        /* We need to alloc a vport for main NIC of PF */
        num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
 
-       if (hdev->num_tqps < num_vport)
-               num_vport = hdev->num_tqps;
+       if (hdev->num_tqps < num_vport) {
+               dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)",
+                       hdev->num_tqps, num_vport);
+               return -EINVAL;
+       }
 
        /* Alloc the same number of TQPs for every vport */
        tqp_per_vport = hdev->num_tqps / num_vport;
@@ -1474,21 +1473,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
        hdev->vport = vport;
        hdev->num_alloc_vport = num_vport;
 
-#ifdef CONFIG_PCI_IOV
-       /* Enable SRIOV */
-       if (hdev->num_req_vfs) {
-               dev_info(&pdev->dev, "active VFs(%d) found, enabling SRIOV\n",
-                        hdev->num_req_vfs);
-               ret = pci_enable_sriov(hdev->pdev, hdev->num_req_vfs);
-               if (ret) {
-                       hdev->num_alloc_vfs = 0;
-                       dev_err(&pdev->dev, "SRIOV enable failed %d\n",
-                               ret);
-                       return ret;
-               }
-       }
-       hdev->num_alloc_vfs = hdev->num_req_vfs;
-#endif
+       if (IS_ENABLED(CONFIG_PCI_IOV))
+               hdev->num_alloc_vfs = hdev->num_req_vfs;
 
        for (i = 0; i < num_vport; i++) {
                vport->back = hdev;
@@ -2947,21 +2933,6 @@ static void hclge_service_task(struct work_struct *work)
        hclge_service_complete(hdev);
 }
 
-static void hclge_disable_sriov(struct hclge_dev *hdev)
-{
-       /* If our VFs are assigned we cannot shut down SR-IOV
-        * without causing issues, so just leave the hardware
-        * available but disabled
-        */
-       if (pci_vfs_assigned(hdev->pdev)) {
-               dev_warn(&hdev->pdev->dev,
-                        "disabling driver while VFs are assigned\n");
-               return;
-       }
-
-       pci_disable_sriov(hdev->pdev);
-}
-
 struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle)
 {
        /* VF handle has no client */
@@ -3683,48 +3654,50 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
                        "mac enable fail, ret =%d.\n", ret);
 }
 
-static int hclge_set_loopback(struct hnae3_handle *handle,
-                             enum hnae3_loop loop_mode, bool en)
+static int hclge_set_mac_loopback(struct hclge_dev *hdev, bool en)
 {
-       struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_config_mac_mode_cmd *req;
-       struct hclge_dev *hdev = vport->back;
        struct hclge_desc desc;
        u32 loop_en;
        int ret;
 
-       switch (loop_mode) {
-       case HNAE3_MAC_INTER_LOOP_MAC:
-               req = (struct hclge_config_mac_mode_cmd *)&desc.data[0];
-               /* 1 Read out the MAC mode config at first */
-               hclge_cmd_setup_basic_desc(&desc,
-                                          HCLGE_OPC_CONFIG_MAC_MODE,
-                                          true);
-               ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-               if (ret) {
-                       dev_err(&hdev->pdev->dev,
-                               "mac loopback get fail, ret =%d.\n",
-                               ret);
-                       return ret;
-               }
+       req = (struct hclge_config_mac_mode_cmd *)&desc.data[0];
+       /* 1 Read out the MAC mode config at first */
+       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, true);
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "mac loopback get fail, ret =%d.\n", ret);
+               return ret;
+       }
 
-               /* 2 Then setup the loopback flag */
-               loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
-               if (en)
-                       hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 1);
-               else
-                       hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
+       /* 2 Then setup the loopback flag */
+       loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
+       hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, en ? 1 : 0);
 
-               req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+       req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
-               /* 3 Config mac work mode with loopback flag
-                * and its original configure parameters
-                */
-               hclge_cmd_reuse_desc(&desc, false);
-               ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-               if (ret)
-                       dev_err(&hdev->pdev->dev,
-                               "mac loopback set fail, ret =%d.\n", ret);
+       /* 3 Config mac work mode with loopback flag
+        * and its original configure parameters
+        */
+       hclge_cmd_reuse_desc(&desc, false);
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "mac loopback set fail, ret =%d.\n", ret);
+       return ret;
+}
+
+static int hclge_set_loopback(struct hnae3_handle *handle,
+                             enum hnae3_loop loop_mode, bool en)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       int ret;
+
+       switch (loop_mode) {
+       case HNAE3_MAC_INTER_LOOP_MAC:
+               ret = hclge_set_mac_loopback(hdev, en);
                break;
        default:
                ret = -ENOTSUPP;
@@ -3783,6 +3756,7 @@ static int hclge_ae_start(struct hnae3_handle *handle)
        hclge_cfg_mac_mode(hdev, true);
        clear_bit(HCLGE_STATE_DOWN, &hdev->state);
        mod_timer(&hdev->service_timer, jiffies + HZ);
+       hdev->hw.mac.link = 0;
 
        /* reset tqp stats */
        hclge_reset_tqp_stats(handle);
@@ -3819,6 +3793,8 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
        /* reset tqp stats */
        hclge_reset_tqp_stats(handle);
+       del_timer_sync(&hdev->service_timer);
+       cancel_work_sync(&hdev->service_task);
        hclge_update_link_status(hdev);
 }
 
@@ -4540,8 +4516,9 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
        hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, enable);
 }
 
-int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
-                            bool is_kill, u16 vlan, u8 qos, __be16 proto)
+static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
+                                   bool is_kill, u16 vlan, u8 qos,
+                                   __be16 proto)
 {
 #define HCLGE_MAX_VF_BYTES  16
        struct hclge_vlan_filter_vf_cfg_cmd *req0;
@@ -4599,12 +4576,9 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
        return -EIO;
 }
 
-static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
-                                     __be16 proto, u16 vlan_id,
-                                     bool is_kill)
+static int hclge_set_port_vlan_filter(struct hclge_dev *hdev, __be16 proto,
+                                     u16 vlan_id, bool is_kill)
 {
-       struct hclge_vport *vport = hclge_get_vport(handle);
-       struct hclge_dev *hdev = vport->back;
        struct hclge_vlan_filter_pf_cfg_cmd *req;
        struct hclge_desc desc;
        u8 vlan_offset_byte_val;
@@ -4624,22 +4598,66 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
        req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "port vlan command, send fail, ret =%d.\n", ret);
+       return ret;
+}
+
+static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
+                                   u16 vport_id, u16 vlan_id, u8 qos,
+                                   bool is_kill)
+{
+       u16 vport_idx, vport_num = 0;
+       int ret;
+
+       ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id,
+                                      0, proto);
        if (ret) {
                dev_err(&hdev->pdev->dev,
-                       "port vlan command, send fail, ret =%d.\n",
-                       ret);
+                       "Set %d vport vlan filter config fail, ret =%d.\n",
+                       vport_id, ret);
                return ret;
        }
 
-       ret = hclge_set_vf_vlan_common(hdev, 0, is_kill, vlan_id, 0, proto);
-       if (ret) {
+       /* vlan 0 may be added twice when 8021q module is enabled */
+       if (!is_kill && !vlan_id &&
+           test_bit(vport_id, hdev->vlan_table[vlan_id]))
+               return 0;
+
+       if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) {
                dev_err(&hdev->pdev->dev,
-                       "Set pf vlan filter config fail, ret =%d.\n",
-                       ret);
-               return -EIO;
+                       "Add port vlan failed, vport %d is already in vlan %d\n",
+                       vport_id, vlan_id);
+               return -EINVAL;
        }
 
-       return 0;
+       if (is_kill &&
+           !test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) {
+               dev_err(&hdev->pdev->dev,
+                       "Delete port vlan failed, vport %d is not in vlan %d\n",
+                       vport_id, vlan_id);
+               return -EINVAL;
+       }
+
+       for_each_set_bit(vport_idx, hdev->vlan_table[vlan_id], VLAN_N_VID)
+               vport_num++;
+
+       if ((is_kill && vport_num == 0) || (!is_kill && vport_num == 1))
+               ret = hclge_set_port_vlan_filter(hdev, proto, vlan_id,
+                                                is_kill);
+
+       return ret;
+}
+
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+                         u16 vlan_id, bool is_kill)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+
+       return hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, vlan_id,
+                                       0, is_kill);
 }
 
 static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
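
Editor's note: the new hclge_set_vlan_filter_hw() above keeps a per-VLAN bitmap of member vports and programs the shared port-level filter only when the first vport joins a VLAN or the last one leaves it (the `vport_num == 1` / `vport_num == 0` test). A small, runnable sketch of that reference-counting idea in userspace C; the table sizes and function names here are demo assumptions, not the driver's:

#include <stdio.h>

#define VLAN_N_VID 4096
#define VPORT_NUM  8	/* demo value; the driver allows 256 vports */

static unsigned char vlan_table[VLAN_N_VID][VPORT_NUM];

/* Returns 1 when the port-level filter must be programmed or removed. */
static int vlan_membership_update(int vport, int vlan, int is_kill)
{
	int i, members = 0;

	vlan_table[vlan][vport] = is_kill ? 0 : 1;
	for (i = 0; i < VPORT_NUM; i++)
		members += vlan_table[vlan][i];

	/* first vport in, or last vport out */
	return (!is_kill && members == 1) || (is_kill && members == 0);
}

int main(void)
{
	printf("%d\n", vlan_membership_update(0, 100, 0)); /* 1: program filter */
	printf("%d\n", vlan_membership_update(1, 100, 0)); /* 0: already set */
	printf("%d\n", vlan_membership_update(0, 100, 1)); /* 0: still a member */
	printf("%d\n", vlan_membership_update(1, 100, 1)); /* 1: remove filter */
	return 0;
}
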
@@ -4653,7 +4671,7 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
        if (proto != htons(ETH_P_8021Q))
                return -EPROTONOSUPPORT;
 
-       return hclge_set_vf_vlan_common(hdev, vfid, false, vlan, qos, proto);
+       return hclge_set_vlan_filter_hw(hdev, proto, vfid, vlan, qos, false);
 }
 
 static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
@@ -4818,10 +4836,10 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
        }
 
        handle = &hdev->vport[0].nic;
-       return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
+       return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
 }
 
-static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
+int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
 
@@ -5166,12 +5184,6 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
        struct phy_device *phydev = hdev->hw.mac.phydev;
        u32 fc_autoneg;
 
-       /* Only support flow control negotiation for netdev with
-        * phy attached for now.
-        */
-       if (!phydev)
-               return -EOPNOTSUPP;
-
        fc_autoneg = hclge_get_autoneg(handle);
        if (auto_neg != fc_autoneg) {
                dev_info(&hdev->pdev->dev,
@@ -5190,6 +5202,12 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
        if (!fc_autoneg)
                return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
 
+       /* Only support flow control negotiation for netdevs with a
+        * phy attached for now.
+        */
+       if (!phydev)
+               return -EOPNOTSUPP;
+
        return phy_start_aneg(phydev);
 }
 
@@ -5282,7 +5300,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
                        vport->nic.client = client;
                        ret = client->ops->init_instance(&vport->nic);
                        if (ret)
-                               goto err;
+                               return ret;
 
                        if (hdev->roce_client &&
                            hnae3_dev_roce_supported(hdev)) {
@@ -5290,11 +5308,11 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
                                ret = hclge_init_roce_base_info(vport);
                                if (ret)
-                                       goto err;
+                                       return ret;
 
                                ret = rc->ops->init_instance(&vport->roce);
                                if (ret)
-                                       goto err;
+                                       return ret;
                        }
 
                        break;
@@ -5304,7 +5322,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
                        ret = client->ops->init_instance(&vport->nic);
                        if (ret)
-                               goto err;
+                               return ret;
 
                        break;
                case HNAE3_CLIENT_ROCE:
@@ -5316,18 +5334,16 @@ static int hclge_init_client_instance(struct hnae3_client *client,
                        if (hdev->roce_client && hdev->nic_client) {
                                ret = hclge_init_roce_base_info(vport);
                                if (ret)
-                                       goto err;
+                                       return ret;
 
                                ret = client->ops->init_instance(&vport->roce);
                                if (ret)
-                                       goto err;
+                                       return ret;
                        }
                }
        }
 
        return 0;
-err:
-       return ret;
 }
 
 static void hclge_uninit_client_instance(struct hnae3_client *client,
@@ -5364,7 +5380,7 @@ static int hclge_pci_init(struct hclge_dev *hdev)
        ret = pci_enable_device(pdev);
        if (ret) {
                dev_err(&pdev->dev, "failed to enable PCI device\n");
-               goto err_no_drvdata;
+               return ret;
        }
 
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
@@ -5402,8 +5418,6 @@ static int hclge_pci_init(struct hclge_dev *hdev)
        pci_release_regions(pdev);
 err_disable_device:
        pci_disable_device(pdev);
-err_no_drvdata:
-       pci_set_drvdata(pdev, NULL);
 
        return ret;
 }
@@ -5412,6 +5426,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev)
 {
        struct pci_dev *pdev = hdev->pdev;
 
+       pcim_iounmap(pdev, hdev->hw.io_base);
        pci_free_irq_vectors(pdev);
        pci_clear_master(pdev);
        pci_release_mem_regions(pdev);
@@ -5427,7 +5442,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
        if (!hdev) {
                ret = -ENOMEM;
-               goto err_hclge_dev;
+               goto out;
        }
 
        hdev->pdev = pdev;
@@ -5440,38 +5455,38 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        ret = hclge_pci_init(hdev);
        if (ret) {
                dev_err(&pdev->dev, "PCI init failed\n");
-               goto err_pci_init;
+               goto out;
        }
 
        /* Firmware command queue initialize */
        ret = hclge_cmd_queue_init(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret);
-               return ret;
+               goto err_pci_uninit;
        }
 
        /* Firmware command initialize */
        ret = hclge_cmd_init(hdev);
        if (ret)
-               goto err_cmd_init;
+               goto err_cmd_uninit;
 
        ret = hclge_get_cap(hdev);
        if (ret) {
                dev_err(&pdev->dev, "get hw capability error, ret = %d.\n",
                        ret);
-               return ret;
+               goto err_cmd_uninit;
        }
 
        ret = hclge_configure(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
-               return ret;
+               goto err_cmd_uninit;
        }
 
        ret = hclge_init_msi(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Init MSI/MSI-X error, ret = %d.\n", ret);
-               return ret;
+               goto err_cmd_uninit;
        }
 
        ret = hclge_misc_irq_init(hdev);
@@ -5479,69 +5494,71 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
                dev_err(&pdev->dev,
                        "Misc IRQ(vector0) init error, ret = %d.\n",
                        ret);
-               return ret;
+               goto err_msi_uninit;
        }
 
        ret = hclge_alloc_tqps(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Allocate TQPs error, ret = %d.\n", ret);
-               return ret;
+               goto err_msi_irq_uninit;
        }
 
        ret = hclge_alloc_vport(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Allocate vport error, ret = %d.\n", ret);
-               return ret;
+               goto err_msi_irq_uninit;
        }
 
        ret = hclge_map_tqp(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret);
-               return ret;
+               goto err_msi_irq_uninit;
        }
 
-       ret = hclge_mac_mdio_config(hdev);
-       if (ret) {
-               dev_warn(&hdev->pdev->dev,
-                        "mdio config fail ret=%d\n", ret);
-               return ret;
+       if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+               ret = hclge_mac_mdio_config(hdev);
+               if (ret) {
+                       dev_err(&hdev->pdev->dev,
+                               "mdio config fail ret=%d\n", ret);
+                       goto err_msi_irq_uninit;
+               }
        }
 
        ret = hclge_mac_init(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
-               return ret;
+               goto err_mdiobus_unreg;
        }
 
        ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
        if (ret) {
                dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
-               return ret;
+               goto err_mdiobus_unreg;
        }
 
        ret = hclge_init_vlan_config(hdev);
        if (ret) {
                dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
-               return  ret;
+               goto err_mdiobus_unreg;
        }
 
        ret = hclge_tm_schd_init(hdev);
        if (ret) {
                dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
-               return ret;
+               goto err_mdiobus_unreg;
        }
 
        hclge_rss_init_cfg(hdev);
        ret = hclge_rss_init_hw(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
-               return ret;
+               goto err_mdiobus_unreg;
        }
 
        ret = init_mgr_tbl(hdev);
        if (ret) {
                dev_err(&pdev->dev, "manager table init fail, ret =%d\n", ret);
-               return ret;
+               goto err_mdiobus_unreg;
        }
 
        hclge_dcb_ops_set(hdev);
@@ -5564,11 +5581,21 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
        return 0;
 
-err_cmd_init:
+err_mdiobus_unreg:
+       if (hdev->hw.mac.phydev)
+               mdiobus_unregister(hdev->hw.mac.mdio_bus);
+err_msi_irq_uninit:
+       hclge_misc_irq_uninit(hdev);
+err_msi_uninit:
+       pci_free_irq_vectors(pdev);
+err_cmd_uninit:
+       hclge_destroy_cmd_queue(&hdev->hw);
+err_pci_uninit:
+       pcim_iounmap(pdev, hdev->hw.io_base);
+       pci_clear_master(pdev);
        pci_release_regions(pdev);
-err_pci_init:
-       pci_set_drvdata(pdev, NULL);
-err_hclge_dev:
+       pci_disable_device(pdev);
+out:
        return ret;
 }
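
Editor's note: the reworked error path above replaces bare early returns (which leaked IRQ vectors, the command queue, and the PCI state) with a goto ladder that unwinds in exact reverse order of initialization. A generic, compilable sketch of the idiom; all step and undo names are illustrative, not driver API:

#include <stdio.h>

static int step_pci(void)  { puts("pci init");  return 0; }
static int step_cmdq(void) { puts("cmdq init"); return 0; }
static int step_irq(void)  { puts("irq init");  return -1; } /* forced failure */
static void undo_cmdq(void) { puts("cmdq uninit"); }
static void undo_pci(void)  { puts("pci uninit"); }

static int init_device(void)
{
	int ret;

	ret = step_pci();
	if (ret)
		goto out;
	ret = step_cmdq();
	if (ret)
		goto err_pci_uninit;
	ret = step_irq();
	if (ret)
		goto err_cmdq_uninit;
	return 0;

	/* each label undoes everything initialized before the failure */
err_cmdq_uninit:
	undo_cmdq();
err_pci_uninit:
	undo_pci();
out:
	return ret;
}

int main(void)
{
	return init_device() ? 1 : 0;
}
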
 
@@ -5586,6 +5613,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
        set_bit(HCLGE_STATE_DOWN, &hdev->state);
 
        hclge_stats_clear(hdev);
+       memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table));
 
        ret = hclge_cmd_init(hdev);
        if (ret) {
@@ -5658,9 +5686,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 
        set_bit(HCLGE_STATE_DOWN, &hdev->state);
 
-       if (IS_ENABLED(CONFIG_PCI_IOV))
-               hclge_disable_sriov(hdev);
-
        if (hdev->service_timer.function)
                del_timer_sync(&hdev->service_timer);
        if (hdev->service_task.func)
@@ -6203,7 +6228,7 @@ static const struct hnae3_ae_ops hclge_ops = {
        .get_fw_version = hclge_get_fw_version,
        .get_mdix_mode = hclge_get_mdix_mode,
        .enable_vlan_filter = hclge_enable_vlan_filter,
-       .set_vlan_filter = hclge_set_port_vlan_filter,
+       .set_vlan_filter = hclge_set_vlan_filter,
        .set_vf_vlan_filter = hclge_set_vf_vlan_filter,
        .enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag,
        .reset_event = hclge_reset_event,
@@ -6228,7 +6253,9 @@ static int hclge_init(void)
 {
        pr_info("%s is initializing\n", HCLGE_NAME);
 
-       return hnae3_register_ae_algo(&ae_algo);
+       hnae3_register_ae_algo(&ae_algo);
+
+       return 0;
 }
 
 static void hclge_exit(void)
index 0f4157e7128215467e22b04654046182d8387425..93177d91eea4bed5ff0570f4ac37529e0761d76a 100644 (file)
 #include <linux/fs.h>
 #include <linux/types.h>
 #include <linux/phy.h>
+#include <linux/if_vlan.h>
+
 #include "hclge_cmd.h"
 #include "hnae3.h"
 
-#define HCLGE_MOD_VERSION "v1.0"
+#define HCLGE_MOD_VERSION "1.0"
 #define HCLGE_DRIVER_NAME "hclge"
 
 #define HCLGE_INVALID_VPORT 0xffff
@@ -406,9 +408,9 @@ struct hclge_mac_stats {
        u64 mac_tx_1519_2047_oct_pkt_num;
        u64 mac_tx_2048_4095_oct_pkt_num;
        u64 mac_tx_4096_8191_oct_pkt_num;
-       u64 mac_tx_8192_12287_oct_pkt_num; /* valid for GE MAC only */
-       u64 mac_tx_8192_9216_oct_pkt_num; /* valid for LGE & CGE MAC only */
-       u64 mac_tx_9217_12287_oct_pkt_num; /* valid for LGE & CGE MAC */
+       u64 rsv0;
+       u64 mac_tx_8192_9216_oct_pkt_num;
+       u64 mac_tx_9217_12287_oct_pkt_num;
        u64 mac_tx_12288_16383_oct_pkt_num;
        u64 mac_tx_1519_max_good_oct_pkt_num;
        u64 mac_tx_1519_max_bad_oct_pkt_num;
@@ -433,9 +435,9 @@ struct hclge_mac_stats {
        u64 mac_rx_1519_2047_oct_pkt_num;
        u64 mac_rx_2048_4095_oct_pkt_num;
        u64 mac_rx_4096_8191_oct_pkt_num;
-       u64 mac_rx_8192_12287_oct_pkt_num;/* valid for GE MAC only */
-       u64 mac_rx_8192_9216_oct_pkt_num; /* valid for LGE & CGE MAC only */
-       u64 mac_rx_9217_12287_oct_pkt_num; /* valid for LGE & CGE MAC only */
+       u64 rsv1;
+       u64 mac_rx_8192_9216_oct_pkt_num;
+       u64 mac_rx_9217_12287_oct_pkt_num;
        u64 mac_rx_12288_16383_oct_pkt_num;
        u64 mac_rx_1519_max_good_oct_pkt_num;
        u64 mac_rx_1519_max_bad_oct_pkt_num;
@@ -471,6 +473,7 @@ struct hclge_vlan_type_cfg {
        u16 tx_in_vlan_type;
 };
 
+#define HCLGE_VPORT_NUM 256
 struct hclge_dev {
        struct pci_dev *pdev;
        struct hnae3_ae_dev *ae_dev;
@@ -562,6 +565,7 @@ struct hclge_dev {
 
        u64 rx_pkts_for_led;
        u64 tx_pkts_for_led;
+       unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)];
 };
 
 /* VPort level vlan tag configuration for TX direction */
@@ -646,8 +650,9 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue)
 }
 
 int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
-int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
-                            bool is_kill, u16 vlan, u8 qos, __be16 proto);
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+                         u16 vlan_id, bool is_kill);
+int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable);
 
 int hclge_buffer_alloc(struct hclge_dev *hdev);
 int hclge_rss_init_hw(struct hclge_dev *hdev);
index a6f7ffa9c25975df834dae0e777d369efc29ccd8..b6ae26ba0a467dcabe26610e34d5b02203f7f0cd 100644 (file)
@@ -264,19 +264,23 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
                                 struct hclge_mbx_vf_to_pf_cmd *mbx_req,
                                 bool gen_resp)
 {
-       struct hclge_dev *hdev = vport->back;
        int status = 0;
 
        if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) {
+               struct hnae3_handle *handle = &vport->nic;
                u16 vlan, proto;
                bool is_kill;
 
                is_kill = !!mbx_req->msg[2];
                memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan));
                memcpy(&proto, &mbx_req->msg[5], sizeof(proto));
-               status = hclge_set_vf_vlan_common(hdev, vport->vport_id,
-                                                 is_kill, vlan, 0,
-                                                 cpu_to_be16(proto));
+               status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
+                                              vlan, is_kill);
+       } else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) {
+               struct hnae3_handle *handle = &vport->nic;
+               bool en = mbx_req->msg[2] ? true : false;
+
+               status = hclge_en_hw_strip_rxvtag(handle, en);
        }
 
        if (gen_resp)
index 682c2d6618e7b547c544e3a92d86a83eca554fd8..9f7932e423b5ec3efdad1b7d0cce4f39a1847e07 100644 (file)
@@ -140,8 +140,11 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev)
        struct mii_bus *mdio_bus;
        int ret;
 
-       if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR)
-               return 0;
+       if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
+               dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
+                       hdev->hw.mac.phy_addr);
+               return -EINVAL;
+       }
 
        mdio_bus = devm_mdiobus_alloc(&hdev->pdev->dev);
        if (!mdio_bus)
index 885f25cd7be49fa7669341d629abcef706fe9ffc..262c125f81375a8f91f9bccc899144ad01f2605a 100644 (file)
@@ -134,11 +134,8 @@ static int hclge_pfc_stats_get(struct hclge_dev *hdev,
        }
 
        ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
-       if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "Get pfc pause stats fail, ret = %d.\n", ret);
+       if (ret)
                return ret;
-       }
 
        for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
                struct hclge_pfc_stats_cmd *pfc_stats =
@@ -503,7 +500,8 @@ static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode)
        return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc)
+static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc, u8 grp_id,
+                             u32 bit_map)
 {
        struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
        struct hclge_desc desc;
@@ -514,9 +512,8 @@ static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc)
        bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
 
        bp_to_qs_map_cmd->tc_id = tc;
-
-       /* Qset and tc is one by one mapping */
-       bp_to_qs_map_cmd->qs_bit_map = cpu_to_le32(1 << tc);
+       bp_to_qs_map_cmd->qs_group_id = grp_id;
+       bp_to_qs_map_cmd->qs_bit_map = cpu_to_le32(bit_map);
 
        return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -1170,6 +1167,41 @@ static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
                                      hdev->tm_info.hw_pfc_map);
 }
 
+/* Each TC has 1024 queue sets for backpressure, divided into
+ * 32 groups of 32 queue sets each, so one group can be
+ * represented by a u32 bitmap.
+ */
+static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
+{
+       struct hclge_vport *vport = hdev->vport;
+       u32 i, k, qs_bitmap;
+       int ret;
+
+       for (i = 0; i < HCLGE_BP_GRP_NUM; i++) {
+               qs_bitmap = 0;
+
+               for (k = 0; k < hdev->num_alloc_vport; k++) {
+                       u16 qs_id = vport->qs_offset + tc;
+                       u8 grp, sub_grp;
+
+                       grp = hnae_get_field(qs_id, HCLGE_BP_GRP_ID_M,
+                                            HCLGE_BP_GRP_ID_S);
+                       sub_grp = hnae_get_field(qs_id, HCLGE_BP_SUB_GRP_ID_M,
+                                                HCLGE_BP_SUB_GRP_ID_S);
+                       if (i == grp)
+                               qs_bitmap |= (1 << sub_grp);
+
+                       vport++;
+               }
+
+               ret = hclge_tm_qs_bp_cfg(hdev, tc, i, qs_bitmap);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
 {
        bool tx_en, rx_en;
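
Editor's note: the group/sub-group split above follows the field definitions added in hclge_tm.h (HCLGE_BP_GRP_ID_M = bits 9:5, HCLGE_BP_SUB_GRP_ID_M = bits 4:0): a 10-bit queue-set id selects one of 32 groups and one bit inside that group's u32 bitmap. A standalone illustration of the decomposition (userspace C, example qs_id chosen arbitrarily):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t qs_id = 675;			/* example 10-bit queue-set id */
	uint8_t grp = (qs_id >> 5) & 0x1f;	/* bits 9:5 -> group index */
	uint8_t sub_grp = qs_id & 0x1f;		/* bits 4:0 -> bit in group */
	uint32_t qs_bitmap = 1u << sub_grp;

	/* prints: qs_id 675 -> group 21, bit 3 (bitmap 0x00000008) */
	printf("qs_id %u -> group %u, bit %u (bitmap 0x%08x)\n",
	       qs_id, grp, sub_grp, qs_bitmap);
	return 0;
}
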
@@ -1221,7 +1253,7 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
                dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
 
        for (i = 0; i < hdev->tm_info.num_tc; i++) {
-               ret = hclge_tm_qs_bp_cfg(hdev, i);
+               ret = hclge_bp_setup_hw(hdev, i);
                if (ret)
                        return ret;
        }
index 2dbe177581e982c1ecf3e115a6cf9d078f680df3..c2b6e8a6700f067fa38511fdc34b27a1eb1dc75e 100644 (file)
@@ -89,6 +89,11 @@ struct hclge_pg_shapping_cmd {
        __le32 pg_shapping_para;
 };
 
+#define HCLGE_BP_GRP_NUM               32
+#define HCLGE_BP_SUB_GRP_ID_S          0
+#define HCLGE_BP_SUB_GRP_ID_M          GENMASK(4, 0)
+#define HCLGE_BP_GRP_ID_S              5
+#define HCLGE_BP_GRP_ID_M              GENMASK(9, 5)
 struct hclge_bp_to_qs_map_cmd {
        u8 tc_id;
        u8 rsvd[2];
index 2b8426412cc9ae504b24423601a539c8502f2573..2b0e3295989fd811db9dfb41d75720ea9c4bac03 100644 (file)
@@ -830,6 +830,17 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
                                    HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
 }
 
+static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
+{
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+       u8 msg_data;
+
+       msg_data = enable ? 1 : 0;
+       return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+                                   HCLGE_MBX_VLAN_RX_OFF_CFG, &msg_data,
+                                   1, false, NULL, 0);
+}
+
 static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -1552,7 +1563,7 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
        ret = pci_enable_device(pdev);
        if (ret) {
                dev_err(&pdev->dev, "failed to enable PCI device\n");
-               goto err_no_drvdata;
+               return ret;
        }
 
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
@@ -1584,8 +1595,7 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
        pci_release_regions(pdev);
 err_disable_device:
        pci_disable_device(pdev);
-err_no_drvdata:
-       pci_set_drvdata(pdev, NULL);
+
        return ret;
 }
 
@@ -1597,7 +1607,6 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
        pci_clear_master(pdev);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
-       pci_set_drvdata(pdev, NULL);
 }
 
 static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
@@ -1625,6 +1634,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 
        hclgevf_state_init(hdev);
 
+       ret = hclgevf_cmd_init(hdev);
+       if (ret)
+               goto err_cmd_init;
+
        ret = hclgevf_misc_irq_init(hdev);
        if (ret) {
                dev_err(&pdev->dev, "failed(%d) to init Misc IRQ(vector0)\n",
@@ -1632,10 +1645,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
                goto err_misc_irq_init;
        }
 
-       ret = hclgevf_cmd_init(hdev);
-       if (ret)
-               goto err_cmd_init;
-
        ret = hclgevf_configure(hdev);
        if (ret) {
                dev_err(&pdev->dev, "failed(%d) to fetch configuration\n", ret);
@@ -1683,10 +1692,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
        return 0;
 
 err_config:
-       hclgevf_cmd_uninit(hdev);
-err_cmd_init:
        hclgevf_misc_irq_uninit(hdev);
 err_misc_irq_init:
+       hclgevf_cmd_uninit(hdev);
+err_cmd_init:
        hclgevf_state_uninit(hdev);
        hclgevf_uninit_msi(hdev);
 err_irq_init:
@@ -1696,9 +1705,9 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 
 static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 {
-       hclgevf_cmd_uninit(hdev);
-       hclgevf_misc_irq_uninit(hdev);
        hclgevf_state_uninit(hdev);
+       hclgevf_misc_irq_uninit(hdev);
+       hclgevf_cmd_uninit(hdev);
        hclgevf_uninit_msi(hdev);
        hclgevf_pci_uninit(hdev);
 }
@@ -1825,6 +1834,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
        .get_tc_size = hclgevf_get_tc_size,
        .get_fw_version = hclgevf_get_fw_version,
        .set_vlan_filter = hclgevf_set_vlan_filter,
+       .enable_hw_strip_rxvtag = hclgevf_en_hw_strip_rxvtag,
        .reset_event = hclgevf_reset_event,
        .get_channels = hclgevf_get_channels,
        .get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
@@ -1842,7 +1852,9 @@ static int hclgevf_init(void)
 {
        pr_info("%s is initializing\n", HCLGEVF_NAME);
 
-       return hnae3_register_ae_algo(&ae_algovf);
+       hnae3_register_ae_algo(&ae_algovf);
+
+       return 0;
 }
 
 static void hclgevf_exit(void)
index a477a7c36bbd30a2c2a9ffd19352da8166fb1c0d..9763e742e6fbae2f4be013b35b9213ca2543a36d 100644 (file)
@@ -9,7 +9,7 @@
 #include "hclgevf_cmd.h"
 #include "hnae3.h"
 
-#define HCLGEVF_MOD_VERSION "v1.0"
+#define HCLGEVF_MOD_VERSION "1.0"
 #define HCLGEVF_DRIVER_NAME "hclgevf"
 
 #define HCLGEVF_ROCEE_VECTOR_NUM       0
index eb53bd93065e0da12ddd744feffdd34f853c952e..5b122728dcb472c9f9849cb2a82a08e5b4917cca 100644 (file)
@@ -51,7 +51,9 @@ static unsigned int rx_weight = 64;
 module_param(rx_weight, uint, 0644);
 MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)");
 
-#define PCI_DEVICE_ID_HI1822_PF         0x1822
+#define HINIC_DEV_ID_QUAD_PORT_25GE     0x1822
+#define HINIC_DEV_ID_DUAL_PORT_25GE     0x0200
+#define HINIC_DEV_ID_DUAL_PORT_100GE    0x0201
 
 #define HINIC_WQ_NAME                   "hinic_dev"
 
@@ -1097,7 +1099,9 @@ static void hinic_remove(struct pci_dev *pdev)
 }
 
 static const struct pci_device_id hinic_pci_table[] = {
-       { PCI_VDEVICE(HUAWEI, PCI_DEVICE_ID_HI1822_PF), 0},
+       { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE), 0},
+       { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_25GE), 0},
+       { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE), 0},
        { 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, hinic_pci_table);
index 2df01ad98df77d6dbdaa0873cc771bb910346fc6..4bb4646a5f92f5617b6a3306a5002e180fc9cca1 100644 (file)
@@ -192,6 +192,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
        if (adapter->fw_done_rc) {
                dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
                        adapter->fw_done_rc);
+               dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
                return -1;
        }
        return 0;
@@ -1128,7 +1129,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
        if (!adapter->rx_pool)
                return;
 
-       rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+       rx_scrqs = adapter->num_active_rx_pools;
        rx_entries = adapter->req_rx_add_entries_per_subcrq;
 
        /* Free any remaining skbs in the rx buffer pools */
@@ -1177,7 +1178,7 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
        if (!adapter->tx_pool || !adapter->tso_pool)
                return;
 
-       tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+       tx_scrqs = adapter->num_active_tx_pools;
 
        /* Free any remaining skbs in the tx buffer pools */
        for (i = 0; i < tx_scrqs; i++) {
@@ -1821,9 +1822,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                        if (rc)
                                return rc;
                }
+               ibmvnic_disable_irqs(adapter);
        }
-
-       ibmvnic_disable_irqs(adapter);
        adapter->state = VNIC_CLOSED;
 
        if (reset_state == VNIC_CLOSED)
@@ -4586,14 +4586,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                release_crq_queue(adapter);
        }
 
-       rc = init_stats_buffers(adapter);
-       if (rc)
-               return rc;
-
-       rc = init_stats_token(adapter);
-       if (rc)
-               return rc;
-
        return rc;
 }
 
@@ -4662,13 +4654,21 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
                        goto ibmvnic_init_fail;
        } while (rc == EAGAIN);
 
+       rc = init_stats_buffers(adapter);
+       if (rc)
+               goto ibmvnic_init_fail;
+
+       rc = init_stats_token(adapter);
+       if (rc)
+               goto ibmvnic_stats_fail;
+
        netdev->mtu = adapter->req_mtu - ETH_HLEN;
        netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
        netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
 
        rc = device_create_file(&dev->dev, &dev_attr_failover);
        if (rc)
-               goto ibmvnic_init_fail;
+               goto ibmvnic_dev_file_err;
 
        netif_carrier_off(netdev);
        rc = register_netdev(netdev);
@@ -4687,6 +4687,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 ibmvnic_register_fail:
        device_remove_file(&dev->dev, &dev_attr_failover);
 
+ibmvnic_dev_file_err:
+       release_stats_token(adapter);
+
+ibmvnic_stats_fail:
+       release_stats_buffers(adapter);
+
 ibmvnic_init_fail:
        release_sub_crqs(adapter, 1);
        release_crq_queue(adapter);
index 41ad56edfb96da5362b76ae00722731cfbe2a107..27d5f27163d2cd04f8583f9defd888c3ee1ee8bc 100644 (file)
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel PRO/100 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 /*
  *     e100.c: Intel(R) PRO/100 ethernet driver
index c7caadd3c8af9e7e443327a9acdc430c01a6a2fa..314c52d44b7c55dfcfd28a9a7c2fd26a95580d53 100644 (file)
@@ -1,31 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel PRO/1000 Linux driver
 # Copyright(c) 1999 - 2006 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
 
 #
 # Makefile for the Intel(R) PRO/1000 ethernet driver
index 3a0feea2df54463822346bc45c70c67aad914f01..c40729b2c1844e2fd16c905e6c51426698ed6f80 100644 (file)
@@ -1,32 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/1000 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 /* Linux PRO/1000 Ethernet Driver main header file */
 
index 3e80ca170dd78cc74e1843576a061772c06c0731..5d365a986bb08a05893bb46430f09bc7004e8f23 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- * Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2006 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 /* ethtool support for e1000 */
 
index 6e7e923d57bf47d078481ffd5185fcc82c6e5aa3..48428d6a00be2f0f4a59b17ff143249cd22bed73 100644 (file)
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-*
-  Intel PRO/1000 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
- */
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 /* e1000_hw.c
  * Shared functions for accessing and configuring the MAC
index f09c569ec19b7609209f2cb14ce05621afcd016d..b57a04954ccfbe85a876e643a0d788b3032326af 100644 (file)
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/1000 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 /* e1000_hw.h
  * Structures, enums, and macros for the MAC
index d5eb19b86a0acf5bc0422ee87df341717d8a3fd2..2110d5f2da19037d11be071566c8ce882b8644e6 100644 (file)
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel PRO/1000 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 #include "e1000.h"
 #include <net/ip6_checksum.h>
index ae0559b8b011ab25412810573800863f31f05318..e966bb2907977c3160674db5248642ace38da0b2 100644 (file)
@@ -1,32 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/1000 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 /* glue for the OS independent part of e1000
  * includes register access macros
index 345f23927bcc8a06a03c3dc84e4b3253b7d8328a..d3f29ffe1e475bb6d07b986157ea25c0f62e09be 100644 (file)
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel PRO/1000 Linux driver
-  Copyright(c) 1999 - 2006 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
 
 #include "e1000.h"
 
index 953e99df420c03f069ebbf7b97312727eb3692f3..257bd59bc9c6ff58446f9525b7230281a6fe7d2a 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* 80003ES2LAN Gigabit Ethernet Controller (Copper)
  * 80003ES2LAN Gigabit Ethernet Controller (Serdes)
index ee6d1256fda4559510126edc855d463dcd5cbe7a..aa9d639c6cbbb3900ca1e8c457f5b1fc45457c8d 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_80003ES2LAN_H_
 #define _E1000E_80003ES2LAN_H_
index 924f2c8dfa6cbb8179532dd6da5e6301b82650c5..b9309302c29e5cebe3b10e82a36f4265b47dbdbd 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* 82571EB Gigabit Ethernet Controller
  * 82571EB Gigabit Ethernet Controller (Copper)
index 9a24c645f726b8557ef76e571679ac8723a2a688..834c238d02dbabf7d72415c84076d73653dceb90 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_82571_H_
 #define _E1000E_82571_H_
index 24e391a4ac68e92a2b55f5cdb4f6dbf0722ff003..44e58b6e766079d9a5e1ff5d2a99508b2ed3953f 100644 (file)
@@ -1,30 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel PRO/1000 Linux driver
-# Copyright(c) 1999 - 2014 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 1999 - 2018 Intel Corporation.
 
 #
 # Makefile for the Intel(R) PRO/1000 ethernet driver
index 22883015a6959266657478d07094d5e05e53e623..fd550dee498200502f0940ffb1dd88daa12b22b8 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000_DEFINES_H_
 #define _E1000_DEFINES_H_
index da88555ba1fdf3ba97f2171618b4b438484ba4f0..c760dc72c52007def58701558ef72730ba94a69b 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* Linux PRO/1000 Ethernet Driver main header file */
 
index 64dc0c11147faba8c6a5df1e9a4d2d20b2ad025e..e084cb734eb1e7534da82c8ae729b4ae360bac20 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* ethtool support for e1000 */
 
index 21802396bed68f2b662b4148c515c486ea41555a..eff75bd8a8f0b9dc08a17f3c5a36b2515c31f0cf 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000_HW_H_
 #define _E1000_HW_H_
index 1551d6ce5341022d3d752f6209bf3207e821cb11..cdae0efde8e6415a89afeaec45de49acab763164 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* 82562G 10/100 Network Connection
  * 82562G-2 10/100 Network Connection
index 3c4f82c21084a44c5962ede1e01863ee6bd4fe34..eb09c755fa172314ffa8500b110d0c481d14e680 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_ICH8LAN_H_
 #define _E1000E_ICH8LAN_H_
index b293464a9f2738fb7bc10399ad3bb258056102e3..4abd55d646c551bbd191bb3e2dfd4d75cb80a967 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "e1000.h"
 
index cb0abf6c76a5ca323af21b5eeb4e6c4649a4cd32..6ab26111980154f220e94b4d6ec8ade68b2fe019 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_MAC_H_
 #define _E1000E_MAC_H_
index e027660aeb92b62e34ba52d5d483d80c5bc8bdd0..c4c9b20bc51f9b0ecf4f3a3420b92f7faa14eed7 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "e1000.h"
 
index 3268f2e58593f7eab6651e86b49da8bf380c0cc8..d868aad806d4b378d1eab18f070c12585c6ee843 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_MANAGE_H_
 #define _E1000E_MANAGE_H_
index ec4a9759a6f26be5222a09d0b3cf405c35a23958..d3fef7fefea8a3cd7de04401a38f4b62efdcb181 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
index 68949bb41b7b592ad3194d9f60dfa8d762342166..937f9af22d26779cac114714c42a49d468fcc309 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "e1000.h"
 
index 8e082028be7dd1738e97cc2b5cbb30ce8d87af9e..6a30dfea411783c4235ba374bc4405220a1c6606 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_NVM_H_
 #define _E1000E_NVM_H_
index 2def33eba9e67041b760a56c0d637c2b47d52756..098369fd3e6598a7395e298a770463d4c8472153 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/netdevice.h>
 #include <linux/module.h>
index b8226ed0e338f45e71f35a4036d97964d8491591..42233019255a35e2cd834b56ec672f8081e0e9fd 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "e1000.h"
 
index d4180b5e91960c4f6b7532b8d313a823d3d4d15a..c48777d09523529c0977f1e19510aeb83d245f44 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_PHY_H_
 #define _E1000E_PHY_H_
index f941e5085f44dd1399d2883f5aa3c32e934f7106..37c76945ad9baaf8c433669e9ef4310148255caa 100644 (file)
@@ -1,24 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* PTP 1588 Hardware Clock (PHC)
  * Derived from PTP Hardware Clock driver for Intel 82576 and 82580 (igb)
index 16afc3c2a986cd6373118e027d3f33257b902690..47f5ca7939705191582db6968daaa715e0947d75 100644 (file)
@@ -1,24 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000E_REGS_H_
 #define _E1000E_REGS_H_
index 93277cb99cb7c9ec798858ed19df34c7ae348f5e..26a9746ccb14509ea07ccaa3156deefb71969c0a 100644 (file)
@@ -1,26 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel(R) Ethernet Switch Host Interface Driver
-# Copyright(c) 2013 - 2016 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
 
 #
 # Makefile for the Intel(R) Ethernet Switch Host Interface Driver
index a9cdf763c59de2460c84cfdefbe9da55c170bbc5..a903a0ba45e180c39e99c6b6a0726291203b1f91 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_H_
 #define _FM10K_H_
index e303d88720efad8b8f23240be7fee0f533f46fd6..f51a63fca513e92434e83f0c7372cc8fc22ce298 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k_common.h"
 
index 2bdb24d2ca9d860508ebae72c45bcdddcc79372f..4c48fb73b3e78c94e92e05a5a1691ca32a5241d3 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_COMMON_H_
 #define _FM10K_COMMON_H_
index c4f733452ef2918825091fb63a26a12ff21698ad..20768ac7f17eb9836ab248cd9e8994b39ea3bb0e 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k.h"
 
index 43e8d839831f0916cc7d337d48eb3d6c6d008ecc..dca104121c0544963bce2c3958c09d564af5d3f2 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k.h"
 
index 28b6b4e564879e6e6b2029c174ea4ffa7bf28079..7657daa27298dac290859c105640873e4b5f08d7 100644 (file)
@@ -1,40 +1,32 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/vmalloc.h>
 
 #include "fm10k.h"
 
 struct fm10k_stats {
+       /* The stat_string is expected to be a format string formatted using
+        * vsnprintf by fm10k_add_stat_strings. Every member of a stats array
+        * should use the same format specifiers as they will be formatted
+        * using the same variadic arguments.
+        */
        char stat_string[ETH_GSTRING_LEN];
        int sizeof_stat;
        int stat_offset;
 };
 
-#define FM10K_NETDEV_STAT(_net_stat) { \
-       .stat_string = #_net_stat, \
-       .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
-       .stat_offset = offsetof(struct net_device_stats, _net_stat) \
+#define FM10K_STAT_FIELDS(_type, _name, _stat) { \
+       .stat_string = _name, \
+       .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+       .stat_offset = offsetof(_type, _stat) \
 }
 
+/* netdevice statistics */
+#define FM10K_NETDEV_STAT(_net_stat) \
+       FM10K_STAT_FIELDS(struct net_device_stats, __stringify(_net_stat), \
+                         _net_stat)
+
 static const struct fm10k_stats fm10k_gstrings_net_stats[] = {
        FM10K_NETDEV_STAT(tx_packets),
        FM10K_NETDEV_STAT(tx_bytes),
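
Note: the four stat-table macros in this file previously open-coded the same three-field initializer; they now all expand through the single FM10K_STAT_FIELDS helper above. For instance, FM10K_NETDEV_STAT(tx_packets) still produces the initializer it always did, with __stringify() supplying the name:

    { .stat_string = "tx_packets",
      .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, tx_packets),
      .stat_offset = offsetof(struct net_device_stats, tx_packets) }
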
@@ -52,11 +44,9 @@ static const struct fm10k_stats fm10k_gstrings_net_stats[] = {
 
 #define FM10K_NETDEV_STATS_LEN ARRAY_SIZE(fm10k_gstrings_net_stats)
 
-#define FM10K_STAT(_name, _stat) { \
-       .stat_string = _name, \
-       .sizeof_stat = FIELD_SIZEOF(struct fm10k_intfc, _stat), \
-       .stat_offset = offsetof(struct fm10k_intfc, _stat) \
-}
+/* General interface statistics */
+#define FM10K_STAT(_name, _stat) \
+       FM10K_STAT_FIELDS(struct fm10k_intfc, _name, _stat)
 
 static const struct fm10k_stats fm10k_gstrings_global_stats[] = {
        FM10K_STAT("tx_restart_queue", restart_queue),
@@ -93,11 +83,9 @@ static const struct fm10k_stats fm10k_gstrings_pf_stats[] = {
        FM10K_STAT("nodesc_drop", stats.nodesc_drop.count),
 };
 
-#define FM10K_MBX_STAT(_name, _stat) { \
-       .stat_string = _name, \
-       .sizeof_stat = FIELD_SIZEOF(struct fm10k_mbx_info, _stat), \
-       .stat_offset = offsetof(struct fm10k_mbx_info, _stat) \
-}
+/* mailbox statistics */
+#define FM10K_MBX_STAT(_name, _stat) \
+       FM10K_STAT_FIELDS(struct fm10k_mbx_info, _name, _stat)
 
 static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = {
        FM10K_MBX_STAT("mbx_tx_busy", tx_busy),
@@ -111,15 +99,13 @@ static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = {
        FM10K_MBX_STAT("mbx_rx_mbmem_pushed", rx_mbmem_pushed),
 };
 
-#define FM10K_QUEUE_STAT(_name, _stat) { \
-       .stat_string = _name, \
-       .sizeof_stat = FIELD_SIZEOF(struct fm10k_ring, _stat), \
-       .stat_offset = offsetof(struct fm10k_ring, _stat) \
-}
+/* per-queue ring statistics */
+#define FM10K_QUEUE_STAT(_name, _stat) \
+       FM10K_STAT_FIELDS(struct fm10k_ring, _name, _stat)
 
 static const struct fm10k_stats fm10k_gstrings_queue_stats[] = {
-       FM10K_QUEUE_STAT("packets", stats.packets),
-       FM10K_QUEUE_STAT("bytes", stats.bytes),
+       FM10K_QUEUE_STAT("%s_queue_%u_packets", stats.packets),
+       FM10K_QUEUE_STAT("%s_queue_%u_bytes", stats.bytes),
 };
 
 #define FM10K_GLOBAL_STATS_LEN ARRAY_SIZE(fm10k_gstrings_global_stats)
@@ -149,49 +135,44 @@ enum {
 static const char fm10k_prv_flags[FM10K_PRV_FLAG_LEN][ETH_GSTRING_LEN] = {
 };
 
-static void fm10k_add_stat_strings(u8 **p, const char *prefix,
-                                  const struct fm10k_stats stats[],
-                                  const unsigned int size)
+static void __fm10k_add_stat_strings(u8 **p, const struct fm10k_stats stats[],
+                                    const unsigned int size, ...)
 {
        unsigned int i;
 
        for (i = 0; i < size; i++) {
-               snprintf(*p, ETH_GSTRING_LEN, "%s%s",
-                        prefix, stats[i].stat_string);
+               va_list args;
+
+               va_start(args, size);
+               vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
                *p += ETH_GSTRING_LEN;
+               va_end(args);
        }
 }
 
+#define fm10k_add_stat_strings(p, stats, ...) \
+       __fm10k_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
+
 static void fm10k_get_stat_strings(struct net_device *dev, u8 *data)
 {
        struct fm10k_intfc *interface = netdev_priv(dev);
        unsigned int i;
 
-       fm10k_add_stat_strings(&data, "", fm10k_gstrings_net_stats,
-                              FM10K_NETDEV_STATS_LEN);
+       fm10k_add_stat_strings(&data, fm10k_gstrings_net_stats);
 
-       fm10k_add_stat_strings(&data, "", fm10k_gstrings_global_stats,
-                              FM10K_GLOBAL_STATS_LEN);
+       fm10k_add_stat_strings(&data, fm10k_gstrings_global_stats);
 
-       fm10k_add_stat_strings(&data, "", fm10k_gstrings_mbx_stats,
-                              FM10K_MBX_STATS_LEN);
+       fm10k_add_stat_strings(&data, fm10k_gstrings_mbx_stats);
 
        if (interface->hw.mac.type != fm10k_mac_vf)
-               fm10k_add_stat_strings(&data, "", fm10k_gstrings_pf_stats,
-                                      FM10K_PF_STATS_LEN);
+               fm10k_add_stat_strings(&data, fm10k_gstrings_pf_stats);
 
        for (i = 0; i < interface->hw.mac.max_queues; i++) {
-               char prefix[ETH_GSTRING_LEN];
-
-               snprintf(prefix, ETH_GSTRING_LEN, "tx_queue_%u_", i);
-               fm10k_add_stat_strings(&data, prefix,
-                                      fm10k_gstrings_queue_stats,
-                                      FM10K_QUEUE_STATS_LEN);
+               fm10k_add_stat_strings(&data, fm10k_gstrings_queue_stats,
+                                      "tx", i);
 
-               snprintf(prefix, ETH_GSTRING_LEN, "rx_queue_%u_", i);
-               fm10k_add_stat_strings(&data, prefix,
-                                      fm10k_gstrings_queue_stats,
-                                      FM10K_QUEUE_STATS_LEN);
+               fm10k_add_stat_strings(&data, fm10k_gstrings_queue_stats,
+                                      "rx", i);
        }
 }
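
Note: with stat_string now treated as a vsnprintf() template, the per-queue names embed their placeholders directly ("%s_queue_%u_packets") and the "tx"/"rx" prefix plus queue index arrive as variadic arguments, replacing the old fixed-size prefix buffer. va_start()/va_end() must bracket each vsnprintf() call, as above, because a va_list is consumed by use. A minimal standalone sketch of the same pattern (hypothetical names, not driver code):

    #include <stdarg.h>
    #include <stdio.h>

    /* Expand each printf-style template with one shared set of args. */
    static void add_strings(char *out, size_t each,
                            const char * const tmpl[], unsigned int n, ...)
    {
            unsigned int i;

            for (i = 0; i < n; i++) {
                    va_list args;

                    va_start(args, n);      /* re-arm for every template */
                    vsnprintf(out + i * each, each, tmpl[i], args);
                    va_end(args);
            }
    }

Called with tmpl[0] = "%s_queue_%u_packets" and the arguments ("tx", 0u), this yields "tx_queue_0_packets", mirroring what fm10k_get_stat_strings() now emits.
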
 
@@ -236,9 +217,9 @@ static int fm10k_get_sset_count(struct net_device *dev, int sset)
        }
 }
 
-static void fm10k_add_ethtool_stats(u64 **data, void *pointer,
-                                   const struct fm10k_stats stats[],
-                                   const unsigned int size)
+static void __fm10k_add_ethtool_stats(u64 **data, void *pointer,
+                                     const struct fm10k_stats stats[],
+                                     const unsigned int size)
 {
        unsigned int i;
        char *p;
@@ -267,11 +248,16 @@ static void fm10k_add_ethtool_stats(u64 **data, void *pointer,
                        *((*data)++) = *(u8 *)p;
                        break;
                default:
+                       WARN_ONCE(1, "unexpected stat size for %s",
+                                 stats[i].stat_string);
                        *((*data)++) = 0;
                }
        }
 }
 
+#define fm10k_add_ethtool_stats(data, pointer, stats) \
+       __fm10k_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
 static void fm10k_get_ethtool_stats(struct net_device *netdev,
                                    struct ethtool_stats __always_unused *stats,
                                    u64 *data)
@@ -282,20 +268,16 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev,
 
        fm10k_update_stats(interface);
 
-       fm10k_add_ethtool_stats(&data, net_stats, fm10k_gstrings_net_stats,
-                               FM10K_NETDEV_STATS_LEN);
+       fm10k_add_ethtool_stats(&data, net_stats, fm10k_gstrings_net_stats);
 
-       fm10k_add_ethtool_stats(&data, interface, fm10k_gstrings_global_stats,
-                               FM10K_GLOBAL_STATS_LEN);
+       fm10k_add_ethtool_stats(&data, interface, fm10k_gstrings_global_stats);
 
        fm10k_add_ethtool_stats(&data, &interface->hw.mbx,
-                               fm10k_gstrings_mbx_stats,
-                               FM10K_MBX_STATS_LEN);
+                               fm10k_gstrings_mbx_stats);
 
        if (interface->hw.mac.type != fm10k_mac_vf) {
                fm10k_add_ethtool_stats(&data, interface,
-                                       fm10k_gstrings_pf_stats,
-                                       FM10K_PF_STATS_LEN);
+                                       fm10k_gstrings_pf_stats);
        }
 
        for (i = 0; i < interface->hw.mac.max_queues; i++) {
@@ -303,13 +285,11 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev,
 
                ring = interface->tx_ring[i];
                fm10k_add_ethtool_stats(&data, ring,
-                                       fm10k_gstrings_queue_stats,
-                                       FM10K_QUEUE_STATS_LEN);
+                                       fm10k_gstrings_queue_stats);
 
                ring = interface->rx_ring[i];
                fm10k_add_ethtool_stats(&data, ring,
-                                       fm10k_gstrings_queue_stats,
-                                       FM10K_QUEUE_STATS_LEN);
+                                       fm10k_gstrings_queue_stats);
        }
 }
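
Note: the ethtool value path gets the same treatment as the string path. The worker keeps the explicit size parameter under a __ prefix, while the fm10k_add_ethtool_stats() and fm10k_add_stat_strings() wrapper macros derive it with ARRAY_SIZE(stats). That removes every FM10K_*_STATS_LEN argument at the call sites, so the length can no longer drift out of sync with the array it describes, and the new WARN_ONCE() turns a silently zeroed value into a diagnosable report for any stat whose sizeof_stat is not 8, 4, 2 or 1 bytes. One detail worth noting: ## __VA_ARGS__ (a GNU extension used throughout the kernel) deletes the trailing comma when a caller passes no variadic arguments, e.g.

    fm10k_add_stat_strings(&data, fm10k_gstrings_net_stats);
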
 
index 30395f5e5e87ad3b5a7b4cb40832fb8d4f3c0be1..e707d717012faa997a127687ce45d54b27b9e3eb 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k.h"
 #include "fm10k_vf.h"
index df8607097e4abda3330c3ffda7d85c1284edbf36..3f536541f45f170ab9a2c0513bc6f311501990a6 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/types.h>
 #include <linux/module.h>
@@ -445,15 +427,14 @@ static void fm10k_type_trans(struct fm10k_ring *rx_ring,
                        l2_accel = NULL;
        }
 
-       skb->protocol = eth_type_trans(skb, dev);
-
        /* Record Rx queue, or update macvlan statistics */
        if (!l2_accel)
                skb_record_rx_queue(skb, rx_ring->queue_index);
        else
                macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
-                                (skb->pkt_type == PACKET_BROADCAST) ||
-                                (skb->pkt_type == PACKET_MULTICAST));
+                                false);
+
+       skb->protocol = eth_type_trans(skb, dev);
 }
 
 /**
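
Note: in fm10k_type_trans() the macvlan accounting now runs before eth_type_trans() and passes a hard-coded false for the multicast flag. The old code keyed that flag off skb->pkt_type, which eth_type_trans() had just derived from the destination MAC; since this hardware only steers frames to an offloaded macvlan by exact destination-MAC match (see the fm10k_dfwd_add_station() hunk further down), such frames are unicast by construction, so the flag can be constant and the eth_type_trans() call deferred until the bookkeeping is done. That is the apparent rationale; the commit message itself is not shown in this diff.
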
index c01bf30a0c9e1def9b6aecac61ef5676e7249f85..21021fe4f1c3ffcfb9c132fab8c72753dc9df5b5 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k_common.h"
 
index 007e1dfa9b7a082d3030b0de6a72345385c27c35..56d1abff04e22b7018975ee522c07fc82b7f1450 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_MBX_H_
 #define _FM10K_MBX_H_
index 45793491d4ba34dea2ffa632579ead116d4e9add..929f538d28bc03a391340ebf6cc531b03f92b7a3 100644 (file)
@@ -1,27 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k.h"
 #include <linux/vmalloc.h>
 #include <net/udp_tunnel.h>
+#include <linux/if_macvlan.h>
 
 /**
  * fm10k_setup_tx_resources - allocate Tx resources (Descriptors)
@@ -924,7 +907,9 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev,
 static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 {
        struct fm10k_intfc *interface = netdev_priv(netdev);
+       struct fm10k_l2_accel *l2_accel = interface->l2_accel;
        struct fm10k_hw *hw = &interface->hw;
+       u16 glort;
        s32 err;
        int i;
 
@@ -992,6 +977,22 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
        if (err)
                goto err_out;
 
+       /* Update L2 accelerated macvlan addresses */
+       if (l2_accel) {
+               for (i = 0; i < l2_accel->size; i++) {
+                       struct net_device *sdev = l2_accel->macvlan[i];
+
+                       if (!sdev)
+                               continue;
+
+                       glort = l2_accel->dglort + 1 + i;
+
+                       fm10k_queue_mac_request(interface, glort,
+                                               sdev->dev_addr,
+                                               vid, set);
+               }
+       }
+
        /* set VLAN ID prior to syncing/unsyncing the VLAN */
        interface->vid = vid + (set ? VLAN_N_VID : 0);
 
@@ -1231,6 +1232,22 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
 
                fm10k_queue_mac_request(interface, glort,
                                        hw->mac.addr, vid, true);
+
+               /* synchronize macvlan addresses */
+               if (l2_accel) {
+                       for (i = 0; i < l2_accel->size; i++) {
+                               struct net_device *sdev = l2_accel->macvlan[i];
+
+                               if (!sdev)
+                                       continue;
+
+                               glort = l2_accel->dglort + 1 + i;
+
+                               fm10k_queue_mac_request(interface, glort,
+                                                       sdev->dev_addr,
+                                                       vid, true);
+                       }
+               }
        }
 
        /* update xcast mode before synchronizing addresses if host's mailbox
@@ -1254,7 +1271,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
                        glort = l2_accel->dglort + 1 + i;
 
                        hw->mac.ops.update_xcast_mode(hw, glort,
-                                                     FM10K_XCAST_MODE_MULTI);
+                                                     FM10K_XCAST_MODE_NONE);
                        fm10k_queue_mac_request(interface, glort,
                                                sdev->dev_addr,
                                                hw->mac.default_vid, true);
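
The fm10k_update_vid() and fm10k_restore_rx_state() hunks above repeat one pattern: each offloaded macvlan station owns the glort at offset dglort + 1 + i from the l2_accel base, and any VLAN filter change on the lower device must be replayed once per occupied station slot. A hedged sketch of that replay loop, using the names from the hunks (the helper itself is illustrative):

    /* Sketch: replay a VLAN add/remove for every offloaded macvlan
     * station. The l2_accel->macvlan[] table may contain holes, so
     * NULL slots are skipped.
     */
    static void demo_sync_stations_for_vid(struct fm10k_intfc *interface,
                                           u16 vid, bool set)
    {
            struct fm10k_l2_accel *l2_accel = interface->l2_accel;
            int i;

            if (!l2_accel)
                    return;

            for (i = 0; i < l2_accel->size; i++) {
                    struct net_device *sdev = l2_accel->macvlan[i];

                    if (!sdev)
                            continue;

                    /* station i owns the glort one past the base */
                    fm10k_queue_mac_request(interface,
                                            l2_accel->dglort + 1 + i,
                                            sdev->dev_addr, vid, set);
            }
    }
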
@@ -1447,7 +1464,14 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
        struct fm10k_dglort_cfg dglort = { 0 };
        struct fm10k_hw *hw = &interface->hw;
        int size = 0, i;
-       u16 glort;
+       u16 vid, glort;
+
+       /* The hardware supported by fm10k only filters on the destination MAC
+        * address. In order to avoid issues we only support offloading modes
+        * where the hardware can actually provide the functionality.
+        */
+       if (!macvlan_supports_dest_filter(sdev))
+               return ERR_PTR(-EMEDIUMTYPE);
 
        /* allocate l2 accel structure if it is not available */
        if (!l2_accel) {
@@ -1513,12 +1537,18 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
 
        glort = l2_accel->dglort + 1 + i;
 
-       if (fm10k_host_mbx_ready(interface)) {
+       if (fm10k_host_mbx_ready(interface))
                hw->mac.ops.update_xcast_mode(hw, glort,
-                                             FM10K_XCAST_MODE_MULTI);
+                                             FM10K_XCAST_MODE_NONE);
+
+       fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+                               hw->mac.default_vid, true);
+
+       for (vid = fm10k_find_next_vlan(interface, 0);
+            vid < VLAN_N_VID;
+            vid = fm10k_find_next_vlan(interface, vid))
                fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
-                                       hw->mac.default_vid, true);
-       }
+                                       vid, true);
 
        fm10k_mbx_unlock(interface);
 
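
fm10k_dfwd_add_station() now rejects the offload up front when the macvlan runs in a mode that destination-MAC filtering cannot express (hence -EMEDIUMTYPE), drops the station's glort to FM10K_XCAST_MODE_NONE, and installs its address for the default VID plus every configured VLAN. A condensed sketch of that admission check and filter programming, assuming <linux/err.h>, <linux/if_macvlan.h> and <linux/if_vlan.h>; it omits the locking, dglort configuration and slot bookkeeping of the real function:

    /* Sketch: admit a macvlan station only if destination-MAC
     * filtering is sufficient, then install its address on the
     * default VID and on every active VLAN.
     */
    static void *demo_add_station(struct fm10k_intfc *interface,
                                  struct net_device *sdev, u16 glort)
    {
            u16 vid;

            if (!macvlan_supports_dest_filter(sdev))
                    return ERR_PTR(-EMEDIUMTYPE);

            fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
                                    interface->hw.mac.default_vid, true);

            /* walk the interface's active VLAN table */
            for (vid = fm10k_find_next_vlan(interface, 0);
                 vid < VLAN_N_VID;
                 vid = fm10k_find_next_vlan(interface, vid))
                    fm10k_queue_mac_request(interface, glort,
                                            sdev->dev_addr, vid, true);

            return sdev;
    }

The fm10k_dfwd_del_station() hunk below mirrors this with set == false, so add and remove stay symmetric across VLANs.
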
@@ -1532,8 +1562,8 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
        struct fm10k_dglort_cfg dglort = { 0 };
        struct fm10k_hw *hw = &interface->hw;
        struct net_device *sdev = priv;
+       u16 vid, glort;
        int i;
-       u16 glort;
 
        if (!l2_accel)
                return;
@@ -1553,12 +1583,18 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
 
        glort = l2_accel->dglort + 1 + i;
 
-       if (fm10k_host_mbx_ready(interface)) {
+       if (fm10k_host_mbx_ready(interface))
                hw->mac.ops.update_xcast_mode(hw, glort,
                                              FM10K_XCAST_MODE_NONE);
+
+       fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+                               hw->mac.default_vid, false);
+
+       for (vid = fm10k_find_next_vlan(interface, 0);
+            vid < VLAN_N_VID;
+            vid = fm10k_find_next_vlan(interface, vid))
                fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
-                                       hw->mac.default_vid, false);
-       }
+                                       vid, false);
 
        fm10k_mbx_unlock(interface);
 
index c4a2b688b38bc41a4ecc6458c314d45851d06e77..15071e4adb98c9924ab2e9a17c7f616f23104bd0 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/module.h>
 #include <linux/interrupt.h>
index 7ba54c534f8cbcd4f742aaad4940f183f45d77de..8f0a99b6a537fc26d8b118d24c74c82d6f38d3b5 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k_pf.h"
 #include "fm10k_vf.h"
index ae81f9a16602a50bd6df59ab58e72a4675e636cd..8e814df709d2a33c2c44cd58708963064ec076d7 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_PF_H_
 #define _FM10K_PF_H_
index 725ecb7abccd9a82180f90bde99ec6eb0c96b4de..2a7a40bf2b1c6b1156e1922b5e804b38c83c2340 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k_tlv.h"
 
index 5d2ee759507ee6635ffe149d67e37a4b7fcb5e62..160bc5b78f9928f46dbb99fc2b2189d4c8956524 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_TLV_H_
 #define _FM10K_TLV_H_
index dd23af11e2c132ae8749af94cc818f360d56e5a4..3e608e493f9df6bf921383f65be2139ad38cb288 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_TYPE_H_
 #define _FM10K_TYPE_H_
index f06913630b39b964cab61ad5bdc34689329337f5..a8519c1f0406dd77810ed98826d7193d850cdb84 100644 (file)
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "fm10k_vf.h"
 
index 66a66b73a2f1f6beee1e2c92e189276d510b314e..787d0d570a289a7ed31aab12c9fe4cb7f242784f 100644 (file)
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _FM10K_VF_H_
 #define _FM10K_VF_H_
index 75437768a07c759b5905c24b5ee230deaf316c6e..14397e7e9925e352a5454ef5207cb4f032d067e3 100644 (file)
@@ -1,29 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel Ethernet Controller XL710 Family Linux Driver
-# Copyright(c) 2013 - 2015 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
 
 #
 # Makefile for the Intel(R) Ethernet Connection XL710 (i40e.ko) driver
index a44139c1de80c8f5156d364805cacc67aa18118c..7a80652e25008503ae17fbafa158337266c00f90 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_H_
 #define _I40E_H_
@@ -334,10 +310,12 @@ struct i40e_tc_configuration {
        struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
 };
 
+#define I40E_UDP_PORT_INDEX_UNUSED     255
 struct i40e_udp_port_config {
        /* AdminQ command interface expects port number in Host byte order */
        u16 port;
        u8 type;
+       u8 filter_index;
 };
 
 /* macros related to FLX_PIT */
@@ -608,7 +586,7 @@ struct i40e_pf {
        unsigned long ptp_tx_start;
        struct hwtstamp_config tstamp_config;
        struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
-       u64 ptp_base_adj;
+       u32 ptp_adj_mult;
        u32 tx_hwtstamp_timeouts;
        u32 tx_hwtstamp_skipped;
        u32 rx_hwtstamp_cleared;
@@ -1009,6 +987,9 @@ void i40e_service_event_schedule(struct i40e_pf *pf);
 void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
                                  u8 *msg, u16 len);
 
+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, bool is_xdp,
+                          bool enable);
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable);
 int i40e_vsi_start_rings(struct i40e_vsi *vsi);
 void i40e_vsi_stop_rings(struct i40e_vsi *vsi);
 void i40e_vsi_stop_rings_no_wait(struct  i40e_vsi *vsi);
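
Two functional changes ride along with the license cleanup in i40e.h: UDP tunnel port slots now record the filter_index returned by firmware (with I40E_UDP_PORT_INDEX_UNUSED marking an empty slot), and the cached u64 ptp_base_adj becomes a u32 ptp_adj_mult, suggesting the PTP increment is now derived from a fixed base scaled by a link-speed multiplier rather than cached whole. A hedged sketch of that scaling arithmetic, under the assumption that the multiplier converts a base increment to the link-speed-correct value (names and constants here are illustrative, not i40e symbols):

    #include <linux/kernel.h>  /* abs() */
    #include <linux/math64.h>  /* div_u64() */

    /* Sketch: scale a base SYSTIME increment by a link-speed
     * multiplier, then apply a parts-per-billion adjustment.
     */
    static u64 demo_ptp_incval(u64 base_incval, u32 adj_mult, long ppb)
    {
            u64 incval = base_incval * adj_mult;
            u64 diff = div_u64(incval * (u64)abs(ppb), 1000000000ULL);

            return ppb < 0 ? incval - diff : incval + diff;
    }
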
index 843fc7781ef8b80f1f665328f62ef5437168cd6b..ddbea79d18e5fdfac88031e21c3d8d88678b06e0 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_status.h"
 #include "i40e_type.h"
index 0a8749ee9fd3184ec07f0310e668d28ecdf6b93e..edec3df789719aceec7fc941226eb9538f984628 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_ADMINQ_H_
 #define _I40E_ADMINQ_H_
index 0244923edeb8bb7aeadc76a883451d09d666e972..7d888e05f96f703c256142c2061bae5eb2b561b3 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_ADMINQ_CMD_H_
 #define _I40E_ADMINQ_CMD_H_
index abed0c52e782d7047a1475deb4ca3d6146a7430f..cb8689222c8b77c957be573b6fad8e6d75bcaa4d 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_ALLOC_H_
 #define _I40E_ALLOC_H_
index d8ce4999864f3c0137ad30b6098a5e63fb1ef498..5f3b8b9ff511d49a13e24f30f0d2a589915e40fe 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/list.h>
 #include <linux/errno.h>
@@ -64,7 +40,7 @@ static struct i40e_ops i40e_lan_ops = {
 /**
  * i40e_client_get_params - Get the params that can change at runtime
  * @vsi: the VSI with the message
- * @param: clinet param struct
+ * @params: client param struct
  *
  **/
 static
@@ -590,7 +566,7 @@ static int i40e_client_virtchnl_send(struct i40e_info *ldev,
  * i40e_client_setup_qvlist
  * @ldev: pointer to L2 context.
  * @client: Client pointer.
- * @qv_info: queue and vector list
+ * @qvlist_info: queue and vector list
  *
  * Return 0 on success or < 0 on error
  **/
@@ -665,7 +641,7 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev,
  * i40e_client_request_reset
  * @ldev: pointer to L2 context.
  * @client: Client pointer.
- * @level: reset level
+ * @reset_level: reset level
  **/
 static void i40e_client_request_reset(struct i40e_info *ldev,
                                      struct i40e_client *client,
index 9d464d40bc1731ea38b1358d9feff0750b42bb3f..72994baf494106da5acba2cfbd424bf04f7a5eb4 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_CLIENT_H_
 #define _I40E_CLIENT_H_
index c0a3dae8a2db336555ac21b73a9c88de47604b6d..eb2d1530d3316db8e0364fe9645e24f9604eb05a 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_type.h"
 #include "i40e_adminq.h"
@@ -1695,6 +1671,8 @@ enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
 /**
  * i40e_set_fc
  * @hw: pointer to the hw struct
+ * @aq_failures: buffer to return AdminQ failure information
+ * @atomic_restart: whether to enable atomic link restart
  *
  * Set the requested flow control mode using set_phy_config.
  **/
@@ -2831,8 +2809,8 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
  * @mr_list: list of mirrored VSI SEIDs or VLAN IDs
  * @cmd_details: pointer to command details structure or NULL
  * @rule_id: Rule ID returned from FW
- * @rule_used: Number of rules used in internal switch
- * @rule_free: Number of rules free in internal switch
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
  *
  * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for
  * VEBs/VEPA elements only
@@ -2892,8 +2870,8 @@ static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw,
  * @mr_list: list of mirrored VSI SEIDs or VLAN IDs
  * @cmd_details: pointer to command details structure or NULL
  * @rule_id: Rule ID returned from FW
- * @rule_used: Number of rules used in internal switch
- * @rule_free: Number of rules free in internal switch
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
  *
  * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only
  **/
@@ -2923,8 +2901,8 @@ i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
  *             add_mirrorrule.
  * @mr_list: list of mirrored VLAN IDs to be removed
  * @cmd_details: pointer to command details structure or NULL
- * @rule_used: Number of rules used in internal switch
- * @rule_free: Number of rules free in internal switch
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
  *
  * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only
  **/
@@ -3672,6 +3650,8 @@ i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
 /**
  * i40e_aq_start_lldp
  * @hw: pointer to the hw struct
+ * @buff: buffer for result
+ * @buff_size: buffer size
  * @cmd_details: pointer to command details structure or NULL
  *
  * Start the embedded LLDP Agent on all ports.
@@ -3752,7 +3732,6 @@ i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
  * i40e_aq_add_udp_tunnel
  * @hw: pointer to the hw struct
  * @udp_port: the UDP port to add in Host byte order
- * @header_len: length of the tunneling header length in DWords
  * @protocol_index: protocol index type
  * @filter_index: pointer to filter index
  * @cmd_details: pointer to command details structure or NULL
@@ -3971,6 +3950,7 @@ i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
  * @hw: pointer to the hw struct
  * @seid: seid of the switching component connected to Physical Port
  * @ets_data: Buffer holding ETS parameters
+ * @opcode: Tx scheduler AQ command opcode
  * @cmd_details: pointer to command details structure or NULL
  **/
 i40e_status i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
@@ -4314,10 +4294,10 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
  * @hw: pointer to the hw struct
  * @seid: VSI seid to add ethertype filter from
  **/
-#define I40E_FLOW_CONTROL_ETHTYPE 0x8808
 void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
                                                    u16 seid)
 {
+#define I40E_FLOW_CONTROL_ETHTYPE 0x8808
        u16 flag = I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC |
                   I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP |
                   I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX;
@@ -4448,6 +4428,7 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status)
  * @ret_buff_size: actual buffer size returned
  * @ret_next_table: next block to read
  * @ret_next_index: next index to read
+ * @cmd_details: pointer to command details structure or NULL
  *
  * Dump internal FW/HW data for debug purposes.
  *
@@ -4574,7 +4555,7 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
  * i40e_read_phy_register_clause22
  * @hw: pointer to the HW structure
  * @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
  * @value: PHY register value
  *
  * Reads specified PHY register value
@@ -4619,7 +4600,7 @@ i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
  * i40e_write_phy_register_clause22
  * @hw: pointer to the HW structure
  * @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
  * @value: PHY register value
  *
  * Writes specified PHY register value
@@ -4660,7 +4641,7 @@ i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
  * @value: PHY register value
  *
  * Reads specified PHY register value
@@ -4734,7 +4715,7 @@ i40e_status i40e_read_phy_register_clause45(struct i40e_hw *hw,
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
  * @value: PHY register value
  *
  * Writes value to specified PHY register
@@ -4801,7 +4782,7 @@ i40e_status i40e_write_phy_register_clause45(struct i40e_hw *hw,
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
  * @value: PHY register value
  *
  * Writes value to specified PHY register
@@ -4837,7 +4818,7 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
  * @value: PHY register value
  *
  * Reads specified PHY register value
@@ -4872,7 +4853,6 @@ i40e_status i40e_read_phy_register(struct i40e_hw *hw,
  * i40e_get_phy_address
  * @hw: pointer to the HW structure
  * @dev_num: PHY port num that address we want
- * @phy_addr: Returned PHY address
  *
  * Gets PHY address for current port
  **/
@@ -5082,7 +5062,9 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
  * i40e_led_set_phy
  * @hw: pointer to the HW structure
  * @on: true or false
+ * @led_addr: address of led register to use
  * @mode: original val plus bit for set or ignore
+ *
  * Set led's on or off when controlled by the PHY
  *
  **/
@@ -5371,6 +5353,7 @@ i40e_status_code i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
  * @hw: pointer to the hw struct
  * @buff: command buffer (size in bytes = buff_size)
  * @buff_size: buffer size in bytes
+ * @flags: AdminQ command flags
  * @cmd_details: pointer to command details structure or NULL
  **/
 enum
index 9fec728dc4b9a06e795058a4024252575c8d4952..56bff8faf37185fa9c6f45910aad6eb146dc07c5 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_adminq.h"
 #include "i40e_prototype.h"
@@ -944,6 +920,70 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw)
        return ret;
 }
 
+/**
+ * _i40e_read_lldp_cfg - generic read of LLDP Configuration data from NVM
+ * @hw: pointer to the HW structure
+ * @lldp_cfg: pointer to hold lldp configuration variables
+ * @module: address of the module pointer
+ * @word_offset: offset of LLDP configuration
+ *
+ * Reads the LLDP configuration data from NVM using passed addresses
+ **/
+static i40e_status _i40e_read_lldp_cfg(struct i40e_hw *hw,
+                                      struct i40e_lldp_variables *lldp_cfg,
+                                      u8 module, u32 word_offset)
+{
+       u32 address, offset = (2 * word_offset);
+       i40e_status ret;
+       __le16 raw_mem;
+       u16 mem;
+
+       ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (ret)
+               return ret;
+
+       ret = i40e_aq_read_nvm(hw, 0x0, module * 2, sizeof(raw_mem), &raw_mem,
+                              true, NULL);
+       i40e_release_nvm(hw);
+       if (ret)
+               return ret;
+
+       mem = le16_to_cpu(raw_mem);
+       /* Check if this pointer needs to be read in word size or 4K sector
+        * units.
+        */
+       if (mem & I40E_PTR_TYPE)
+               address = (0x7FFF & mem) * 4096;
+       else
+               address = (0x7FFF & mem) * 2;
+
+       ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (ret)
+               goto err_lldp_cfg;
+
+       ret = i40e_aq_read_nvm(hw, module, offset, sizeof(raw_mem), &raw_mem,
+                              true, NULL);
+       i40e_release_nvm(hw);
+       if (ret)
+               return ret;
+
+       mem = le16_to_cpu(raw_mem);
+       offset = mem + word_offset;
+       offset *= 2;
+
+       ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (ret)
+               goto err_lldp_cfg;
+
+       ret = i40e_aq_read_nvm(hw, 0, address + offset,
+                              sizeof(struct i40e_lldp_variables), lldp_cfg,
+                              true, NULL);
+       i40e_release_nvm(hw);
+
+err_lldp_cfg:
+       return ret;
+}
+
 /**
  * i40e_read_lldp_cfg - read LLDP Configuration data from NVM
  * @hw: pointer to the HW structure
@@ -955,21 +995,34 @@ i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
                               struct i40e_lldp_variables *lldp_cfg)
 {
        i40e_status ret = 0;
-       u32 offset = (2 * I40E_NVM_LLDP_CFG_PTR);
+       u32 mem;
 
        if (!lldp_cfg)
                return I40E_ERR_PARAM;
 
        ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
        if (ret)
-               goto err_lldp_cfg;
+               return ret;
 
-       ret = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR, offset,
-                              sizeof(struct i40e_lldp_variables),
-                              (u8 *)lldp_cfg,
-                              true, NULL);
+       ret = i40e_aq_read_nvm(hw, I40E_SR_NVM_CONTROL_WORD, 0, sizeof(mem),
+                              &mem, true, NULL);
        i40e_release_nvm(hw);
+       if (ret)
+               return ret;
+
+       /* Read a bit that holds information whether we are running flat or
+        * structured NVM image. Flat image has LLDP configuration in shadow
+        * ram, so there is a need to pass different addresses for both cases.
+        */
+       if (mem & I40E_SR_NVM_MAP_STRUCTURE_TYPE) {
+               /* Flat NVM case */
+               ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_SR_EMP_MODULE_PTR,
+                                         I40E_SR_LLDP_CFG_PTR);
+       } else {
+               /* Good old structured NVM image */
+               ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_EMP_MODULE_PTR,
+                                         I40E_NVM_LLDP_CFG_PTR);
+       }
 
-err_lldp_cfg:
        return ret;
 }
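
The new _i40e_read_lldp_cfg() chases NVM pointers whose type bit (I40E_PTR_TYPE) selects the unit of the remaining 15 bits: 4 KB sectors when set, 16-bit words otherwise. The public wrapper first reads the NVM control word to decide whether the image is flat (LLDP configuration in shadow RAM) or structured, and picks the module/offset pair accordingly. The pointer decode, isolated into a standalone sketch (DEMO_PTR_TYPE is assumed to be bit 15, matching the 0x7FFF value mask used above):

    #include <linux/bits.h>
    #include <linux/types.h>

    #define DEMO_PTR_TYPE BIT(15) /* assumed value of I40E_PTR_TYPE */

    /* Sketch: convert an NVM pointer word into a byte address. The
     * low 15 bits count 4 KB sectors or 16-bit words depending on
     * the type bit.
     */
    static u32 demo_nvm_ptr_to_address(u16 mem)
    {
            if (mem & DEMO_PTR_TYPE)
                    return (mem & 0x7FFF) * 4096; /* sector units */

            return (mem & 0x7FFF) * 2; /* word units */
    }
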
index 4f806386cb227f2d9fc2645439865b0584d03696..2b748a60a843ce704ddcd34849607c638c1f9b86 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_DCB_H_
 #define _I40E_DCB_H_
index 502818e3da7888c16b959987f2c08180974905f2..9deae9a35423f0b6462cba09ab1c7df141123790 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifdef CONFIG_I40E_DCB
 #include "i40e.h"
@@ -47,7 +23,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay)
 
 /**
  * i40e_dcbnl_ieee_getets - retrieve local IEEE ETS configuration
- * @netdev: the corresponding netdev
+ * @dev: the corresponding netdev
  * @ets: structure to hold the ETS information
  *
  * Returns local IEEE ETS configuration
@@ -86,8 +62,8 @@ static int i40e_dcbnl_ieee_getets(struct net_device *dev,
 
 /**
  * i40e_dcbnl_ieee_getpfc - retrieve local IEEE PFC configuration
- * @netdev: the corresponding netdev
- * @ets: structure to hold the PFC information
+ * @dev: the corresponding netdev
+ * @pfc: structure to hold the PFC information
  *
  * Returns local IEEE PFC configuration
  **/
@@ -119,7 +95,7 @@ static int i40e_dcbnl_ieee_getpfc(struct net_device *dev,
 
 /**
  * i40e_dcbnl_getdcbx - retrieve current DCBx capability
- * @netdev: the corresponding netdev
+ * @dev: the corresponding netdev
  *
  * Returns DCBx capability features
  **/
@@ -132,7 +108,8 @@ static u8 i40e_dcbnl_getdcbx(struct net_device *dev)
 
 /**
  * i40e_dcbnl_get_perm_hw_addr - MAC address used by DCBx
- * @netdev: the corresponding netdev
+ * @dev: the corresponding netdev
+ * @perm_addr: buffer to store the MAC address
  *
  * Returns the SAN MAC address used for LLDP exchange
  **/
index d494dcaf18d0d6a9c2da0fd69e642985853668c0..56b911a5dd8be669ec0fcd231eeb01987cd7afc6 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -36,8 +12,8 @@ static struct dentry *i40e_dbg_root;
 
 /**
  * i40e_dbg_find_vsi - searches for the vsi with the given seid
- * @pf - the PF structure to search for the vsi
- * @seid - seid of the vsi it is searching for
+ * @pf: the PF structure to search for the vsi
+ * @seid: seid of the vsi it is searching for
  **/
 static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
 {
@@ -55,8 +31,8 @@ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
 
 /**
  * i40e_dbg_find_veb - searches for the veb with the given seid
- * @pf - the PF structure to search for the veb
- * @seid - seid of the veb it is searching for
+ * @pf: the PF structure to search for the veb
+ * @seid: seid of the veb it is searching for
  **/
 static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid)
 {
index ad6a66ccb57683a31bae15d2500765ea1d9a6f6d..334b05ff685ac937ecfde38afb41176bb66240ac 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_DEVIDS_H_
 #define _I40E_DEVIDS_H_
index df3e60470f8bc9689ac7f7dad8121b4479207ea7..ef4d3762bf371191c75dba65a53ea784136803ed 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_diag.h"
 #include "i40e_prototype.h"
index be8341763475af813e6365bf7b31ed34763e9354..c3340f320a18c56dd602c4dc910bc922782c0a72 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_DIAG_H_
 #define _I40E_DIAG_H_
index b974482ff63036386db0874e0087af567b126ed7..329e59eae4a1811986bcc080762533007e19c8f9 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 /* ethtool support for i40e */
 
@@ -43,13 +19,13 @@ struct i40e_stats {
 }
 
 #define I40E_NETDEV_STAT(_net_stat) \
-               I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
+       I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
 #define I40E_PF_STAT(_name, _stat) \
-               I40E_STAT(struct i40e_pf, _name, _stat)
+       I40E_STAT(struct i40e_pf, _name, _stat)
 #define I40E_VSI_STAT(_name, _stat) \
-               I40E_STAT(struct i40e_vsi, _name, _stat)
+       I40E_STAT(struct i40e_vsi, _name, _stat)
 #define I40E_VEB_STAT(_name, _stat) \
-               I40E_STAT(struct i40e_veb, _name, _stat)
+       I40E_STAT(struct i40e_veb, _name, _stat)
 
 static const struct i40e_stats i40e_gstrings_net_stats[] = {
        I40E_NETDEV_STAT(rx_packets),
@@ -90,6 +66,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
        I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
        I40E_VSI_STAT("tx_linearize", tx_linearize),
        I40E_VSI_STAT("tx_force_wb", tx_force_wb),
+       I40E_VSI_STAT("tx_busy", tx_busy),
        I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed),
        I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
 };
@@ -127,10 +104,6 @@ static const struct i40e_stats i40e_gstrings_stats[] = {
        I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
        I40E_PF_STAT("link_xon_tx", stats.link_xon_tx),
        I40E_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
-       I40E_PF_STAT("priority_xon_rx", stats.priority_xon_rx),
-       I40E_PF_STAT("priority_xoff_rx", stats.priority_xoff_rx),
-       I40E_PF_STAT("priority_xon_tx", stats.priority_xon_tx),
-       I40E_PF_STAT("priority_xoff_tx", stats.priority_xoff_tx),
        I40E_PF_STAT("rx_size_64", stats.rx_size_64),
        I40E_PF_STAT("rx_size_127", stats.rx_size_127),
        I40E_PF_STAT("rx_size_255", stats.rx_size_255),
@@ -172,9 +145,9 @@ static const struct i40e_stats i40e_gstrings_stats[] = {
            * 2 /* Tx and Rx together */                                     \
            * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
 #define I40E_GLOBAL_STATS_LEN  ARRAY_SIZE(i40e_gstrings_stats)
-#define I40E_NETDEV_STATS_LEN   ARRAY_SIZE(i40e_gstrings_net_stats)
+#define I40E_NETDEV_STATS_LEN  ARRAY_SIZE(i40e_gstrings_net_stats)
 #define I40E_MISC_STATS_LEN    ARRAY_SIZE(i40e_gstrings_misc_stats)
-#define I40E_VSI_STATS_LEN(n)   (I40E_NETDEV_STATS_LEN + \
+#define I40E_VSI_STATS_LEN(n)  (I40E_NETDEV_STATS_LEN + \
                                 I40E_MISC_STATS_LEN + \
                                 I40E_QUEUE_STATS_LEN((n)))
 #define I40E_PFC_STATS_LEN ( \
@@ -977,7 +950,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
            ethtool_link_ksettings_test_link_mode(ks, advertising,
                                                  10000baseCR_Full) ||
            ethtool_link_ksettings_test_link_mode(ks, advertising,
-                                                 10000baseSR_Full))
+                                                 10000baseSR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseLR_Full))
                config.link_speed |= I40E_LINK_SPEED_10GB;
        if (ethtool_link_ksettings_test_link_mode(ks, advertising,
                                                  20000baseKR2_Full))
@@ -1079,6 +1054,9 @@ static int i40e_nway_reset(struct net_device *netdev)
 
 /**
  * i40e_get_pauseparam -  Get Flow Control status
+ * @netdev: netdevice structure
+ * @pause: buffer to return pause parameters
+ *
  * Return tx/rx-pause status
  **/
 static void i40e_get_pauseparam(struct net_device *netdev,
@@ -1677,6 +1655,23 @@ static int i40e_set_ringparam(struct net_device *netdev,
        return err;
 }
 
+static int i40e_get_stats_count(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+
+       if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
+               if (pf->lan_veb != I40E_NO_VEB &&
+                   pf->flags & I40E_FLAG_VEB_STATS_ENABLED)
+                       return I40E_PF_STATS_LEN(netdev) + I40E_VEB_STATS_TOTAL;
+               else
+                       return I40E_PF_STATS_LEN(netdev);
+       } else {
+               return I40E_VSI_STATS_LEN(netdev);
+       }
+}
+
 static int i40e_get_sset_count(struct net_device *netdev, int sset)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -1687,16 +1682,7 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset)
        case ETH_SS_TEST:
                return I40E_TEST_LEN;
        case ETH_SS_STATS:
-               if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
-                       int len = I40E_PF_STATS_LEN(netdev);
-
-                       if ((pf->lan_veb != I40E_NO_VEB) &&
-                           (pf->flags & I40E_FLAG_VEB_STATS_ENABLED))
-                               len += I40E_VEB_STATS_TOTAL;
-                       return len;
-               } else {
-                       return I40E_VSI_STATS_LEN(netdev);
-               }
+               return i40e_get_stats_count(netdev);
        case ETH_SS_PRIV_FLAGS:
                return I40E_PRIV_FLAGS_STR_LEN +
                        (pf->hw.pf_id == 0 ? I40E_GL_PRIV_FLAGS_STR_LEN : 0);
@@ -2550,7 +2536,7 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
 /**
  * i40e_check_mask - Check whether a mask field is set
  * @mask: the full mask value
- * @field; mask of the field to check
+ * @field: mask of the field to check
  *
  * If the given mask is fully set, return a positive value. If the mask for the
  * field is fully unset, return zero. Otherwise return a negative error code.
@@ -2621,6 +2607,7 @@ static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
 /**
  * i40e_fill_rx_flow_user_data - Fill in user-defined data field
  * @fsp: pointer to rx_flow specification
+ * @data: pointer to return userdef data
  *
  * Reads the userdef data structure and properly fills in the user defined
  * fields of the rx_flow_spec.
@@ -2799,6 +2786,7 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
  * i40e_get_rxnfc - command to get RX flow classification rules
  * @netdev: network interface device structure
  * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule data
  *
  * Returns Success if the command is supported.
  **/
@@ -2840,7 +2828,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 /**
  * i40e_get_rss_hash_bits - Read RSS Hash bits from register
  * @nfc: pointer to user request
- * @i_setc bits currently set
+ * @i_setc: bits currently set
  *
  * Returns value of bits to be set per user request
  **/
@@ -2885,7 +2873,7 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
 /**
  * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
  * @pf: pointer to the physical function struct
- * @cmd: ethtool rxnfc command
+ * @nfc: ethtool rxnfc command
  *
  * Returns Success if the flow input set is supported.
  **/
@@ -3284,7 +3272,7 @@ static int i40e_add_flex_offset(struct list_head *flex_pit_list,
  * __i40e_reprogram_flex_pit - Re-program specific FLX_PIT table
  * @pf: Pointer to the PF structure
  * @flex_pit_list: list of flexible src offsets in use
- * #flex_pit_start: index to first entry for this section of the table
+ * @flex_pit_start: index to first entry for this section of the table
  *
  * In order to handle flexible data, the hardware uses a table of values
  * called the FLX_PIT table. This table is used to indicate which sections of
@@ -3398,7 +3386,7 @@ static void i40e_reprogram_flex_pit(struct i40e_pf *pf)
 
 /**
  * i40e_flow_str - Converts a flow_type into a human readable string
- * @flow_type: the flow type from a flow specification
+ * @fsp: the flow specification
  *
  * Currently only flow types we support are included here, and the string
  * value attempts to match what ethtool would use to configure this flow type.
@@ -4103,7 +4091,7 @@ static unsigned int i40e_max_channels(struct i40e_vsi *vsi)
 
 /**
  * i40e_get_channels - Get the current channels enabled and max supported etc.
- * @netdev: network interface device structure
+ * @dev: network interface device structure
  * @ch: ethtool channels structure
  *
  * We don't support separate tx and rx queues as channels. The other count
@@ -4112,7 +4100,7 @@ static unsigned int i40e_max_channels(struct i40e_vsi *vsi)
  * q_vectors since we support a lot more queue pairs than q_vectors.
  **/
 static void i40e_get_channels(struct net_device *dev,
-                              struct ethtool_channels *ch)
+                             struct ethtool_channels *ch)
 {
        struct i40e_netdev_priv *np = netdev_priv(dev);
        struct i40e_vsi *vsi = np->vsi;
@@ -4131,14 +4119,14 @@ static void i40e_get_channels(struct net_device *dev,
 
 /**
  * i40e_set_channels - Set the new channels count.
- * @netdev: network interface device structure
+ * @dev: network interface device structure
  * @ch: ethtool channels structure
  *
  * The new channels count may not be the same as requested by the user
  * since it gets rounded down to a power of 2 value.
  **/
 static int i40e_set_channels(struct net_device *dev,
-                             struct ethtool_channels *ch)
+                            struct ethtool_channels *ch)
 {
        const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
        struct i40e_netdev_priv *np = netdev_priv(dev);
@@ -4273,6 +4261,7 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
  * @netdev: network interface device structure
  * @indir: indirection table
  * @key: hash key
+ * @hfunc: hash function to use
  *
  * Returns -EINVAL if the table specifies an invalid queue id, otherwise
  * returns 0 after programming the table.
index 6d4b590f851b95322f8989134740e66ee18c7bd8..19ce93d7fd0a900f07fa606f79eca07c2bd0f9f8 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_osdep.h"
 #include "i40e_register.h"
@@ -198,7 +174,6 @@ i40e_status i40e_add_pd_table_entry(struct i40e_hw *hw,
  * @hw: pointer to our HW structure
  * @hmc_info: pointer to the HMC configuration information structure
  * @idx: the page index
- * @is_pf: distinguishes a VF from a PF
  *
  * This function:
  *     1. Marks the entry in pd table (for paged address mode) or in sd table
index 7b5fd33d70ae7eb315115fb119ee7e31f9091a5a..1c78de838857be788669cc6873984d0ac1cd49d5 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_HMC_H_
 #define _I40E_HMC_H_
index cd40dc487b38d8ff3b4b74306060004927d25688..994011c38fb4db0181e3626a6e734cfc9c65f188 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_osdep.h"
 #include "i40e_register.h"
index 79e1396735d90eef0124f442b4a221d2a0a2d0b1..c46a2c449e60e6ced95eb9894dedc0c8130dd59a 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_LAN_HMC_H_
 #define _I40E_LAN_HMC_H_
index 16229998fb1e0eb7f2d961b3062e06ae8d47c427..b5daa5c9c7de2c30ddc7da6f7b2235cef6405c86 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/etherdevice.h>
 #include <linux/of_net.h>
@@ -278,8 +254,8 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
 
 /**
  * i40e_find_vsi_from_id - searches for the vsi with the given id
- * @pf - the pf structure to search for the vsi
- * @id - id of the vsi it is searching for
+ * @pf: the pf structure to search for the vsi
+ * @id: id of the vsi it is searching for
  **/
 struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
 {
@@ -435,6 +411,7 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring,
 /**
  * i40e_get_netdev_stats_struct - Get statistics for netdev interface
  * @netdev: network interface device structure
+ * @stats: data structure to store statistics
  *
  * Returns the address of the device statistics structure.
  * The statistics are actually updated from the service task.
@@ -2027,7 +2004,7 @@ struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
  * from firmware
  * @count: Number of filters added
  * @add_list: return data from fw
- * @head: pointer to first filter in current batch
+ * @add_head: pointer to first filter in current batch
  *
  * MAC filter entries from list were slated to be added to device. Returns
  * number of successful filters. Note that 0 does NOT mean success!
@@ -2134,6 +2111,7 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
 /**
  * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags
  * @vsi: pointer to the VSI
+ * @vsi_name: the VSI name
  * @f: filter data
  *
  * This function sets or clears the promiscuous broadcast flags for VLAN
@@ -2840,6 +2818,7 @@ void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
 /**
  * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
  * @netdev: network interface to be adjusted
+ * @proto: unused protocol value
  * @vid: vlan id to be added
  *
  * net_device_ops implementation for adding vlan ids
@@ -2861,9 +2840,27 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
        return ret;
 }
 
+/**
+ * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol value
+ * @vid: vlan id to be added
+ **/
+static void i40e_vlan_rx_add_vid_up(struct net_device *netdev,
+                                   __always_unused __be16 proto, u16 vid)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       if (vid >= VLAN_N_VID)
+               return;
+       set_bit(vid, vsi->active_vlans);
+}
+
 /**
  * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
  * @netdev: network interface to be adjusted
+ * @proto: unused protocol value
  * @vid: vlan id to be removed
  *
  * net_device_ops implementation for removing vlan ids
@@ -2902,8 +2899,8 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
                i40e_vlan_stripping_disable(vsi);
 
        for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
-               i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q),
-                                    vid);
+               i40e_vlan_rx_add_vid_up(vsi->netdev, htons(ETH_P_8021Q),
+                                       vid);
 }
 
 /**
@@ -3485,7 +3482,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 
 /**
  * i40e_enable_misc_int_causes - enable the non-queue interrupts
- * @hw: ptr to the hardware info
+ * @pf: pointer to private device data structure
  **/
 static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
 {
@@ -4255,8 +4252,8 @@ static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable)
  * @is_xdp: true if the queue is used for XDP
  * @enable: start or stop the queue
  **/
-static int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
-                                 bool is_xdp, bool enable)
+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
+                          bool is_xdp, bool enable)
 {
        int ret;
 
@@ -4301,7 +4298,6 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
                if (ret)
                        break;
        }
-
        return ret;
 }
 
@@ -4340,9 +4336,9 @@ static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable)
  * @pf_q: the PF queue to configure
  * @enable: start or stop the queue
  *
- * This function enables or disables a single queue. Note that any delay
- * required after the operation is expected to be handled by the caller of
- * this function.
+ * This function enables or disables a single queue. Note that
+ * any delay required after the operation is expected to be
+ * handled by the caller of this function.
  **/
 static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
 {
@@ -4371,6 +4367,30 @@ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
        wr32(hw, I40E_QRX_ENA(pf_q), rx_reg);
 }
 
+/**
+ * i40e_control_wait_rx_q
+ * @pf: the PF structure
+ * @pf_q: queue being configured
+ * @enable: start or stop the rings
+ *
+ * This function enables or disables a single queue along with waiting
+ * for the change to finish. The caller of this function should handle
+ * the delays needed in the case of disabling queues.
+ **/
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
+{
+       i40e_control_rx_q(pf, pf_q, enable);
+
+       /* wait for the change to finish */
+       return i40e_pf_rxq_wait(pf, pf_q, enable);
+}
+
 /**
  * i40e_vsi_control_rx - Start or stop a VSI's rings
  * @vsi: the VSI being configured
@@ -4383,10 +4403,7 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
 
        pf_q = vsi->base_queue;
        for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
-               i40e_control_rx_q(pf, pf_q, enable);
-
-               /* wait for the change to finish */
-               ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+               ret = i40e_control_wait_rx_q(pf, pf_q, enable);
                if (ret) {
                        dev_info(&pf->pdev->dev,
                                 "VSI seid %d Rx ring %d %sable timeout\n",
@@ -5096,7 +5113,7 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
  * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
  * @vsi: the VSI being configured
  * @enabled_tc: TC bitmap
- * @bw_credits: BW shared credits per TC
+ * @bw_share: BW shared credits per TC
  *
  * Returns 0 on success, negative value on failure
  **/
@@ -6353,6 +6370,7 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
 /**
  * i40e_print_link_message - print link up or down
  * @vsi: the VSI for which link needs a message
+ * @isup: true if link is up, false otherwise
  */
 void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 {
@@ -7212,8 +7230,7 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
                        if (mask->dst == cpu_to_be32(0xffffffff)) {
                                field_flags |= I40E_CLOUD_FIELD_IIP;
                        } else {
-                               mask->dst = be32_to_cpu(mask->dst);
-                               dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n",
+                               dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
                                        &mask->dst);
                                return I40E_ERR_CONFIG;
                        }
@@ -7223,8 +7240,7 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
                        if (mask->src == cpu_to_be32(0xffffffff)) {
                                field_flags |= I40E_CLOUD_FIELD_IIP;
                        } else {
-                               mask->src = be32_to_cpu(mask->src);
-                               dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n",
+                               dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
                                        &mask->src);
                                return I40E_ERR_CONFIG;
                        }
@@ -9691,9 +9707,9 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
        i40e_flush(hw);
 }
 
-static const char *i40e_tunnel_name(struct i40e_udp_port_config *port)
+static const char *i40e_tunnel_name(u8 type)
 {
-       switch (port->type) {
+       switch (type) {
        case UDP_TUNNEL_TYPE_VXLAN:
                return "vxlan";
        case UDP_TUNNEL_TYPE_GENEVE:
@@ -9727,37 +9743,68 @@ static void i40e_sync_udp_filters(struct i40e_pf *pf)
 static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
 {
        struct i40e_hw *hw = &pf->hw;
-       i40e_status ret;
+       u8 filter_index, type;
        u16 port;
        int i;
 
        if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state))
                return;
 
+       /* acquire RTNL to maintain state of flags and port requests */
+       rtnl_lock();
+
        for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
                if (pf->pending_udp_bitmap & BIT_ULL(i)) {
+                       struct i40e_udp_port_config *udp_port;
+                       i40e_status ret = 0;
+
+                       udp_port = &pf->udp_ports[i];
                        pf->pending_udp_bitmap &= ~BIT_ULL(i);
-                       port = pf->udp_ports[i].port;
+
+                       port = READ_ONCE(udp_port->port);
+                       type = READ_ONCE(udp_port->type);
+                       filter_index = READ_ONCE(udp_port->filter_index);
+
+                       /* release RTNL while we wait on AQ command */
+                       rtnl_unlock();
+
                        if (port)
                                ret = i40e_aq_add_udp_tunnel(hw, port,
-                                                       pf->udp_ports[i].type,
-                                                       NULL, NULL);
-                       else
-                               ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
+                                                            type,
+                                                            &filter_index,
+                                                            NULL);
+                       else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED)
+                               ret = i40e_aq_del_udp_tunnel(hw, filter_index,
+                                                            NULL);
+
+                       /* reacquire RTNL so we can update filter_index */
+                       rtnl_lock();
 
                        if (ret) {
                                dev_info(&pf->pdev->dev,
                                         "%s %s port %d, index %d failed, err %s aq_err %s\n",
-                                        i40e_tunnel_name(&pf->udp_ports[i]),
+                                        i40e_tunnel_name(type),
                                         port ? "add" : "delete",
-                                        port, i,
+                                        port,
+                                        filter_index,
                                         i40e_stat_str(&pf->hw, ret),
                                         i40e_aq_str(&pf->hw,
                                                     pf->hw.aq.asq_last_status));
-                               pf->udp_ports[i].port = 0;
+                               if (port) {
+                                       /* failed to add, just reset port,
+                                        * drop pending bit for any deletion
+                                        */
+                                       udp_port->port = 0;
+                                       pf->pending_udp_bitmap &= ~BIT_ULL(i);
+                               }
+                       } else if (port) {
+                               /* record filter index on success */
+                               udp_port->filter_index = filter_index;
                        }
                }
        }
+
+       rtnl_unlock();
 }
 
 /**
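
The hunk above drops RTNL across the slow AdminQ round trip and reacquires it to publish the result. A minimal userspace sketch of that lock discipline, with a pthread mutex standing in for RTNL and a stub for the firmware call (every name here is illustrative, not driver API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for RTNL */
static unsigned short pending_port = 4789; /* a hypothetical queued VXLAN port */

static int slow_firmware_add(unsigned short port)
{
        return port ? 0 : -1; /* stub for the AdminQ add-tunnel command */
}

int main(void)
{
        pthread_mutex_lock(&cfg_lock);
        unsigned short port = pending_port; /* snapshot under the lock, like READ_ONCE() */
        pthread_mutex_unlock(&cfg_lock);    /* never hold the lock over the slow call */

        int ret = slow_firmware_add(port);

        pthread_mutex_lock(&cfg_lock);      /* reacquire to publish the outcome */
        if (ret)
                pending_port = 0;           /* on failure, reset the port as above */
        pthread_mutex_unlock(&cfg_lock);

        printf("sync ret=%d\n", ret);
        return 0;
}
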
@@ -10004,7 +10051,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 
 /**
  * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI
- * @type: VSI pointer
+ * @vsi: VSI pointer
  * @free_qvectors: a bool to specify if q_vectors need to be freed.
  *
  * On error: returns error code (negative)
@@ -10279,21 +10326,28 @@ static int i40e_init_msix(struct i40e_pf *pf)
 
        /* any vectors left over go for VMDq support */
        if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
-               int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps;
-               int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted);
-
                if (!vectors_left) {
                        pf->num_vmdq_msix = 0;
                        pf->num_vmdq_qps = 0;
                } else {
+                       int vmdq_vecs_wanted =
+                               pf->num_vmdq_vsis * pf->num_vmdq_qps;
+                       int vmdq_vecs =
+                               min_t(int, vectors_left, vmdq_vecs_wanted);
+
                        /* if we're short on vectors for what's desired, we limit
                         * the queues per vmdq.  If this is still more than are
                         * available, the user will need to change the number of
                         * queues/vectors used by the PF later with the ethtool
                         * channels command
                         */
-                       if (vmdq_vecs < vmdq_vecs_wanted)
+                       if (vectors_left < vmdq_vecs_wanted) {
                                pf->num_vmdq_qps = 1;
+                               vmdq_vecs_wanted = pf->num_vmdq_vsis;
+                               vmdq_vecs = min_t(int,
+                                                 vectors_left,
+                                                 vmdq_vecs_wanted);
+                       }
                        pf->num_vmdq_msix = pf->num_vmdq_qps;
 
                        v_budget += vmdq_vecs;
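
A standalone walk-through of the reworked vector budgeting above, under assumed numbers (4 VMDq VSIs wanting 2 queue pairs each, but only 3 vectors left):

#include <stdio.h>

int main(void)
{
        int vectors_left = 3, num_vmdq_vsis = 4, num_vmdq_qps = 2;

        int wanted = num_vmdq_vsis * num_vmdq_qps;        /* 8 */
        if (vectors_left < wanted) {
                num_vmdq_qps = 1;                         /* shrink to one qp per VSI */
                wanted = num_vmdq_vsis;                   /* now 4 */
        }
        int vecs = vectors_left < wanted ? vectors_left : wanted;
        printf("qps=%d vecs=%d\n", num_vmdq_qps, vecs);   /* qps=1 vecs=3 */
        return 0;
}
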
@@ -10800,7 +10854,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
  * @vsi: Pointer to VSI structure
  * @seed: Buffer to store the keys
  * @lut: Buffer to store the lookup table entries
- * lut_size: Size of buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
  *
  * Returns 0 on success, negative on failure
  */
@@ -11374,6 +11428,11 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port)
        u8 i;
 
        for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
+               /* Do not report ports with pending deletions as
+                * being available.
+                */
+               if (!port && (pf->pending_udp_bitmap & BIT_ULL(i)))
+                       continue;
                if (pf->udp_ports[i].port == port)
                        return i;
        }
@@ -11428,6 +11487,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
 
        /* New port: add it and mark its index in the bitmap */
        pf->udp_ports[next_idx].port = port;
+       pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED;
        pf->pending_udp_bitmap |= BIT_ULL(next_idx);
        set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
 }
@@ -11469,7 +11529,12 @@ static void i40e_udp_tunnel_del(struct net_device *netdev,
         * and make it pending
         */
        pf->udp_ports[idx].port = 0;
-       pf->pending_udp_bitmap |= BIT_ULL(idx);
+
+       /* Toggle pending bit instead of setting it. This way if we are
+        * deleting a port that has yet to be added we just clear the pending
+        * bit and don't have to worry about it.
+        */
+       pf->pending_udp_bitmap ^= BIT_ULL(idx);
        set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
 
        return;
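
The XOR makes an add followed by a delete cancel out before the sync task ever runs. A small standalone check of that bit discipline (illustrative, not driver code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t pending = 0;
        int idx = 3;

        pending |= UINT64_C(1) << idx;  /* add: port queued for an AQ add */
        pending ^= UINT64_C(1) << idx;  /* delete before sync: work cancels */
        assert(!(pending & (UINT64_C(1) << idx)));

        pending ^= UINT64_C(1) << idx;  /* delete of an already-synced port */
        assert(pending & (UINT64_C(1) << idx));
        return 0;
}
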
@@ -11500,6 +11565,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
  * @tb: pointer to array of nladdr (unused)
  * @dev: the net device pointer
  * @addr: the MAC address entry being added
+ * @vid: VLAN ID
  * @flags: instructions from stack about fdb operation
  */
 static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
@@ -11545,6 +11611,7 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
  * i40e_ndo_bridge_setlink - Set the hardware bridge mode
  * @dev: the netdev being configured
  * @nlh: RTNL message
+ * @flags: bridge flags
  *
  * Inserts a new hardware bridge if not already created and
  * enables the bridging mode requested (VEB or VEPA). If the
@@ -14118,6 +14185,7 @@ static void i40e_remove(struct pci_dev *pdev)
 /**
  * i40e_pci_error_detected - warning that something funky happened in PCI land
  * @pdev: PCI device information struct
+ * @error: the type of PCI error
  *
  * Called to warn that something happened and the error handling steps
  * are in progress.  Allows the driver to quiesce things, be ready for
index ba9687c037950acd9dbe531f314f55c1480f4c8a..0299e5bbb9022a457fa398ca7a401bc73bc6c1b8 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_prototype.h"
 
@@ -1173,6 +1149,7 @@ void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw)
  * i40e_nvmupd_check_wait_event - handle NVM update operation events
  * @hw: pointer to the hardware structure
  * @opcode: the event that just happened
+ * @desc: AdminQ descriptor
  **/
 void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
                                  struct i40e_aq_desc *desc)
index 9c3c3b0d3ac46ed2c9be2f5bfb9aae751b5ed9bc..a07574bff5508bd7c606f8010b0e96fd5345d8e4 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_OSDEP_H_
 #define _I40E_OSDEP_H_
index 2ec24188d6e221ca08a239021e99bb0c1c5865a9..3170655cdeb990b02f12edf5ffa42c6811201d58 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_PROTOTYPE_H_
 #define _I40E_PROTOTYPE_H_
index 5b47dd1f75a56b09f8ca69d9dc4268f319998f6a..d50d84927e6b1d39a7c1099cdabd908e5351ada8 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e.h"
 #include <linux/ptp_classify.h>
@@ -40,9 +16,9 @@
  * At 1Gb link, the period is multiplied by 20. (32ns)
  * 1588 functionality is not supported at 100Mbps.
  */
-#define I40E_PTP_40GB_INCVAL 0x0199999999ULL
-#define I40E_PTP_10GB_INCVAL 0x0333333333ULL
-#define I40E_PTP_1GB_INCVAL  0x2000000000ULL
+#define I40E_PTP_40GB_INCVAL           0x0199999999ULL
+#define I40E_PTP_10GB_INCVAL_MULT      2
+#define I40E_PTP_1GB_INCVAL_MULT       20
 
 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1  BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V2  (2 << \
@@ -130,17 +106,24 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
                ppb = -ppb;
        }
 
-       smp_mb(); /* Force any pending update before accessing. */
-       adj = READ_ONCE(pf->ptp_base_adj);
-
-       freq = adj;
+       freq = I40E_PTP_40GB_INCVAL;
        freq *= ppb;
        diff = div_u64(freq, 1000000000ULL);
 
        if (neg_adj)
-               adj -= diff;
+               adj = I40E_PTP_40GB_INCVAL - diff;
        else
-               adj += diff;
+               adj = I40E_PTP_40GB_INCVAL + diff;
+
+       /* At some link speeds, the base incval is so large that directly
+        * multiplying by ppb would result in arithmetic overflow even when
+        * using a u64. Avoid this by instead calculating the new incval
+        * always in terms of the 40GbE clock rate and then multiplying by the
+        * link speed factor afterwards. This does result in slightly lower
+        * precision at lower link speeds, but it is fairly minor.
+        */
+       smp_mb(); /* Force any pending update before accessing. */
+       adj *= READ_ONCE(pf->ptp_adj_mult);
 
        wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF);
        wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32);
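
The overflow argument in the comment can be checked directly: the old 1GbE incval is about 2^37, so scaling it by a ppb near 10^9 (about 2^30) would need roughly 2^67 and wrap a u64, while the ~2^33 40GbE base stays in range. A standalone sketch of the new calculation (the constant is copied from the driver; the rest is illustrative):

#include <inttypes.h>
#include <stdio.h>

#define PTP_40GB_INCVAL UINT64_C(0x0199999999)

int main(void)
{
        uint32_t mult = 20;       /* 1GbE link-speed factor */
        int32_t ppb = 999999999;  /* worst-case adjustment */

        /* scale the 40GbE base by ppb first... */
        uint64_t diff = (PTP_40GB_INCVAL * (uint64_t)ppb) / UINT64_C(1000000000);
        /* ...then apply the link-speed multiplier */
        uint64_t adj = (PTP_40GB_INCVAL + diff) * mult;

        printf("adj = 0x%" PRIx64 "\n", adj);
        return 0;
}
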
@@ -334,10 +317,12 @@ void i40e_ptp_rx_hang(struct i40e_pf *pf)
  * This watchdog task is run periodically to make sure that we clear the Tx
  * timestamp logic if we don't obtain a timestamp in a reasonable amount of
  * time. It is unexpected in the normal case but if it occurs it results in
- * permanently prevent timestamps of future packets
+ * permanently preventing timestamps of future packets.
  **/
 void i40e_ptp_tx_hang(struct i40e_pf *pf)
 {
+       struct sk_buff *skb;
+
        if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx)
                return;
 
@@ -350,9 +335,12 @@ void i40e_ptp_tx_hang(struct i40e_pf *pf)
         * within a second it is reasonable to assume that we never will.
         */
        if (time_is_before_jiffies(pf->ptp_tx_start + HZ)) {
-               dev_kfree_skb_any(pf->ptp_tx_skb);
+               skb = pf->ptp_tx_skb;
                pf->ptp_tx_skb = NULL;
                clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+
+               /* Free the skb after we clear the bitlock */
+               dev_kfree_skb_any(skb);
                pf->tx_hwtstamp_timeouts++;
        }
 }
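
The reordering above matters because clear_bit_unlock() lets the next transmit claim the timestamp slot; by freeing only a private copy afterwards, no concurrent path can ever observe a freed pf->ptp_tx_skb. The same teardown order in a self-contained sketch (illustrative, not driver code):

#include <stdlib.h>

struct slot { void *skb; int busy; };

static void timeout_slot(struct slot *s)
{
        void *skb = s->skb;  /* take a private reference */
        s->skb = NULL;       /* detach from shared state */
        s->busy = 0;         /* release: the slot may be reused now */
        free(skb);           /* free only the private copy */
}

int main(void)
{
        struct slot s = { malloc(16), 1 };
        timeout_slot(&s);
        return 0;
}
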
@@ -462,6 +450,7 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
        struct i40e_link_status *hw_link_info;
        struct i40e_hw *hw = &pf->hw;
        u64 incval;
+       u32 mult;
 
        hw_link_info = &hw->phy.link_info;
 
@@ -469,10 +458,10 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
 
        switch (hw_link_info->link_speed) {
        case I40E_LINK_SPEED_10GB:
-               incval = I40E_PTP_10GB_INCVAL;
+               mult = I40E_PTP_10GB_INCVAL_MULT;
                break;
        case I40E_LINK_SPEED_1GB:
-               incval = I40E_PTP_1GB_INCVAL;
+               mult = I40E_PTP_1GB_INCVAL_MULT;
                break;
        case I40E_LINK_SPEED_100MB:
        {
@@ -483,15 +472,20 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
                                 "1588 functionality is not supported at 100 Mbps. Stopping the PHC.\n");
                        warn_once++;
                }
-               incval = 0;
+               mult = 0;
                break;
        }
        case I40E_LINK_SPEED_40GB:
        default:
-               incval = I40E_PTP_40GB_INCVAL;
+               mult = 1;
                break;
        }
 
+       /* The increment value is calculated by taking the base 40GbE incvalue
+        * and multiplying it by a factor based on the link speed.
+        */
+       incval = I40E_PTP_40GB_INCVAL * mult;
+
        /* Write the new increment value into the increment register. The
         * hardware will not update the clock until both registers have been
         * written.
@@ -500,14 +494,14 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
        wr32(hw, I40E_PRTTSYN_INC_H, incval >> 32);
 
        /* Update the base adjustment value. */
-       WRITE_ONCE(pf->ptp_base_adj, incval);
+       WRITE_ONCE(pf->ptp_adj_mult, mult);
        smp_mb(); /* Force the above update. */
 }
 
 /**
  * i40e_ptp_get_ts_config - ioctl interface to read the HW timestamping
  * @pf: Board private structure
- * @ifreq: ioctl data
+ * @ifr: ioctl data
  *
  * Obtain the current hardware timestamping settings as requested. To do this,
  * keep a shadow copy of the timestamp settings rather than attempting to
@@ -651,7 +645,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
 /**
  * i40e_ptp_set_ts_config - ioctl interface to control the HW timestamping
  * @pf: Board private structure
- * @ifreq: ioctl data
+ * @ifr: ioctl data
  *
  * Respond to the user filter requests and make the appropriate hardware
  * changes here. The XL710 cannot support splitting of the Tx/Rx timestamping
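
The "slightly lower precision" that the comment in i40e_ptp_set_increment() mentions can be seen by deriving the per-speed incvals from the 40GbE base and comparing them against the dedicated constants this patch removes:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        uint64_t base = UINT64_C(0x0199999999);

        printf("10GbE: 0x%010" PRIx64 " (was 0x0333333333)\n", base * 2);
        printf(" 1GbE: 0x%010" PRIx64 " (was 0x2000000000)\n", base * 20);
        return 0;
}

This prints 0x0333333332 and 0x1ffffffff4, each a few counts below the old per-speed constants.
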
index b3e206e49cc2f907ebd2d3769a3b545511e28949..52e3680c57f8d71da5441fbc7afd76f349bb6485 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_REGISTER_H_
 #define _I40E_REGISTER_H_
index 10c86f63dc52d86326c4b86130baf9d4220ebc2c..77be0702d07c8c044469ac70406b3e26656f75ba 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_STATUS_H_
 #define _I40E_STATUS_H_
index 410ba13bcf21e2c0f1871cbfb9d6c275db6fa10c..424f02077e2e24b016a9b4f2c28da228f28910ee 100644
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel(R) 40-10 Gigabit Ethernet Connection Network Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 /* Modeled on trace-events-sample.h */
 
index 87fb27ab9c2466cfba456177622fa1077b544cb6..5efa68de935b8adcb783b77eceb663577da8a094 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/prefetch.h>
 #include <net/busy_poll.h>
@@ -495,7 +471,7 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
 /**
  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
  * @vsi: pointer to the targeted VSI
- * @cmd: command to get or set RX flow classification rules
+ * @input: filter to add or delete
  * @add: true adds a filter, false removes it
  *
  **/
@@ -713,7 +689,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
- * @tx_ring: the ring of descriptors
+ * @ring: the ring of descriptors
  * @in_sw: use SW variables
  *
  * Since there is no access to the ring head register
@@ -1795,6 +1771,8 @@ static inline int i40e_ptype_to_htype(u8 ptype)
  * i40e_rx_hash - set the hash value in the skb
  * @ring: descriptor ring
  * @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
  **/
 static inline void i40e_rx_hash(struct i40e_ring *ring,
                                union i40e_rx_desc *rx_desc,
index 4bf318b8be8580b262109c6bcb98110fab9df6ac..fdd2c55f03a6b5b3abb79691a47199a0ef444433 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_TXRX_H_
 #define _I40E_TXRX_H_
index bfb80092b3525e72404a648c521f437063d9e29b..7df969c59855ceab0bbdd72653120817566a537e 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_TYPE_H_
 #define _I40E_TYPE_H_
@@ -1318,7 +1294,8 @@ struct i40e_hw_port_stats {
 
 /* Checksum and Shadow RAM pointers */
 #define I40E_SR_NVM_CONTROL_WORD               0x00
-#define I40E_SR_EMP_MODULE_PTR                 0x0F
+#define I40E_EMP_MODULE_PTR                    0x0F
+#define I40E_SR_EMP_MODULE_PTR                 0x48
 #define I40E_SR_PBA_FLAGS                      0x15
 #define I40E_SR_PBA_BLOCK_PTR                  0x16
 #define I40E_SR_BOOT_CONFIG_PTR                        0x17
@@ -1337,6 +1314,8 @@ struct i40e_hw_port_stats {
 #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE       1024
 #define I40E_SR_CONTROL_WORD_1_SHIFT           0x06
 #define I40E_SR_CONTROL_WORD_1_MASK    (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID  BIT(5)
+#define I40E_SR_NVM_MAP_STRUCTURE_TYPE         BIT(12)
 #define I40E_PTR_TYPE                          BIT(15)
 #define I40E_SR_OCP_CFG_WORD0                  0x2B
 #define I40E_SR_OCP_ENABLED                    BIT(15)
@@ -1454,7 +1433,8 @@ enum i40e_reset_type {
 };
 
 /* IEEE 802.1AB LLDP Agent Variables from NVM */
-#define I40E_NVM_LLDP_CFG_PTR          0xD
+#define I40E_NVM_LLDP_CFG_PTR  0x06
+#define I40E_SR_LLDP_CFG_PTR   0x31
 struct i40e_lldp_variables {
        u16 length;
        u16 adminstatus;
index 35173cbe80f7b13b7f58604f04137483ec70fbf4..c6d24eaede184fdd9ad2fde19724af1b9406b9da 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e.h"
 
@@ -32,8 +8,8 @@
 /**
  * i40e_vc_vf_broadcast
  * @pf: pointer to the PF structure
- * @opcode: operation code
- * @retval: return value
+ * @v_opcode: operation code
+ * @v_retval: return value
  * @msg: pointer to the msg buffer
  * @msglen: msg length
  *
@@ -1663,6 +1639,7 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
 /**
  * i40e_vc_get_version_msg
  * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
  *
  * called from the VF to request the API version used by the PF
  **/
@@ -1706,7 +1683,6 @@ static void i40e_del_qch(struct i40e_vf *vf)
  * i40e_vc_get_vf_resources_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to request its resources
  **/
@@ -1830,8 +1806,6 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 /**
  * i40e_vc_reset_vf_msg
  * @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to reset itself,
  * unlike other virtchnl messages, PF driver
@@ -2179,6 +2153,51 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                                       aq_ret);
 }
 
+/**
+ * i40e_ctrl_vf_tx_rings
+ * @vsi: the SRIOV VSI being configured
+ * @q_map: bit map of the queues to be enabled
+ * @enable: start or stop the queue
+ **/
+static int i40e_ctrl_vf_tx_rings(struct i40e_vsi *vsi, unsigned long q_map,
+                                bool enable)
+{
+       struct i40e_pf *pf = vsi->back;
+       int ret = 0;
+       u16 q_id;
+
+       for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) {
+               ret = i40e_control_wait_tx_q(vsi->seid, pf,
+                                            vsi->base_queue + q_id,
+                                            false /*is xdp*/, enable);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+/**
+ * i40e_ctrl_vf_rx_rings
+ * @vsi: the SRIOV VSI being configured
+ * @q_map: bit map of the queues to be enabled
+ * @enable: start or stop the queue
+ **/
+static int i40e_ctrl_vf_rx_rings(struct i40e_vsi *vsi, unsigned long q_map,
+                                bool enable)
+{
+       struct i40e_pf *pf = vsi->back;
+       int ret = 0;
+       u16 q_id;
+
+       for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) {
+               ret = i40e_control_wait_rx_q(pf, vsi->base_queue + q_id,
+                                            enable);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
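For reference, a minimal sketch of the bit-map walk these helpers rely on: for_each_set_bit() visits each set bit in q_map, and bit n maps to absolute queue vsi->base_queue + n. The q_map value below is illustrative only:

        unsigned long q_map = 0x5;      /* VF queues 0 and 2 */
        u16 q_id;

        for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES)
                pr_info("queue %u selected\n", q_id);   /* prints 0, then 2 */
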
 /**
  * i40e_vc_enable_queues_msg
  * @vf: pointer to the VF info
@@ -2211,8 +2230,17 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                goto error_param;
        }
 
-       if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
+       /* Use the queue bit map sent by the VF */
+       if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues,
+                                 true)) {
                aq_ret = I40E_ERR_TIMEOUT;
+               goto error_param;
+       }
+       if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues,
+                                 true)) {
+               aq_ret = I40E_ERR_TIMEOUT;
+               goto error_param;
+       }
 
        /* need to start the rings for additional ADq VSI's as well */
        if (vf->adq_enabled) {
@@ -2260,8 +2288,17 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                goto error_param;
        }
 
-       i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
-
+       /* Use the queue bit map sent by the VF */
+       if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues,
+                                 false)) {
+               aq_ret = I40E_ERR_TIMEOUT;
+               goto error_param;
+       }
+       if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues,
+                                 false)) {
+               aq_ret = I40E_ERR_TIMEOUT;
+               goto error_param;
+       }
 error_param:
        /* send the response to the VF */
        return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES,
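On the VF side, the bit maps consumed above arrive in struct virtchnl_queue_select. A hedged sketch of selecting queues 0-3 on both the Rx and Tx rings (the adapter pointer is a hypothetical caller context, not taken from this patch):

        struct virtchnl_queue_select vqs = {
                .vsi_id    = adapter->vsi_res->vsi_id, /* hypothetical */
                .rx_queues = GENMASK(3, 0),            /* queues 0-3 */
                .tx_queues = GENMASK(3, 0),
        };
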
@@ -3556,15 +3593,16 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
  * i40e_vc_process_vf_msg
  * @pf: pointer to the PF structure
  * @vf_id: source VF id
+ * @v_opcode: operation code
+ * @v_retval: unused return value code
  * @msg: pointer to the msg buffer
  * @msglen: msg length
- * @msghndl: msg handle
  *
  * called from the common aeq/arq handler to
  * process request from VF
  **/
 int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
-                          u32 v_retval, u8 *msg, u16 msglen)
+                          u32 __always_unused v_retval, u8 *msg, u16 msglen)
 {
        struct i40e_hw *hw = &pf->hw;
        int local_vf_id = vf_id - (s16)hw->func_caps.vf_base_id;
@@ -4015,7 +4053,8 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
  * i40e_ndo_set_vf_bw
  * @netdev: network interface device structure
  * @vf_id: VF identifier
- * @tx_rate: Tx rate
+ * @min_tx_rate: Minimum Tx rate
+ * @max_tx_rate: Maximum Tx rate
  *
  * configure VF Tx rate
  **/
index 57f727bb9e36e154d817018b5b6e32c9f1dee165..bf67d62e2b5fd0b4ddb53a82c8f0a8e9081e1ef5 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_VIRTCHNL_PF_H_
 #define _I40E_VIRTCHNL_PF_H_
index 1e89c5487676a06543b265f6ace4feb22ebbcbc1..3c5c6e9622805c3f890c683b3a87aa37f10b241e 100644 (file)
@@ -1,29 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
-# Copyright(c) 2013 - 2014 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
 
 #
 ## Makefile for the Intel(R) 40GbE VF driver
index 6fd677efa9da0ce414c1dda5263af3e65c0e9d6f..c355120dfdfd30d1d8ccdfecd04925588e48b845 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_status.h"
 #include "i40e_type.h"
index a7137c1652567e14b76972b05c3d836b924d760a..1f264b9b6805e7d1f376af34249f24d656461737 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_ADMINQ_H_
 #define _I40E_ADMINQ_H_
index 439e718820495a27b819c3ebb91aabb1e55186d6..aa81e87cd4713ab68e08c073bcd2856a375df0b5 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_ADMINQ_CMD_H_
 #define _I40E_ADMINQ_CMD_H_
index 7e0fddd8af36e83700481bfd3dba076b11471835..cb8689222c8b77c957be573b6fad8e6d75bcaa4d 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_ALLOC_H_
 #define _I40E_ALLOC_H_
index 67140cdbcd7ae91aa433f195a13af95d14fadd2c..9cef549713123893869e56eebfeff8a24b8cda6c 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40e_type.h"
 #include "i40e_adminq.h"
@@ -1255,6 +1231,7 @@ i40e_status_code i40evf_aq_write_ddp(struct i40e_hw *hw, void *buff,
  * @hw: pointer to the hw struct
  * @buff: command buffer (size in bytes = buff_size)
  * @buff_size: buffer size in bytes
+ * @flags: AdminQ command flags
  * @cmd_details: pointer to command details structure or NULL
  **/
 enum
index 352dd3f3eb6a3c58a1b17bbeb2ee9f5c7a9c8b66..f300bf271824acbc515ea4faf2d093b3328f99e2 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_DEVIDS_H_
 #define _I40E_DEVIDS_H_
index 7432596164f41bcf69ce1482b7d77bf309f51b97..1c78de838857be788669cc6873984d0ac1cd49d5 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_HMC_H_
 #define _I40E_HMC_H_
index ddac0e4908d3850f5f841f12bb9b4540f8883abf..82b00f70a6320a1dd9d957db006e08410cebf050 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_LAN_HMC_H_
 #define _I40E_LAN_HMC_H_
index 8668ad6c1a6552d95fb3252ed4981a3a84d68f64..3ddddb46455b2fbdeac7c14426fa74de61b1dec1 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_OSDEP_H_
 #define _I40E_OSDEP_H_
index 72501bd0f1a9c36753d9772f02e8711dae78009f..a358f4b9d5aa639cc6c5b927f38551cad2b0fc05 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_PROTOTYPE_H_
 #define _I40E_PROTOTYPE_H_
index c9c9356597583c0465a0c57b2cf391fbbda85612..49e1f57d99cc6703a97ecc5cb7a580a9138113ef 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_REGISTER_H_
 #define _I40E_REGISTER_H_
index 0d7993ecb99a6bce68a3c79913bd9e4fac55d842..77be0702d07c8c044469ac70406b3e26656f75ba 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_STATUS_H_
 #define _I40E_STATUS_H_
index ece01dd12a3c43a2f872248986a913d47c3d9277..d7a4e68820a895cda92981b8f1fbb9c58d7d42af 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel(R) 40-10 Gigabit Ethernet Virtual Function Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 /* Modeled on trace-events-sample.h */
 
index 12bd937861e794185295429f8d6f820ce5da94d6..a9730711e2579da0ed1621f57d99597868ca2c83 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/prefetch.h>
 #include <net/busy_poll.h>
@@ -129,7 +105,7 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
 
 /**
  * i40evf_get_tx_pending - how many Tx descriptors not processed
- * @tx_ring: the ring of descriptors
+ * @ring: the ring of descriptors
  * @in_sw: is tx_pending being checked in SW or HW
  *
  * Since there is no access to the ring head register
@@ -1070,6 +1046,8 @@ static inline int i40e_ptype_to_htype(u8 ptype)
  * i40e_rx_hash - set the hash value in the skb
  * @ring: descriptor ring
  * @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
  **/
 static inline void i40e_rx_hash(struct i40e_ring *ring,
                                union i40e_rx_desc *rx_desc,
index 5790897eae2e722aafe53eb6ff4073d7799dcbcb..3b5a63b3236ebdbfeb4bd82b3dfd5e90f2596858 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_TXRX_H_
 #define _I40E_TXRX_H_
index 449de4b0058e6593c00979da82671c77a67c26aa..094387db3c110a707d21db6b525b984fc54c5165 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40E_TYPE_H_
 #define _I40E_TYPE_H_
@@ -1257,7 +1233,8 @@ struct i40e_hw_port_stats {
 
 /* Checksum and Shadow RAM pointers */
 #define I40E_SR_NVM_CONTROL_WORD               0x00
-#define I40E_SR_EMP_MODULE_PTR                 0x0F
+#define I40E_EMP_MODULE_PTR                    0x0F
+#define I40E_SR_EMP_MODULE_PTR                 0x48
 #define I40E_NVM_OEM_VER_OFF                   0x83
 #define I40E_SR_NVM_DEV_STARTER_VERSION                0x18
 #define I40E_SR_NVM_WAKE_ON_LAN                        0x19
@@ -1273,6 +1250,9 @@ struct i40e_hw_port_stats {
 #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE       1024
 #define I40E_SR_CONTROL_WORD_1_SHIFT           0x06
 #define I40E_SR_CONTROL_WORD_1_MASK    (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID  BIT(5)
+#define I40E_SR_NVM_MAP_STRUCTURE_TYPE         BIT(12)
+#define I40E_PTR_TYPE                          BIT(15)
 
 /* Shadow RAM related */
 #define I40E_SR_SECTOR_SIZE_IN_WORDS   0x800
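As a quick illustration of the control word layout defined above (the sr_ctrl value is hypothetical), the two-bit mode field and the new bank-valid flag decode as:

        u16 sr_ctrl = 0x0060;   /* hypothetical word from I40E_SR_NVM_CONTROL_WORD */
        u16 mode = (sr_ctrl & I40E_SR_CONTROL_WORD_1_MASK) >>
                   I40E_SR_CONTROL_WORD_1_SHIFT;               /* 0x1 */
        bool bank_valid = sr_ctrl & I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID;
        /* bank_valid is true here: bit 5 is set in 0x0060 */
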
@@ -1386,6 +1366,10 @@ enum i40e_reset_type {
        I40E_RESET_EMPR         = 3,
 };
 
+/* IEEE 802.1AB LLDP Agent Variables from NVM */
+#define I40E_NVM_LLDP_CFG_PTR  0x06
+#define I40E_SR_LLDP_CFG_PTR   0x31
+
 /* RSS Hash Table Size */
 #define I40E_PFQF_CTL_0_HASHLUTSIZE_512        0x00010000
 
index 3a7a1e77bf39f67013f5660de14c1cf411b28732..96e537a350004e8310abeea6bb70955c03942871 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #ifndef _I40EVF_H_
 #define _I40EVF_H_
@@ -105,7 +81,6 @@ struct i40e_vsi {
 #define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i]))
 #define I40E_TX_CTXTDESC(R, i) \
        (&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
-#define MAX_QUEUES 16
 #define I40EVF_MAX_REQ_QUEUES 4
 
 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
index da60ce12b33d31660c6c00c0425b6ce5919dadd2..3cc9d60d0d72ecc0771324d472f24e575c2bc5c4 100644 (file)
@@ -1,4 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
 #include <linux/list.h>
 #include <linux/errno.h>
 
@@ -176,7 +178,6 @@ void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset)
 /**
  * i40evf_client_add_instance - add a client instance to the instance list
  * @adapter: pointer to the board struct
- * @client: pointer to a client struct in the client list.
  *
  * Returns cinst ptr on success, NULL on failure
  **/
@@ -234,7 +235,6 @@ i40evf_client_add_instance(struct i40evf_adapter *adapter)
 /**
  * i40evf_client_del_instance - removes a client instance from the list
  * @adapter: pointer to the board struct
- * @client: pointer to the client struct
  *
  **/
 static
@@ -438,7 +438,7 @@ static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
  * i40evf_client_setup_qvlist - send a message to the PF to setup iwarp qv map
  * @ldev: pointer to L2 context.
  * @client: Client pointer.
- * @qv_info: queue and vector list
+ * @qvlist_info: queue and vector list
  *
  * Return 0 on success or < 0 on error
  **/
index 15a10da5bd4aea957f28a97aa542cb8619d8d0e1..5585f362048a7914f8af91b03a5588afe0737692 100644 (file)
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _I40E_CLIENT_H_
-#define _I40E_CLIENT_H_
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40EVF_CLIENT_H_
+#define _I40EVF_CLIENT_H_
 
 #define I40EVF_CLIENT_STR_LENGTH 10
 
@@ -164,4 +166,4 @@ struct i40e_client {
 /* used by clients */
 int i40evf_register_client(struct i40e_client *client);
 int i40evf_unregister_client(struct i40e_client *client);
-#endif /* _I40E_CLIENT_H_ */
+#endif /* _I40EVF_CLIENT_H_ */
index dc4cde274fb82872a7d3b4dc7021c9e909e40e86..69efe0aec76a449db0c6f4fc55af28e4ca0e75fd 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 /* ethtool support for i40evf */
 #include "i40evf.h"
@@ -226,7 +202,7 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
 
 /**
  * i40evf_get_priv_flags - report device private flags
- * @dev: network interface device structure
+ * @netdev: network interface device structure
  *
  * The get string set count and the string set should be matched for each
  * flag returned.  Add new strings for each flag to the i40e_gstrings_priv_flags
@@ -253,7 +229,7 @@ static u32 i40evf_get_priv_flags(struct net_device *netdev)
 
 /**
  * i40evf_set_priv_flags - set private flags
- * @dev: network interface device structure
+ * @netdev: network interface device structure
  * @flags: bit flags to be set
  **/
 static int i40evf_set_priv_flags(struct net_device *netdev, u32 flags)
@@ -627,6 +603,7 @@ static int i40evf_set_per_queue_coalesce(struct net_device *netdev,
  * i40evf_get_rxnfc - command to get RX flow classification rules
  * @netdev: network interface device structure
  * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
  *
  * Returns Success if the command is supported.
  **/
@@ -746,6 +723,7 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev)
  * @netdev: network interface device structure
  * @indir: indirection table
  * @key: hash key
+ * @hfunc: hash function in use
  *
  * Reads the indirection table directly from the hardware. Always returns 0.
  **/
@@ -774,6 +752,7 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
  * @netdev: network interface device structure
  * @indir: indirection table
  * @key: hash key
+ * @hfunc: hash function to use
  *
 * Returns -EINVAL if the table specifies an invalid queue id, otherwise

  * returns 0 after programming the table.
index 5f71532be7f143cbaea948e7ece7e82bc3d19280..a7b87f93541138c497056f9c91dacbd91c519fc5 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40evf.h"
 #include "i40e_prototype.h"
@@ -473,6 +449,7 @@ static void i40evf_irq_affinity_release(struct kref *ref) {}
 /**
  * i40evf_request_traffic_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
+ * @basename: device basename
  *
  * Allocates MSI-X vectors for tx and rx handling, and requests
  * interrupts from the kernel.
@@ -705,7 +682,7 @@ i40evf_vlan_filter *i40evf_add_vlan(struct i40evf_adapter *adapter, u16 vlan)
 
        f = i40evf_find_vlan(adapter, vlan);
        if (!f) {
-               f = kzalloc(sizeof(*f), GFP_ATOMIC);
+               f = kzalloc(sizeof(*f), GFP_KERNEL);
                if (!f)
                        goto clearout;
 
@@ -745,6 +722,7 @@ static void i40evf_del_vlan(struct i40evf_adapter *adapter, u16 vlan)
 /**
  * i40evf_vlan_rx_add_vid - Add a VLAN filter to a device
  * @netdev: network device struct
+ * @proto: unused protocol data
  * @vid: VLAN tag
  **/
 static int i40evf_vlan_rx_add_vid(struct net_device *netdev,
@@ -762,6 +740,7 @@ static int i40evf_vlan_rx_add_vid(struct net_device *netdev,
 /**
  * i40evf_vlan_rx_kill_vid - Remove a VLAN filter from a device
  * @netdev: network device struct
+ * @proto: unused protocol data
  * @vid: VLAN tag
  **/
 static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
@@ -1946,7 +1925,8 @@ static void i40evf_reset_task(struct work_struct *work)
         * ndo_open() returning, so we can't assume it means all our open
         * tasks have finished, since we're not holding the rtnl_lock here.
         */
-       running = (adapter->state == __I40EVF_RUNNING);
+       running = ((adapter->state == __I40EVF_RUNNING) ||
+                  (adapter->state == __I40EVF_RESETTING));
 
        if (running) {
                netif_carrier_off(netdev);
@@ -2352,7 +2332,7 @@ static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
                total_max_rate += tx_rate;
                num_qps += mqprio_qopt->qopt.count[i];
        }
-       if (num_qps > MAX_QUEUES)
+       if (num_qps > I40EVF_MAX_REQ_QUEUES)
                return -EINVAL;
 
        ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
@@ -3160,7 +3140,7 @@ static int i40evf_set_features(struct net_device *netdev,
 /**
  * i40evf_features_check - Validate encapsulated packet conforms to limits
  * @skb: skb buff
- * @netdev: This physical port's netdev
+ * @dev: This physical port's netdev
  * @features: Offload features that the stack believes apply
  **/
 static netdev_features_t i40evf_features_check(struct sk_buff *skb,
@@ -3378,6 +3358,24 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
        if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
                netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
+       /* Do not turn on offloads when they are requested to be turned off.
+        * TSO needs minimum 576 bytes to work correctly.
+        */
+       if (netdev->wanted_features) {
+               if (!(netdev->wanted_features & NETIF_F_TSO) ||
+                   netdev->mtu < 576)
+                       netdev->features &= ~NETIF_F_TSO;
+               if (!(netdev->wanted_features & NETIF_F_TSO6) ||
+                   netdev->mtu < 576)
+                       netdev->features &= ~NETIF_F_TSO6;
+               if (!(netdev->wanted_features & NETIF_F_TSO_ECN))
+                       netdev->features &= ~NETIF_F_TSO_ECN;
+               if (!(netdev->wanted_features & NETIF_F_GRO))
+                       netdev->features &= ~NETIF_F_GRO;
+               if (!(netdev->wanted_features & NETIF_F_GSO))
+                       netdev->features &= ~NETIF_F_GSO;
+       }
+
        adapter->vsi.id = adapter->vsi_res->vsi_id;
 
        adapter->vsi.back = adapter;
@@ -3692,7 +3690,8 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_set_master(pdev);
 
-       netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), MAX_QUEUES);
+       netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter),
+                                  I40EVF_MAX_REQ_QUEUES);
        if (!netdev) {
                err = -ENOMEM;
                goto err_alloc_etherdev;
index 26a59890532f725ea6d4c951db00fc6295f5ec03..565677de5ba376184d5acbe32644f13207d2767e 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include "i40evf.h"
 #include "i40e_prototype.h"
@@ -179,8 +155,7 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
 
 /**
  * i40evf_get_vf_config
- * @hw: pointer to the hardware structure
- * @len: length of buffer
+ * @adapter: private adapter structure
  *
  * Get VF configuration from PF and populate hw structure. Must be called after
  * admin queue is initialized. Busy waits until response is received from PF,
@@ -423,8 +398,6 @@ int i40evf_request_queues(struct i40evf_adapter *adapter, int num)
 /**
  * i40evf_add_ether_addrs
  * @adapter: adapter structure
- * @addrs: the MAC address filters to add (contiguous)
- * @count: number of filters
  *
  * Request that the PF add one or more addresses to our filters.
  **/
@@ -497,8 +470,6 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 /**
  * i40evf_del_ether_addrs
  * @adapter: adapter structure
- * @addrs: the MAC address filters to remove (contiguous)
- * @count: number of filtes
  *
  * Request that the PF remove one or more addresses from our filters.
  **/
@@ -571,8 +542,6 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 /**
  * i40evf_add_vlans
  * @adapter: adapter structure
- * @vlans: the VLANs to add
- * @count: number of VLANs
  *
  * Request that the PF add one or more VLAN filters to our VSI.
  **/
@@ -643,8 +612,6 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 /**
  * i40evf_del_vlans
  * @adapter: adapter structure
- * @vlans: the VLANs to remove
- * @count: number of VLANs
  *
  * Request that the PF remove one or more VLAN filters from our VSI.
  **/
index 5b13ca1bd85f6ecc10df136f3aaf2cba78ce979a..7541ec2270b3708d4e5e9362d0ae8a1f5e09318d 100644 (file)
@@ -586,7 +586,7 @@ struct ice_sw_rule_lg_act {
 #define ICE_LG_ACT_MIRROR_VSI_ID_S     3
 #define ICE_LG_ACT_MIRROR_VSI_ID_M     (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S)
 
-       /* Action type = 5 - Large Action */
+       /* Action type = 5 - Generic Value */
 #define ICE_LG_ACT_GENERIC             0x5
 #define ICE_LG_ACT_GENERIC_VALUE_S     3
 #define ICE_LG_ACT_GENERIC_VALUE_M     (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S)
@@ -1049,7 +1049,9 @@ struct ice_aqc_set_event_mask {
  * NVM Update commands (indirect 0x0703)
  */
 struct ice_aqc_nvm {
-       u8      cmd_flags;
+       __le16 offset_low;
+       u8 offset_high;
+       u8 cmd_flags;
 #define ICE_AQC_NVM_LAST_CMD           BIT(0)
 #define ICE_AQC_NVM_PCIR_REQ           BIT(0)  /* Used by NVM Update reply */
 #define ICE_AQC_NVM_PRESERVATION_S     1
@@ -1058,12 +1060,11 @@ struct ice_aqc_nvm {
 #define ICE_AQC_NVM_PRESERVE_ALL       BIT(1)
 #define ICE_AQC_NVM_PRESERVE_SELECTED  (3 << CSR_AQ_NVM_PRESERVATION_S)
 #define ICE_AQC_NVM_FLASH_ONLY         BIT(7)
-       u8      module_typeid;
-       __le16  length;
+       __le16 module_typeid;
+       __le16 length;
 #define ICE_AQC_NVM_ERASE_LEN  0xFFFF
-       __le32  offset;
-       __le32  addr_high;
-       __le32  addr_low;
+       __le32 addr_high;
+       __le32 addr_low;
 };
 
 /* Get/Set RSS key (indirect 0x0B04/0x0B02) */
index 21977ec984c4a1f193d7b68f3dce278edae02986..71d032cc5fa7d7ee8d6af579ca3d0e7813de05b5 100644 (file)
@@ -78,6 +78,7 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
        struct ice_aq_desc desc;
        enum ice_status status;
        u16 flags;
+       u8 i;
 
        cmd = &desc.params.mac_read;
 
@@ -98,8 +99,16 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
                return ICE_ERR_CFG;
        }
 
-       ether_addr_copy(hw->port_info->mac.lan_addr, resp->mac_addr);
-       ether_addr_copy(hw->port_info->mac.perm_addr, resp->mac_addr);
+       /* A single port can report up to two (LAN and WoL) addresses */
+       for (i = 0; i < cmd->num_addr; i++)
+               if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) {
+                       ether_addr_copy(hw->port_info->mac.lan_addr,
+                                       resp[i].mac_addr);
+                       ether_addr_copy(hw->port_info->mac.perm_addr,
+                                       resp[i].mac_addr);
+                       break;
+               }
+
        return 0;
 }
 
@@ -464,9 +473,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
        if (status)
                goto err_unroll_sched;
 
-       /* Get port MAC information */
-       mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp);
-       mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL);
+       /* Get MAC information */
+       /* A single port can report up to two (LAN and WoL) addresses */
+       mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
+                              sizeof(struct ice_aqc_manage_mac_read_resp),
+                              GFP_KERNEL);
+       mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp);
 
        if (!mac_buf) {
                status = ICE_ERR_NO_MEMORY;
index 5909a4407e38f912345d1285d40919903a85294d..7c511f144ed60d92c16f0f526cfb5b03a3c2a59e 100644 (file)
@@ -1014,10 +1014,10 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
        desc = ICE_CTL_Q_DESC(cq->rq, ntc);
        desc_idx = ntc;
 
+       cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
        flags = le16_to_cpu(desc->flags);
        if (flags & ICE_AQ_FLAG_ERR) {
                ret_code = ICE_ERR_AQ_ERROR;
-               cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
                ice_debug(hw, ICE_DBG_AQ_MSG,
                          "Control Receive Queue Event received with error 0x%x\n",
                          cq->rq_last_status);
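With rq_last_status now captured before the error check, it is valid for every completed event rather than only failed ones. A hedged usage sketch, assuming the caller-side names event/pending and that ICE_AQ_RC_OK denotes firmware success:

        status = ice_clean_rq_elem(hw, cq, &event, &pending);
        if (!status && cq->rq_last_status != ICE_AQ_RC_OK)
                ice_debug(hw, ICE_DBG_AQ_MSG, "FW retval %d\n",
                          cq->rq_last_status);
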
index 1b9e2ef48a9dc6ed1a915fc9905b33c53bc22bfd..499904874b3ff863554c315add28a03f3a260eeb 100644 (file)
 #define PFINT_FW_CTL_CAUSE_ENA_S       30
 #define PFINT_FW_CTL_CAUSE_ENA_M       BIT(PFINT_FW_CTL_CAUSE_ENA_S)
 #define PFINT_OICR                     0x0016CA00
-#define PFINT_OICR_INTEVENT_S          0
-#define PFINT_OICR_INTEVENT_M          BIT(PFINT_OICR_INTEVENT_S)
 #define PFINT_OICR_HLP_RDY_S           14
 #define PFINT_OICR_HLP_RDY_M           BIT(PFINT_OICR_HLP_RDY_S)
 #define PFINT_OICR_CPM_RDY_S           15
index 210b7910f1cd1947f000feec0a313273bdaae52c..5299caf55a7f2b44772988d9f4eeb70b27c51cc4 100644 (file)
@@ -1722,9 +1722,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
        oicr = rd32(hw, PFINT_OICR);
        ena_mask = rd32(hw, PFINT_OICR_ENA);
 
-       if (!(oicr & PFINT_OICR_INTEVENT_M))
-               goto ena_intr;
-
        if (oicr & PFINT_OICR_GRST_M) {
                u32 reset;
                /* we have a reset warning */
@@ -1782,7 +1779,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
        }
        ret = IRQ_HANDLED;
 
-ena_intr:
        /* re-enable interrupt causes that are not handled during this pass */
        wr32(hw, PFINT_OICR_ENA, ena_mask);
        if (!test_bit(__ICE_DOWN, pf->state)) {
index fa7a69ac92b07ce6ae883d8fa429a829c4de4bbc..92da0a626ce0b38395633ff90df504f8f88f40c8 100644 (file)
@@ -16,7 +16,7 @@
  * Read the NVM using the admin queue commands (0x0701)
  */
 static enum ice_status
-ice_aq_read_nvm(struct ice_hw *hw, u8 module_typeid, u32 offset, u16 length,
+ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
                void *data, bool last_command, struct ice_sq_cd *cd)
 {
        struct ice_aq_desc desc;
@@ -33,8 +33,9 @@ ice_aq_read_nvm(struct ice_hw *hw, u8 module_typeid, u32 offset, u16 length,
        /* If this is the last command in a series, set the proper flag. */
        if (last_command)
                cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
-       cmd->module_typeid = module_typeid;
-       cmd->offset = cpu_to_le32(offset);
+       cmd->module_typeid = cpu_to_le16(module_typeid);
+       cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+       cmd->offset_high = (offset >> 16) & 0xFF;
        cmd->length = cpu_to_le16(length);
 
        return ice_aq_send_cmd(hw, &desc, data, length, cd);
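A worked example of the new 24-bit offset encoding, using an illustrative offset of 0x012345:

        u32 offset = 0x012345;
        __le16 low = cpu_to_le16(offset & 0xFFFF);      /* 0x2345 */
        u8 high    = (offset >> 16) & 0xFF;             /* 0x01 */
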
index f16ff3e4a84043e0b8a9a12d465cc4214e4484d5..2e6c1d92cc8884b2bb9755483f82c421b5d9e6f1 100644 (file)
@@ -751,14 +751,14 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
        u16 num_added = 0;
        u32 temp;
 
+       *num_nodes_added = 0;
+
        if (!num_nodes)
                return status;
 
        if (!parent || layer < hw->sw_entry_point_layer)
                return ICE_ERR_PARAM;
 
-       *num_nodes_added = 0;
-
        /* max children per node per layer */
        max_child_nodes =
            le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children);
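Moving *num_nodes_added = 0 ahead of the early returns means callers can trust the out-parameter on every path, including the num_nodes == 0 success return. A hedged caller sketch (variable names are illustrative, not from this patch):

        u16 added;      /* deliberately left uninitialized by the caller */

        status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, layer,
                                              num_nodes, &first_teid, &added);
        if (!status)
                total += added; /* safe even when num_nodes was 0 */
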
index c48583e98ac1b45b2f39f31fe721a8c4c5f1dcd6..394c1e0656b9ec2c2183cc50ac82e9b705b5ac7e 100644 (file)
@@ -1,31 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel 82575 PCI-Express Ethernet Linux driver
-# Copyright(c) 1999 - 2014 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 1999 - 2018 Intel Corporation.
 #
 # Makefile for the Intel(R) 82575 PCI-Express ethernet driver
 #
index dd9b6cac220d40bcfc837a8317138e9089fdf6ca..b13b42e5a1d9925351dde946f8389202adb4bd91 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 /* e1000_82575
  * e1000_82576
index e53ebe97d709d743d9436f96416cf1aea2deda09..6ad775b1a4c56346e2b306bc1f32c41a7bf01f30 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_82575_H_
 #define _E1000_82575_H_
index 98534f765e0eae25e2975cbc027fec0831d44583..252440a418dc4ba0dc0693cfb5a3b4dd69fd6b28 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_DEFINES_H_
 #define _E1000_DEFINES_H_
  * manageability enabled, allowing us room for 15 multicast addresses.
  */
 #define E1000_RAH_AV  0x80000000        /* Receive descriptor valid */
+#define E1000_RAH_ASEL_SRC_ADDR 0x00010000
+#define E1000_RAH_QSEL_ENABLE 0x10000000
 #define E1000_RAL_MAC_ADDR_LEN 4
 #define E1000_RAH_MAC_ADDR_LEN 2
 #define E1000_RAH_POOL_MASK 0x03FC0000
index ff835e1e853d9baa3486a0856d81a7523c699bb1..5d87957b2627c06561f978e84c76a82eb9c5cac3 100644 (file)
@@ -1,25 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_HW_H_
 #define _E1000_HW_H_
index 6f548247e6d86226d228abb80064657888f8a9b1..c54ebedca6da9a3ddaeff1b43cde2993438c4714 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 /* e1000_i210
  * e1000_i211
index 56f015ccb2060b7a77f0046080bf0346c30bf027..5c437fdc49eef18d27a60c9caa923dafac4dca39 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_I210_H_
 #define _E1000_I210_H_
index 298afa0d9159b04d4843e5f896eca3cb274873a9..79ee0a7472608ec823e29d576a40a29b05237fe1 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #include <linux/if_ether.h>
 #include <linux/delay.h>
index 04d80c765aeea291db5ae67999a1a9cd101b3329..6e110f28f92260e0272ff61b689e1cc467c7c7ce 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_MAC_H_
 #define _E1000_MAC_H_
index ef42f1689b3b43ec96aef4bfaab56d3324f3658f..46debd991bfe1d84aa15329ed831e08dd711f582 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #include "e1000_mbx.h"
 
index 4f0ecd28354d18a752a47cf2ddb8716f9fa179d6..178e60ec71d46ef8ac3434754909d106226d8871 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_MBX_H_
 #define _E1000_MBX_H_
index e4596f151cd437d5a56522b87f596676b10777dd..09f4dcb09632a2821cdad81d3a755531afc2e3ea 100644 (file)
@@ -1,25 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #include <linux/if_ether.h>
 #include <linux/delay.h>
index dde68cd54a5307384d91b8d9774be4d7de1d9968..091cddf4ada80cc74f25dbd76d7b6b001851492e 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_NVM_H_
 #define _E1000_NVM_H_
index 4ec61243da82e2b14e595f3ab85db6b1b4b46269..2be0e762ec69b635f89b2386785354f61fbf8ee0 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #include <linux/if_ether.h>
 #include <linux/delay.h>
index 856d2cda0643312491b20214b6a32e42552324a2..5894e4b1d0a8489a3f1820858fbd3a70d44f42cc 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_PHY_H_
 #define _E1000_PHY_H_
index e8fa8c6530e050460bb0ffd1a9811b03fd043e20..0ad737d2f28910071cdadf9046712a0435eec615 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #ifndef _E1000_REGS_H_
 #define _E1000_REGS_H_
index 8dbc399b345e54903cb0bf0704367a0ca57f8c64..9643b5b3d444b2aa19d0f73045d4a72911e24fd4 100644 (file)
@@ -1,26 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 /* Linux PRO/1000 Ethernet Driver main header file */
 
@@ -442,6 +421,8 @@ struct hwmon_buff {
 enum igb_filter_match_flags {
        IGB_FILTER_FLAG_ETHER_TYPE = 0x1,
        IGB_FILTER_FLAG_VLAN_TCI   = 0x2,
+       IGB_FILTER_FLAG_SRC_MAC_ADDR   = 0x4,
+       IGB_FILTER_FLAG_DST_MAC_ADDR   = 0x8,
 };
 
 #define IGB_MAX_RXNFC_FILTERS 16
@@ -456,11 +437,14 @@ struct igb_nfc_input {
        u8 match_flags;
        __be16 etype;
        __be16 vlan_tci;
+       u8 src_addr[ETH_ALEN];
+       u8 dst_addr[ETH_ALEN];
 };
 
 struct igb_nfc_filter {
        struct hlist_node nfc_node;
        struct igb_nfc_input filter;
+       unsigned long cookie;
        u16 etype_reg_index;
        u16 sw_idx;
        u16 action;
@@ -474,6 +458,8 @@ struct igb_mac_addr {
 
 #define IGB_MAC_STATE_DEFAULT  0x1
 #define IGB_MAC_STATE_IN_USE   0x2
+#define IGB_MAC_STATE_SRC_ADDR 0x4
+#define IGB_MAC_STATE_QUEUE_STEERING 0x8
 
 /* board specific private data structure */
 struct igb_adapter {
@@ -598,6 +584,7 @@ struct igb_adapter {
 
        /* RX network flow classification support */
        struct hlist_head nfc_filter_list;
+       struct hlist_head cls_flower_list;
        unsigned int nfc_filter_count;
        /* lock for RX network flow classification filter */
        spinlock_t nfc_lock;
@@ -739,4 +726,9 @@ int igb_add_filter(struct igb_adapter *adapter,
 int igb_erase_filter(struct igb_adapter *adapter,
                     struct igb_nfc_filter *input);
 
+int igb_add_mac_steering_filter(struct igb_adapter *adapter,
+                               const u8 *addr, u8 queue, u8 flags);
+int igb_del_mac_steering_filter(struct igb_adapter *adapter,
+                               const u8 *addr, u8 queue, u8 flags);
+
 #endif /* _IGB_H_ */
index e77ba0d5866d0cc64aae1aae57568638dc95e751..2d798499d35e5278d8fa80aa751de56a3bb983c2 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 /* ethtool support for igb */
 
@@ -2495,6 +2474,23 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
                        fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
                        fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
                }
+               if (rule->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+                       ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+                                       rule->filter.dst_addr);
+                       /* As we only support matching by the full
+                        * mask, return the mask to userspace
+                        */
+                       eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+               }
+               if (rule->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+                       ether_addr_copy(fsp->h_u.ether_spec.h_source,
+                                       rule->filter.src_addr);
+                       /* As we only support matching by the full
+                        * mask, return the mask to userspace
+                        */
+                       eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+               }
+
                return 0;
        }
        return -EINVAL;
@@ -2768,14 +2764,41 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
 
 int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
 {
+       struct e1000_hw *hw = &adapter->hw;
        int err = -EINVAL;
 
+       if (hw->mac.type == e1000_i210 &&
+           !(input->filter.match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR)) {
+               dev_err(&adapter->pdev->dev,
+                       "i210 doesn't support flow classification rules specifying only source addresses.\n");
+               return -EOPNOTSUPP;
+       }
+
        if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
                err = igb_rxnfc_write_etype_filter(adapter, input);
                if (err)
                        return err;
        }
 
+       if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+               err = igb_add_mac_steering_filter(adapter,
+                                                 input->filter.dst_addr,
+                                                 input->action, 0);
+               err = min_t(int, err, 0);
+               if (err)
+                       return err;
+       }
+
+       if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+               err = igb_add_mac_steering_filter(adapter,
+                                                 input->filter.src_addr,
+                                                 input->action,
+                                                 IGB_MAC_STATE_SRC_ADDR);
+               err = min_t(int, err, 0);
+               if (err)
+                       return err;
+       }
+
        if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
                err = igb_rxnfc_write_vlan_prio_filter(adapter, input);
 
@@ -2824,6 +2847,15 @@ int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
                igb_clear_vlan_prio_filter(adapter,
                                           ntohs(input->filter.vlan_tci));
 
+       if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR)
+               igb_del_mac_steering_filter(adapter, input->filter.src_addr,
+                                           input->action,
+                                           IGB_MAC_STATE_SRC_ADDR);
+
+       if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR)
+               igb_del_mac_steering_filter(adapter, input->filter.dst_addr,
+                                           input->action, 0);
+
        return 0;
 }
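
In the hunk above, igb_add_mac_steering_filter() returns the RAR table index (>= 0) on success or a negative errno on failure; the min_t(int, err, 0) clamp folds any success index down to 0 so that only genuine errors survive the following if (err) test. A short illustration (variables assumed in scope):

    /* Hypothetical illustration of the min_t() clamp used above. */
    err = igb_add_mac_steering_filter(adapter, addr, queue, 0);
    err = min_t(int, err, 0);   /* index 5 becomes 0; -ENOSPC stays -ENOSPC */
    if (err)
            return err;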
 
@@ -2865,7 +2897,7 @@ static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter,
 
        /* add filter to the list */
        if (parent)
-               hlist_add_behind(&parent->nfc_node, &input->nfc_node);
+               hlist_add_behind(&input->nfc_node, &parent->nfc_node);
        else
                hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
 
@@ -2905,10 +2937,6 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
        if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
                return -EINVAL;
 
-       if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK &&
-           fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK))
-               return -EINVAL;
-
        input = kzalloc(sizeof(*input), GFP_KERNEL);
        if (!input)
                return -ENOMEM;
@@ -2918,6 +2946,20 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
                input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE;
        }
 
+       /* Only support matching addresses by the full mask */
+       if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
+               input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR;
+               ether_addr_copy(input->filter.src_addr,
+                               fsp->h_u.ether_spec.h_source);
+       }
+
+       /* Only support matching addresses by the full mask */
+       if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+               input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR;
+               ether_addr_copy(input->filter.dst_addr,
+                               fsp->h_u.ether_spec.h_dest);
+       }
+
        if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
                if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
                        err = -EINVAL;
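
With the mask checks above, ethtool ntuple rules can now match on the source and/or destination MAC address, but only with a full (all-ones) compare mask. A hypothetical invocation (interface name and addresses assumed; ethtool treats an omitted mask as an exact match):

    ethtool -N eth0 flow-type ether src 00:1b:21:aa:bb:cc action 2
    ethtool -N eth0 flow-type ether dst 00:1b:21:dd:ee:ff action 1

Note that, per the igb_add_filter() check earlier, i210 rejects a rule that matches only on the source address.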
index bebe43b3a836647a11af161245906dfd220b9d65..3b83747b27009d9f7e83dff9eaf18e20764978ac 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #include "igb.h"
 #include "e1000_82575.h"
index c1c0bc30a16d8196f8319a21dcb1f8d13ef4ac52..78574c06635bb4d3836500daa6e28bc6ba749a7c 100644 (file)
@@ -1,26 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -36,6 +15,7 @@
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <linux/net_tstamp.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
@@ -1700,7 +1680,22 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
        WARN_ON(hw->mac.type != e1000_i210);
        WARN_ON(queue < 0 || queue > 1);
 
-       if (enable) {
+       if (enable || queue == 0) {
+               /* i210 does not allow queue 0 to be in Strict Priority
+                * mode while Qav mode is enabled, so, instead of
+                * disabling strict priority mode, we give queue 0 the
+                * maximum possible credits.
+                *
+                * See section 8.12.19 of the i210 datasheet, "Note:
+                * Queue0 QueueMode must be set to 1b when
+                * TransmitMode is set to Qav."
+                */
+               if (queue == 0 && !enable) {
+                       /* max "linkspeed" idleslope in kbps */
+                       idleslope = 1000000;
+                       hicredit = ETH_FRAME_LEN;
+               }
+
                set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
                set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
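
The queue == 0 && !enable branch above implements the datasheet note quoted in the comment: rather than leaving queue 0 in strict priority, it is kept in stream-reservation mode with an idleslope equal to the full 1 Gb/s line rate and one full frame of hicredit, which amounts to no effective shaping. Spelled out (ETH_FRAME_LEN is 1514):

    /* Defaults used above when queue 0 is not explicitly CBS-enabled. */
    idleslope = 1000000;          /* kbps, i.e. the 1 Gb/s i210 line rate */
    hicredit  = ETH_FRAME_LEN;    /* 1514 bytes: credit for one full frame */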
 
@@ -2498,6 +2493,250 @@ static int igb_offload_cbs(struct igb_adapter *adapter,
        return 0;
 }
 
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+#define VLAN_PRIO_FULL_MASK (0x07)
+
+static int igb_parse_cls_flower(struct igb_adapter *adapter,
+                               struct tc_cls_flower_offload *f,
+                               int traffic_class,
+                               struct igb_nfc_filter *input)
+{
+       struct netlink_ext_ack *extack = f->common.extack;
+
+       if (f->dissector->used_keys &
+           ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+             BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+             BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported");
+               return -EOPNOTSUPP;
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+               struct flow_dissector_key_eth_addrs *key, *mask;
+
+               key = skb_flow_dissector_target(f->dissector,
+                                               FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                               f->key);
+               mask = skb_flow_dissector_target(f->dissector,
+                                                FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                f->mask);
+
+               if (!is_zero_ether_addr(mask->dst)) {
+                       if (!is_broadcast_ether_addr(mask->dst)) {
+                               NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
+                               return -EINVAL;
+                       }
+
+                       input->filter.match_flags |=
+                               IGB_FILTER_FLAG_DST_MAC_ADDR;
+                       ether_addr_copy(input->filter.dst_addr, key->dst);
+               }
+
+               if (!is_zero_ether_addr(mask->src)) {
+                       if (!is_broadcast_ether_addr(mask->src)) {
+                               NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
+                               return -EINVAL;
+                       }
+
+                       input->filter.match_flags |=
+                               IGB_FILTER_FLAG_SRC_MAC_ADDR;
+                       ether_addr_copy(input->filter.src_addr, key->src);
+               }
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *key, *mask;
+
+               key = skb_flow_dissector_target(f->dissector,
+                                               FLOW_DISSECTOR_KEY_BASIC,
+                                               f->key);
+               mask = skb_flow_dissector_target(f->dissector,
+                                                FLOW_DISSECTOR_KEY_BASIC,
+                                                f->mask);
+
+               if (mask->n_proto) {
+                       if (mask->n_proto != ETHER_TYPE_FULL_MASK) {
+                               NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
+                               return -EINVAL;
+                       }
+
+                       input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
+                       input->filter.etype = key->n_proto;
+               }
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_dissector_key_vlan *key, *mask;
+
+               key = skb_flow_dissector_target(f->dissector,
+                                               FLOW_DISSECTOR_KEY_VLAN,
+                                               f->key);
+               mask = skb_flow_dissector_target(f->dissector,
+                                                FLOW_DISSECTOR_KEY_VLAN,
+                                                f->mask);
+
+               if (mask->vlan_priority) {
+                       if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+                               NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
+                               return -EINVAL;
+                       }
+
+                       input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+                       input->filter.vlan_tci = key->vlan_priority;
+               }
+       }
+
+       input->action = traffic_class;
+       input->cookie = f->cookie;
+
+       return 0;
+}
+
+static int igb_configure_clsflower(struct igb_adapter *adapter,
+                                  struct tc_cls_flower_offload *cls_flower)
+{
+       struct netlink_ext_ack *extack = cls_flower->common.extack;
+       struct igb_nfc_filter *filter, *f;
+       int err, tc;
+
+       tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+       if (tc < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class");
+               return -EINVAL;
+       }
+
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (!filter)
+               return -ENOMEM;
+
+       err = igb_parse_cls_flower(adapter, cls_flower, tc, filter);
+       if (err < 0)
+               goto err_parse;
+
+       spin_lock(&adapter->nfc_lock);
+
+       hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) {
+               if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+                       err = -EEXIST;
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "This filter is already set in ethtool");
+                       goto err_locked;
+               }
+       }
+
+       hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) {
+               if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+                       err = -EEXIST;
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "This filter is already set in cls_flower");
+                       goto err_locked;
+               }
+       }
+
+       err = igb_add_filter(adapter, filter);
+       if (err < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter");
+               goto err_locked;
+       }
+
+       hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list);
+
+       spin_unlock(&adapter->nfc_lock);
+
+       return 0;
+
+err_locked:
+       spin_unlock(&adapter->nfc_lock);
+
+err_parse:
+       kfree(filter);
+
+       return err;
+}
+
+static int igb_delete_clsflower(struct igb_adapter *adapter,
+                               struct tc_cls_flower_offload *cls_flower)
+{
+       struct igb_nfc_filter *filter;
+       int err;
+
+       spin_lock(&adapter->nfc_lock);
+
+       hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node)
+               if (filter->cookie == cls_flower->cookie)
+                       break;
+
+       if (!filter) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       err = igb_erase_filter(adapter, filter);
+       if (err < 0)
+               goto out;
+
+       hlist_del(&filter->nfc_node);
+       kfree(filter);
+
+out:
+       spin_unlock(&adapter->nfc_lock);
+
+       return err;
+}
+
+static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
+                                  struct tc_cls_flower_offload *cls_flower)
+{
+       switch (cls_flower->command) {
+       case TC_CLSFLOWER_REPLACE:
+               return igb_configure_clsflower(adapter, cls_flower);
+       case TC_CLSFLOWER_DESTROY:
+               return igb_delete_clsflower(adapter, cls_flower);
+       case TC_CLSFLOWER_STATS:
+               return -EOPNOTSUPP;
+       default:
+               return -EINVAL;
+       }
+}
+
+static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                void *cb_priv)
+{
+       struct igb_adapter *adapter = cb_priv;
+
+       if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return igb_setup_tc_cls_flower(adapter, type_data);
+
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int igb_setup_tc_block(struct igb_adapter *adapter,
+                             struct tc_block_offload *f)
+{
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
+                                            adapter, adapter);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
+                                       adapter);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
 static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
                        void *type_data)
 {
@@ -2506,6 +2745,8 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
        switch (type) {
        case TC_SETUP_QDISC_CBS:
                return igb_offload_cbs(adapter, type_data);
+       case TC_SETUP_BLOCK:
+               return igb_setup_tc_block(adapter, type_data);
 
        default:
                return -EOPNOTSUPP;
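
With TC_SETUP_BLOCK wired up above, flower rules can be offloaded on the clsact ingress block of an igb port. A hypothetical command sequence (device name, address, and traffic-class layout assumed; hardware traffic classes must already exist, e.g. from a cbs or mqprio configuration):

    tc qdisc add dev eth0 clsact
    tc filter add dev eth0 ingress flower skip_sw \
            dst_mac 00:1b:21:dd:ee:ff hw_tc 1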
@@ -2807,6 +3048,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (hw->mac.type >= e1000_82576)
                netdev->features |= NETIF_F_SCTP_CRC;
 
+       if (hw->mac.type >= e1000_i350)
+               netdev->features |= NETIF_F_HW_TC;
+
 #define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
                                  NETIF_F_GSO_GRE_CSUM | \
                                  NETIF_F_GSO_IPXIP4 | \
@@ -6841,8 +7085,35 @@ static void igb_set_default_mac_filter(struct igb_adapter *adapter)
        igb_rar_set_index(adapter, 0);
 }
 
-static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
-                             const u8 queue)
+/* If the filter to be added and an already existing filter express
+ * the same address and address type, the existing entry can be
+ * reused: only the remaining configuration, for example the queue to
+ * steer traffic to, is overridden.
+ */
+static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry,
+                                     const u8 *addr, const u8 flags)
+{
+       if (!(entry->state & IGB_MAC_STATE_IN_USE))
+               return true;
+
+       if ((entry->state & IGB_MAC_STATE_SRC_ADDR) !=
+           (flags & IGB_MAC_STATE_SRC_ADDR))
+               return false;
+
+       if (!ether_addr_equal(addr, entry->addr))
+               return false;
+
+       return true;
+}
+
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue'.
+ * 'flags' indicates what kind of match is made: the match is by
+ * default on the destination address, and the flag
+ * IGB_MAC_STATE_SRC_ADDR selects matching by source address instead.
+ */
+static int igb_add_mac_filter_flags(struct igb_adapter *adapter,
+                                   const u8 *addr, const u8 queue,
+                                   const u8 flags)
 {
        struct e1000_hw *hw = &adapter->hw;
        int rar_entries = hw->mac.rar_entry_count -
@@ -6857,12 +7128,13 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
         * addresses.
         */
        for (i = 0; i < rar_entries; i++) {
-               if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
+               if (!igb_mac_entry_can_be_used(&adapter->mac_table[i],
+                                              addr, flags))
                        continue;
 
                ether_addr_copy(adapter->mac_table[i].addr, addr);
                adapter->mac_table[i].queue = queue;
-               adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE;
+               adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags;
 
                igb_rar_set_index(adapter, i);
                return i;
@@ -6871,8 +7143,21 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
        return -ENOSPC;
 }
 
-static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
+static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
                              const u8 queue)
+{
+       return igb_add_mac_filter_flags(adapter, addr, queue, 0);
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue'. 'flags' indicates what kind of match is to be removed:
+ * the match is by default on the destination address, and the flag
+ * IGB_MAC_STATE_SRC_ADDR selects removal of a source-address match
+ * instead.
+ */
+static int igb_del_mac_filter_flags(struct igb_adapter *adapter,
+                                   const u8 *addr, const u8 queue,
+                                   const u8 flags)
 {
        struct e1000_hw *hw = &adapter->hw;
        int rar_entries = hw->mac.rar_entry_count -
@@ -6889,14 +7174,26 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
        for (i = 0; i < rar_entries; i++) {
                if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE))
                        continue;
+               if ((adapter->mac_table[i].state & flags) != flags)
+                       continue;
                if (adapter->mac_table[i].queue != queue)
                        continue;
                if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
                        continue;
 
-               adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE;
-               memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
-               adapter->mac_table[i].queue = 0;
+               /* When a filter for the default address is "deleted",
+                * we return it to its initial configuration
+                */
+               if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) {
+                       adapter->mac_table[i].state =
+                               IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE;
+                       adapter->mac_table[i].queue =
+                               adapter->vfs_allocated_count;
+               } else {
+                       adapter->mac_table[i].state = 0;
+                       adapter->mac_table[i].queue = 0;
+                       memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+               }
 
                igb_rar_set_index(adapter, i);
                return 0;
@@ -6905,6 +7202,34 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
        return -ENOENT;
 }
 
+static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
+                             const u8 queue)
+{
+       return igb_del_mac_filter_flags(adapter, addr, queue, 0);
+}
+
+int igb_add_mac_steering_filter(struct igb_adapter *adapter,
+                               const u8 *addr, u8 queue, u8 flags)
+{
+       struct e1000_hw *hw = &adapter->hw;
+
+       /* In theory, this should be supported on 82575 as well, but
+        * that part wasn't easily accessible during development.
+        */
+       if (hw->mac.type != e1000_i210)
+               return -EOPNOTSUPP;
+
+       return igb_add_mac_filter_flags(adapter, addr, queue,
+                                       IGB_MAC_STATE_QUEUE_STEERING | flags);
+}
+
+int igb_del_mac_steering_filter(struct igb_adapter *adapter,
+                               const u8 *addr, u8 queue, u8 flags)
+{
+       return igb_del_mac_filter_flags(adapter, addr, queue,
+                                       IGB_MAC_STATE_QUEUE_STEERING | flags);
+}
+
 static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -8748,12 +9073,24 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index)
                if (is_valid_ether_addr(addr))
                        rar_high |= E1000_RAH_AV;
 
-               if (hw->mac.type == e1000_82575)
+               if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR)
+                       rar_high |= E1000_RAH_ASEL_SRC_ADDR;
+
+               switch (hw->mac.type) {
+               case e1000_82575:
+               case e1000_i210:
+                       if (adapter->mac_table[index].state &
+                           IGB_MAC_STATE_QUEUE_STEERING)
+                               rar_high |= E1000_RAH_QSEL_ENABLE;
+
                        rar_high |= E1000_RAH_POOL_1 *
                                    adapter->mac_table[index].queue;
-               else
+                       break;
+               default:
                        rar_high |= E1000_RAH_POOL_1 <<
                                    adapter->mac_table[index].queue;
+                       break;
+               }
        }
 
        wr32(E1000_RAL(index), rar_low);
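
The switch above distinguishes two encodings of the RAH pool/queue field (bits 18-25, per E1000_RAH_POOL_MASK): on 82575 and i210 the queue number is stored as a binary value in the field, with E1000_RAH_QSEL_ENABLE turning it into a receive-queue select, while the remaining parts expect a one-hot pool bit. A worked example, assuming E1000_RAH_POOL_1 is bit 18 (0x00040000):

    /* Hypothetical worked example for queue = 3. */
    u32 binary = 0x00040000 * 3;   /* 0x000C0000: queue number 3 in the field */
    u32 onehot = 0x00040000 << 3;  /* 0x00200000: pool bit 3 set (one-hot)    */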
@@ -9191,6 +9528,9 @@ static void igb_nfc_filter_exit(struct igb_adapter *adapter)
        hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
                igb_erase_filter(adapter, rule);
 
+       hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node)
+               igb_erase_filter(adapter, rule);
+
        spin_unlock(&adapter->nfc_lock);
 }
 
index 7454b9895a651c0b3ef47f5dab6a095b5355d4db..9f4d700e09df33cb5d3e17576859a563f9c6c52c 100644 (file)
@@ -1,21 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
-/* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580
- *
- * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- */
+/* Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> */
+
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/pci.h>
index efe29dae384ada199110505a4a4afa816418ca33..afd3e36eae75704a2a3b6144db0e4b50c8086bb1 100644 (file)
@@ -1,31 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel(R) 82576 Virtual Function Linux driver
-# Copyright(c) 2009 - 2012 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 2009 - 2018 Intel Corporation.
 #
 # Makefile for the Intel(R) 82576 VF ethernet driver
 #
index 04bcfec0641b9cef2968a4f7b10ac33135950785..4437f832412ddc77c8b265c53295d25e98b1e275 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 1999 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000_DEFINES_H_
 #define _E1000_DEFINES_H_
index ca39e3cccaeb738fd88c5cf3fa7b6be3e4536294..3ae358b35227a9ca2d265ac4744cc45620020707 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 /* ethtool support for igbvf */
 
index f5bf248e22eb0aea3cd49cb72168218dbc7611c6..eee26a3be90ba8aad04baa961f01badb0d889565 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 /* Linux PRO/1000 Ethernet Driver main header file */
 
index 9195884096f87a269acfa737a6542f5d15b47ab6..163e5838f7c2e77b83501ed37c5f9931f8cc37fb 100644 (file)
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 #include "mbx.h"
 
index 479b062fe9eed3a997a5bbf45c3de32f4e9fb324..e5b31818d565859df4ee21ebcc60a5646882d319 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 1999 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _E1000_MBX_H_
 #define _E1000_MBX_H_
index e2b7502f1953899d5cd8f65e6d40d8adc7c1358a..f818f060e5a736c830a3de263d68b27231a04c1a 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
index 614e52409f11d0448c3aa2cc9639419203fd9da7..625a309a3355610805e7e230f05a6de0cfc7c002 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 #ifndef _E1000_REGS_H_
 #define _E1000_REGS_H_
index bfe8d8297b2e2daac6fd80b258735e68a557338b..b8ba3f94c363229b230d3c8dbe1a882149e6e265 100644
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 #include "vf.h"
 
index 193b50026246fed1b0750db99a201fe8ce843744..c71b0d7dbceed5c25c4c0453350eb03931a30abc 100644
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel(R) 82576 Virtual Function Linux driver
-  Copyright(c) 2009 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
 
 #ifndef _E1000_VF_H_
 #define _E1000_VF_H_
index 1b42dd554dd274642266308ca9dad1272e9e1633..2433e9300a33dd486c4f53797ea1c56350bdb34a 100644
@@ -1,32 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel PRO/10GbE Linux driver
 # Copyright(c) 1999 - 2008 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
 #
 # Makefile for the Intel(R) PRO/10GbE ethernet driver
 #
index 92022841755f249638bbd9aa76c303776bd1aa2b..e85271b68410843bb7aebd5ef20462458d826ea3 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #ifndef _IXGB_H_
 #define _IXGB_H_
index eca216b9b859b623d4db03e6ea8987f3e5c8bada..129286fc1634d45681d3a1ef08276dd6e7e9e4ba 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
index 475297a810fe6a42cbfeec6bd58ea95c7e76fce3..3ee0a09e5d0a02ea0808fbed8900aff33f888a25 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #ifndef _IXGB_EE_H_
 #define _IXGB_EE_H_
index d10a0d242dda5db4f8c474a0814bc7a1b9eae491..43744bf0fc1cf0f4de4e9bcbcdd85008531259e4 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 /* ethtool support for ixgb */
 
index bf9a220f71fba5dfe6c5ed83c45569c87e30d26b..cbaa933ef30d7e16d45c7b8342aa1e6d7ab6823c 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 /* ixgb_hw.c
  * Shared functions for accessing and configuring the adapter
index 19f36d87ef6198a4f61cb02288e4ff4e025d3f60..6064583095da79578a761993c9a57cea230f920e 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #ifndef _IXGB_HW_H_
 #define _IXGB_HW_H_
index 24e849902d600ace0af72f50ee58f49487abac2d..9695b8215f014aa5d8a0229c4f4dfb416d13ea24 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #ifndef _IXGB_IDS_H_
 #define _IXGB_IDS_H_
index 2353c383f0a7de092b4ba6581af4516ad692069c..62f2173bc20ed43b5268f7705c03c38f57aac0f8 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
index b1710379192e4f96f2e7625ca415f30ffc19ad3a..7bd54efa698d318052ae31035f14e9eb637f4150 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 /* glue for the OS independent part of ixgb
  * includes register access macros
index 04a60640dddaaaf4f01d85169d61d3e77a3d83e8..f0cadd532c539c1d0a896d68693c2c24b96a2a04 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel PRO/10GbE Linux driver
-  Copyright(c) 1999 - 2008 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
index 4cd96c88cb5d1547235191ad31cc1294b3b1127d..5414685189cef099d83b4ee46e3261d16c1e2825 100644
@@ -1,32 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel 10 Gigabit PCI Express Linux driver
-# Copyright(c) 1999 - 2013 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 1999 - 2018 Intel Corporation.
 #
 # Makefile for the Intel(R) 10GbE PCI Express ethernet driver
 #
index 7dd5038cfcc4465cbaacc3c1564de5892bb6951a..fc534e91c6b247ebc79c47c470445eebe20d3b48 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_H_
 #define _IXGBE_H_
@@ -305,7 +279,6 @@ enum ixgbe_ring_state_t {
 struct ixgbe_fwd_adapter {
        unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
        struct net_device *netdev;
-       struct ixgbe_adapter *real_adapter;
        unsigned int tx_base_queue;
        unsigned int rx_base_queue;
        int pool;
index cb0fe5fedb33b8c2482a8ef5ed3b70ef87dddcaa..eee277c1bedfc7bc6f3c04420f8b99ae22ee143f 100644
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/pci.h>
 #include <linux/delay.h>
index 66a74f4651e8d980803857159887dd68f53c2f0b..1e49716f52bcd2cdeea88a7a71f3b871e27927d1 100644
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/pci.h>
 #include <linux/delay.h>
@@ -1462,7 +1436,8 @@ void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
 {
 
        u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
-       u32 bucket_hash = 0, hi_dword = 0;
+       u32 bucket_hash = 0;
+       __be32 hi_dword = 0;
        int i;
 
        /* Apply masks to input data */
@@ -1501,7 +1476,7 @@ void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
         * Limit hash to 13 bits since max bucket count is 8K.
         * Store result at the end of the input stream.
         */
-       input->formatted.bkt_hash = bucket_hash & 0x1FFF;
+       input->formatted.bkt_hash = (__force __be16)(bucket_hash & 0x1FFF);
 }
 
 /**
@@ -1610,7 +1585,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
                return IXGBE_ERR_CONFIG;
        }
 
-       switch (input_mask->formatted.flex_bytes & 0xFFFF) {
+       switch ((__force u16)input_mask->formatted.flex_bytes & 0xFFFF) {
        case 0x0000:
                /* Mask Flex Bytes */
                fdirm |= IXGBE_FDIRM_FLEX;
@@ -1680,13 +1655,13 @@ s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
        IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
 
        /* record vlan (little-endian) and flex_bytes(big-endian) */
-       fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
+       fdirvlan = IXGBE_STORE_AS_BE16((__force u16)input->formatted.flex_bytes);
        fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
        fdirvlan |= ntohs(input->formatted.vlan_id);
        IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
 
        /* configure FDIRHASH register */
-       fdirhash = input->formatted.bkt_hash;
+       fdirhash = (__force u32)input->formatted.bkt_hash;
        fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
        IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
 
@@ -1724,7 +1699,7 @@ s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
        s32 err;
 
        /* configure FDIRHASH register */
-       fdirhash = input->formatted.bkt_hash;
+       fdirhash = (__force u32)input->formatted.bkt_hash;
        fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
        IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
 
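A note on the recurring (__force ...) casts in the hunks above: __be16, __be32 and __le32 are sparse "bitwise" types, and converting them to plain integers makes a sparse run (make C=1) warn. Where the reinterpretation is deliberate, as when a big-endian hash field is packed into a CPU-order register word, __force silences the checker without changing the generated code. A minimal standalone sketch; the helper name pack_bkt_hash is hypothetical, not code from this commit:

#include <linux/types.h>

/* Deliberately reinterpret the raw bits of a big-endian field as a
 * plain CPU-order register payload; __force tells sparse that mixing
 * bitwise and plain integer types here is intentional. */
static u32 pack_bkt_hash(__be16 bkt_hash)
{
	return (__force u32)bkt_hash;
}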
index 633be93f3dbbe8454c7cc832d2b268bec1507ef4..3f5c350716bb0e595d79ec928188f5862461694c 100644
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/pci.h>
 #include <linux/delay.h>
@@ -3652,7 +3626,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length,
         */
        for (i = 0; i < dword_len; i++)
                IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
-                                     i, cpu_to_le32(buffer[i]));
+                                     i, (__force u32)cpu_to_le32(buffer[i]));
 
        /* Setting this bit tells the ARC that a new command is pending. */
        IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
index 2b311382167a8f2112cfb3b01a210a9c559010cc..4b531e8ae38ae7eb1f25fc569fc7382783f12c3e 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_COMMON_H_
 #define _IXGBE_COMMON_H_
index aaea8282bfd26aedd2e45a9baa8c966a34a2578b..d26cea5b43bd64be9bbd4df4027b2ce63cde578b 100644
@@ -1,31 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include "ixgbe_type.h"
index 73b6362d4327d64ec5cf332303d2a8424cfd51b4..60cd5863bf5e63a19c3867047ffb8f31ac6e8995 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _DCB_CONFIG_H_
 #define _DCB_CONFIG_H_
index 085130626330695891b0be3ea3e06a138c7d28ff..379ae747cdce421d7c52884541aee8c73ab47f0a 100644
@@ -1,31 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include "ixgbe_type.h"
index 7edce607f901080d0d8c30cabc28370f7c6c01eb..fdca41abb44ccbe71ab200118454f7b89245fa4a 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _DCB_82598_CONFIG_H_
 #define _DCB_82598_CONFIG_H_
index 1eed6811e914f2f428e214c37b7475920ebf34e2..7948849840a586d0c697d4488d68768a8cc208bf 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include "ixgbe_type.h"
index fa030f0abc18f2662bf58e1f2415ccd20d618da5..c6f084883cabcbb2dacb2d034cd1ec943466643e 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _DCB_82599_CONFIG_H_
 #define _DCB_82599_CONFIG_H_
index b33f3f87e4b17cf3cde104c076ec298664d56afb..c00332d2e02a8047b7658f0b6e00607e4d592468 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include <linux/dcbnl.h>
index ad54080488ee52be37617c4d2e53b1a5c57f2ec0..55fe8114fe99ef8aa68f5289c892dc8767078315 100644
@@ -1,30 +1,6 @@
-/*******************************************************************************
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
 #include <linux/debugfs.h>
 #include <linux/module.h>
 
index c0e6ab42e0e1dac088f0496fc3fbbc0a65ede993..bdd179c29ea4ce5cb49fd4ea0b7d96436f5c82cc 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* ethtool support for ixgbe */
 
index 7a09a40e447272a875323a902db28b86b8193fbb..94b3165ff543055621326783d330da1f7b4272e7 100644
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include <linux/if_ether.h>
@@ -465,7 +440,7 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
        case cpu_to_le32(IXGBE_RXDADV_STAT_FCSTAT_FCPRSP):
                dma_unmap_sg(&adapter->pdev->dev, ddp->sgl,
                             ddp->sgc, DMA_FROM_DEVICE);
-               ddp->err = ddp_err;
+               ddp->err = (__force u32)ddp_err;
                ddp->sgl = NULL;
                ddp->sgc = 0;
                /* fall through */
index cf19199015143f6c1efbb507b91df8433133f81e..724f5382329f7170c572131a2048ffa40366ddde 100644
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_FCOE_H
 #define _IXGBE_FCOE_H
index 68af127987bcbc65981ebbd7e7be5512f4b63bde..99b170f1efd190b2851f7353dcaa9e73387d9328 100644
@@ -1,29 +1,5 @@
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux driver
- * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */
 
 #include "ixgbe.h"
 #include <net/xfrm.h>
@@ -43,8 +19,9 @@ static void ixgbe_ipsec_set_tx_sa(struct ixgbe_hw *hw, u16 idx,
        int i;
 
        for (i = 0; i < 4; i++)
-               IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i), cpu_to_be32(key[3 - i]));
-       IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, cpu_to_be32(salt));
+               IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i),
+                               (__force u32)cpu_to_be32(key[3 - i]));
+       IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, (__force u32)cpu_to_be32(salt));
        IXGBE_WRITE_FLUSH(hw);
 
        reg = IXGBE_READ_REG(hw, IXGBE_IPSTXIDX);
@@ -93,7 +70,8 @@ static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi,
        int i;
 
        /* store the SPI (in bigendian) and IPidx */
-       IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, cpu_to_le32(spi));
+       IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI,
+                       (__force u32)cpu_to_le32((__force u32)spi));
        IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, ip_idx);
        IXGBE_WRITE_FLUSH(hw);
 
@@ -101,8 +79,9 @@ static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi,
 
        /* store the key, salt, and mode */
        for (i = 0; i < 4; i++)
-               IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i), cpu_to_be32(key[3 - i]));
-       IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, cpu_to_be32(salt));
+               IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i),
+                               (__force u32)cpu_to_be32(key[3 - i]));
+       IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, (__force u32)cpu_to_be32(salt));
        IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, mode);
        IXGBE_WRITE_FLUSH(hw);
 
@@ -121,7 +100,8 @@ static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
 
        /* store the ip address */
        for (i = 0; i < 4; i++)
-               IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(i), cpu_to_le32(addr[i]));
+               IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(i),
+                               (__force u32)cpu_to_le32((__force u32)addr[i]));
        IXGBE_WRITE_FLUSH(hw);
 
        ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_ip_tbl);
@@ -391,7 +371,8 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
        struct xfrm_state *ret = NULL;
 
        rcu_read_lock();
-       hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist, spi)
+       hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
+                                  (__force u32)spi) {
                if (spi == rsa->xs->id.spi &&
                    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
                      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
@@ -401,6 +382,7 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
                        xfrm_state_hold(ret);
                        break;
                }
+       }
        rcu_read_unlock();
        return ret;
 }
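The lookup above walks one bucket of an RCU-protected hash table: hash_for_each_possible_rcu() visits every entry whose key hashes to the same bucket, so the loop body must still compare the real key (here the SPI), and the added braces simply wrap what is now a multi-line loop. A generic usage sketch under assumed, hypothetical types (rx_entry, lookup_spi are not from this commit):

#include <linux/hashtable.h>
#include <linux/rcupdate.h>

struct rx_entry {
	u32 spi;
	struct hlist_node hlist;
};

static DEFINE_HASHTABLE(rx_table, 10);	/* 2^10 buckets */

static struct rx_entry *lookup_spi(u32 spi)
{
	struct rx_entry *e, *found = NULL;

	rcu_read_lock();
	/* Entries that merely collide on the bucket are visited too,
	 * hence the explicit key comparison inside the loop. */
	hash_for_each_possible_rcu(rx_table, e, hlist, spi) {
		if (e->spi == spi) {
			found = e;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}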
@@ -593,7 +575,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
 
                /* hash the new entry for faster search in Rx path */
                hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
-                            rsa.xs->id.spi);
+                            (__force u64)rsa.xs->id.spi);
        } else {
                struct tx_sa tsa;
 
@@ -677,7 +659,8 @@ static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
                        if (!ipsec->ip_tbl[ipi].ref_cnt) {
                                memset(&ipsec->ip_tbl[ipi], 0,
                                       sizeof(struct rx_ip_sa));
-                               ixgbe_ipsec_set_rx_ip(hw, ipi, zerobuf);
+                               ixgbe_ipsec_set_rx_ip(hw, ipi,
+                                                     (__force __be32 *)zerobuf);
                        }
                }
 
@@ -943,8 +926,8 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
        kfree(ipsec->ip_tbl);
        kfree(ipsec->rx_tbl);
        kfree(ipsec->tx_tbl);
+       kfree(ipsec);
 err1:
-       kfree(adapter->ipsec);
        netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
 }
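
The hunk above fixes the unwind path: on failure the local ipsec allocation is freed, whereas adapter->ipsec is never assigned on this path, so freeing it was a no-op that leaked the new structure. A standalone sketch of the goto-unwind idiom, with hypothetical names and sizes:

#include <stdio.h>
#include <stdlib.h>

struct sa_tables {
        void *ip_tbl, *rx_tbl, *tx_tbl;
};

/* Return a fully built table set or NULL; never publish a half-built
 * pointer, and free only what this function itself allocated. */
static struct sa_tables *sa_tables_alloc(size_t n)
{
        struct sa_tables *t = calloc(1, sizeof(*t));

        if (!t)
                goto err1;

        t->ip_tbl = calloc(n, 8);
        t->rx_tbl = calloc(n, 8);
        t->tx_tbl = calloc(n, 8);
        if (!t->ip_tbl || !t->rx_tbl || !t->tx_tbl)
                goto err2;

        return t;

err2:
        free(t->ip_tbl);
        free(t->rx_tbl);
        free(t->tx_tbl);
        free(t);                /* the local allocation, as in the fix */
err1:
        fprintf(stderr, "Unable to allocate memory for SA tables\n");
        return NULL;
}

int main(void)
{
        struct sa_tables *t = sa_tables_alloc(1024);

        if (t) {
                free(t->ip_tbl);
                free(t->rx_tbl);
                free(t->tx_tbl);
                free(t);
        }
        return 0;
}
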
 
index 4f099f516645d072a3ba03246075f7ec5e99e54d..9ef7faadda69464844787b4a1cd3f156e1c1106c 100644 (file)
@@ -1,30 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program.  If not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */
 
 #ifndef _IXGBE_IPSEC_H_
 #define _IXGBE_IPSEC_H_
index ed4cbe94c3554660a024bcfac653caef0deffadd..893a9206e718611250196fb6459d80d73d58cd64 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include "ixgbe_sriov.h"
index 51e7d82a58605adbbd23dde07d0ac0f0be999476..a52d92e182eea431458e6398a21cf5b0164b923b 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/types.h>
 #include <linux/module.h>
@@ -752,8 +727,8 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
                                        ring_desc = "";
                                pr_info("T [0x%03X]    %016llX %016llX %016llX %08X %p %016llX %p%s",
                                        i,
-                                       le64_to_cpu(u0->a),
-                                       le64_to_cpu(u0->b),
+                                       le64_to_cpu((__force __le64)u0->a),
+                                       le64_to_cpu((__force __le64)u0->b),
                                        (u64)dma_unmap_addr(tx_buffer, dma),
                                        dma_unmap_len(tx_buffer, len),
                                        tx_buffer->next_to_watch,
@@ -864,15 +839,15 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
                                /* Descriptor Done */
                                pr_info("RWB[0x%03X]     %016llX %016llX ---------------- %p%s\n",
                                        i,
-                                       le64_to_cpu(u0->a),
-                                       le64_to_cpu(u0->b),
+                                       le64_to_cpu((__force __le64)u0->a),
+                                       le64_to_cpu((__force __le64)u0->b),
                                        rx_buffer_info->skb,
                                        ring_desc);
                        } else {
                                pr_info("R  [0x%03X]     %016llX %016llX %016llX %p%s\n",
                                        i,
-                                       le64_to_cpu(u0->a),
-                                       le64_to_cpu(u0->b),
+                                       le64_to_cpu((__force __le64)u0->a),
+                                       le64_to_cpu((__force __le64)u0->b),
                                        (u64)rx_buffer_info->dma,
                                        rx_buffer_info->skb,
                                        ring_desc);
@@ -1768,15 +1743,14 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
        if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
                ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
 
-       skb->protocol = eth_type_trans(skb, dev);
-
        /* record Rx queue, or update MACVLAN statistics */
        if (netif_is_ixgbe(dev))
                skb_record_rx_queue(skb, rx_ring->queue_index);
        else
                macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
-                                (skb->pkt_type == PACKET_BROADCAST) ||
-                                (skb->pkt_type == PACKET_MULTICAST));
+                                false);
+
+       skb->protocol = eth_type_trans(skb, dev);
 }
 
 static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
@@ -4219,7 +4193,8 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
 static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
-       u32 reg_offset, vf_shift;
+       u16 pool = adapter->num_rx_pools;
+       u32 reg_offset, vf_shift, vmolr;
        u32 gcr_ext, vmdctl;
        int i;
 
@@ -4233,6 +4208,13 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
        vmdctl |= IXGBE_VT_CTL_REPLEN;
        IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
 
+       /* accept untagged packets until a vlan tag is
+        * specifically set for the VMDQ queue/pool
+        */
+       vmolr = IXGBE_VMOLR_AUPE;
+       while (pool--)
+               IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
+
        vf_shift = VMDQ_P(0) % 32;
        reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0;
 
@@ -4900,36 +4882,6 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
        return -ENOMEM;
 }
 
-/**
- * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
- * @netdev: network interface device structure
- * @vfn: pool to associate with unicast addresses
- *
- * Writes unicast address list to the RAR table.
- * Returns: -ENOMEM on failure/insufficient address space
- *                0 on no addresses written
- *                X on writing X addresses to the RAR table
- **/
-static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
-{
-       struct ixgbe_adapter *adapter = netdev_priv(netdev);
-       int count = 0;
-
-       /* return ENOMEM indicating insufficient memory for addresses */
-       if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn))
-               return -ENOMEM;
-
-       if (!netdev_uc_empty(netdev)) {
-               struct netdev_hw_addr *ha;
-               netdev_for_each_uc_addr(ha, netdev) {
-                       ixgbe_del_mac_filter(adapter, ha->addr, vfn);
-                       ixgbe_add_mac_filter(adapter, ha->addr, vfn);
-                       count++;
-               }
-       }
-       return count;
-}
-
 static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -5309,29 +5261,6 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
        spin_unlock(&adapter->fdir_perfect_lock);
 }
 
-static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool,
-                                     struct ixgbe_adapter *adapter)
-{
-       struct ixgbe_hw *hw = &adapter->hw;
-       u32 vmolr;
-
-       /* No unicast promiscuous support for VMDQ devices. */
-       vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool));
-       vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE);
-
-       /* clear the affected bit */
-       vmolr &= ~IXGBE_VMOLR_MPE;
-
-       if (dev->flags & IFF_ALLMULTI) {
-               vmolr |= IXGBE_VMOLR_MPE;
-       } else {
-               vmolr |= IXGBE_VMOLR_ROMPE;
-               hw->mac.ops.update_mc_addr_list(hw, dev);
-       }
-       ixgbe_write_uc_addr_list(adapter->netdev, pool);
-       IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
-}
-
 /**
  * ixgbe_clean_rx_ring - Free Rx Buffers per Queue
  * @rx_ring: ring to free buffers from
@@ -5384,21 +5313,17 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
        rx_ring->next_to_use = 0;
 }
 
-static int ixgbe_fwd_ring_up(struct net_device *vdev,
+static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
                             struct ixgbe_fwd_adapter *accel)
 {
-       struct ixgbe_adapter *adapter = accel->real_adapter;
+       struct net_device *vdev = accel->netdev;
        int i, baseq, err;
 
-       if (!test_bit(accel->pool, adapter->fwd_bitmask))
-               return 0;
-
        baseq = accel->pool * adapter->num_rx_queues_per_pool;
        netdev_dbg(vdev, "pool %i:%i queues %i:%i\n",
                   accel->pool, adapter->num_rx_pools,
                   baseq, baseq + adapter->num_rx_queues_per_pool);
 
-       accel->netdev = vdev;
        accel->rx_base_queue = baseq;
        accel->tx_base_queue = baseq;
 
@@ -5415,26 +5340,36 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev,
         */
        err = ixgbe_add_mac_filter(adapter, vdev->dev_addr,
                                   VMDQ_P(accel->pool));
-       if (err >= 0) {
-               ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter);
+       if (err >= 0)
                return 0;
-       }
+
+       /* if we cannot add the MAC rule then disable the offload */
+       macvlan_release_l2fw_offload(vdev);
 
        for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
                adapter->rx_ring[baseq + i]->netdev = NULL;
 
+       netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
+
+       clear_bit(accel->pool, adapter->fwd_bitmask);
+       kfree(accel);
+
        return err;
 }
 
-static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
+static int ixgbe_macvlan_up(struct net_device *vdev, void *data)
 {
-       if (netif_is_macvlan(upper)) {
-               struct macvlan_dev *dfwd = netdev_priv(upper);
-               struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+       struct ixgbe_adapter *adapter = data;
+       struct ixgbe_fwd_adapter *accel;
 
-               if (dfwd->fwd_priv)
-                       ixgbe_fwd_ring_up(upper, vadapter);
-       }
+       if (!netif_is_macvlan(vdev))
+               return 0;
+
+       accel = macvlan_accel_priv(vdev);
+       if (!accel)
+               return 0;
+
+       ixgbe_fwd_ring_up(adapter, accel);
 
        return 0;
 }
@@ -5442,7 +5377,7 @@ static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
 static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
 {
        netdev_walk_all_upper_dev_rcu(adapter->netdev,
-                                     ixgbe_upper_dev_walk, NULL);
+                                     ixgbe_macvlan_up, adapter);
 }
 
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
@@ -7816,7 +7751,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 
        /* remove payload length from inner checksum */
        paylen = skb->len - l4_offset;
-       csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
+       csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
        /* update gso size and bytecount with header size */
        first->gso_segs = skb_shinfo(skb)->gso_segs;
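
csum_replace_by_diff() above strips the payload length from the TCP pseudo-header checksum; the (__force __wsum) cast re-annotates htonl()'s big-endian result as the wide-sum type the helper expects. Since internet checksums are one's-complement sums, removing a value amounts to adding its complement. A host-byte-order sketch with illustrative values:

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
}

/* Remove a 32-bit value from a folded checksum: in one's-complement
 * arithmetic, subtracting x is adding ~x, one 16-bit word at a time. */
static uint16_t csum_remove32(uint16_t check, uint32_t val)
{
        uint32_t sum = (uint16_t)~check;        /* unfold */

        sum += (uint16_t)~(val >> 16);
        sum += (uint16_t)~(val & 0xffff);
        return (uint16_t)~csum_fold(sum);
}

int main(void)
{
        uint16_t check = 0x1c46;        /* illustrative folded checksum */
        uint32_t paylen = 1460;         /* payload length to strip */

        printf("old 0x%04x -> new 0x%04x\n", check,
               csum_remove32(check, paylen));
        return 0;
}
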
@@ -8843,6 +8778,49 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter)
 }
 
 #endif /* CONFIG_IXGBE_DCB */
+static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
+{
+       struct ixgbe_adapter *adapter = data;
+       struct ixgbe_fwd_adapter *accel;
+       int pool;
+
+       /* we only care about macvlans... */
+       if (!netif_is_macvlan(vdev))
+               return 0;
+
+       /* that have hardware offload enabled... */
+       accel = macvlan_accel_priv(vdev);
+       if (!accel)
+               return 0;
+
+       /* If we can relocate to a different bit, do so */
+       pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+       if (pool < adapter->num_rx_pools) {
+               set_bit(pool, adapter->fwd_bitmask);
+               accel->pool = pool;
+               return 0;
+       }
+
+       /* if we cannot find a free pool then disable the offload */
+       netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
+       macvlan_release_l2fw_offload(vdev);
+       kfree(accel);
+
+       return 0;
+}
+
+static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
+{
+       struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+       /* flush any stale bits out of the fwd bitmask */
+       bitmap_clear(adapter->fwd_bitmask, 1, 63);
+
+       /* walk through upper devices reassigning pools */
+       netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool,
+                                     adapter);
+}
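
Pools live as bits in fwd_bitmask: bit 0 stays set for the PF's default pool, allocation takes the lowest clear bit, and the defrag pass above repacks offloaded macvlans into the lowest available bits after a queue reconfiguration. A standalone sketch of that bookkeeping; names and pool counts are illustrative:

#include <stdint.h>
#include <stdio.h>

static uint64_t fwd_bitmask = 1;        /* bit 0: the PF's default pool */

static int first_zero_bit(uint64_t mask, unsigned int nbits)
{
        unsigned int i;

        for (i = 0; i < nbits; i++)
                if (!(mask & (1ULL << i)))
                        return (int)i;
        return -1;
}

static int pool_alloc(unsigned int num_pools)
{
        int pool = first_zero_bit(fwd_bitmask, num_pools);

        if (pool >= 0)
                fwd_bitmask |= 1ULL << pool;
        return pool;            /* -1 means no free pool: disable offload */
}

static void pool_free(int pool)
{
        fwd_bitmask &= ~(1ULL << pool);
}

int main(void)
{
        int a = pool_alloc(4), b = pool_alloc(4);

        printf("a=%d b=%d mask=%#llx\n", a, b,
               (unsigned long long)fwd_bitmask);
        pool_free(a);
        printf("reused=%d\n", pool_alloc(4));   /* lowest free bit again */
        return 0;
}
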
+
 /**
  * ixgbe_setup_tc - configure net_device for multiple traffic classes
  *
@@ -8910,6 +8888,8 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
 #endif /* CONFIG_IXGBE_DCB */
        ixgbe_init_interrupt_scheme(adapter);
 
+       ixgbe_defrag_macvlan_pools(dev);
+
        if (netif_running(dev))
                return ixgbe_open(dev);
 
@@ -9014,13 +8994,12 @@ struct upper_walk_data {
 static int get_macvlan_queue(struct net_device *upper, void *_data)
 {
        if (netif_is_macvlan(upper)) {
-               struct macvlan_dev *dfwd = netdev_priv(upper);
-               struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+               struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
                struct upper_walk_data *data = _data;
                struct ixgbe_adapter *adapter = data->adapter;
                int ifindex = data->ifindex;
 
-               if (vadapter && vadapter->netdev->ifindex == ifindex) {
+               if (vadapter && upper->ifindex == ifindex) {
                        data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
                        data->action = data->queue;
                        return 1;
@@ -9125,7 +9104,8 @@ static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
 
                for (j = 0; field_ptr[j].val; j++) {
                        if (field_ptr[j].off == off) {
-                               field_ptr[j].val(input, mask, val, m);
+                               field_ptr[j].val(input, mask, (__force u32)val,
+                                                (__force u32)m);
                                input->filter.formatted.flow_type |=
                                        field_ptr[j].type;
                                found_entry = true;
@@ -9134,8 +9114,10 @@ static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
                }
                if (nexthdr) {
                        if (nexthdr->off == cls->knode.sel->keys[i].off &&
-                           nexthdr->val == cls->knode.sel->keys[i].val &&
-                           nexthdr->mask == cls->knode.sel->keys[i].mask)
+                           nexthdr->val ==
+                           (__force u32)cls->knode.sel->keys[i].val &&
+                           nexthdr->mask ==
+                           (__force u32)cls->knode.sel->keys[i].mask)
                                found_jump_field = true;
                        else
                                continue;
@@ -9239,7 +9221,8 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
                for (i = 0; nexthdr[i].jump; i++) {
                        if (nexthdr[i].o != cls->knode.sel->offoff ||
                            nexthdr[i].s != cls->knode.sel->offshift ||
-                           nexthdr[i].m != cls->knode.sel->offmask)
+                           nexthdr[i].m !=
+                           (__force u32)cls->knode.sel->offmask)
                                return err;
 
                        jump = kzalloc(sizeof(*jump), GFP_KERNEL);
@@ -9460,6 +9443,22 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
        return features;
 }
 
+static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter)
+{
+       int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+                       num_online_cpus());
+
+       /* go back to full RSS if we're not running SR-IOV */
+       if (!adapter->ring_feature[RING_F_VMDQ].offset)
+               adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED |
+                                   IXGBE_FLAG_SRIOV_ENABLED);
+
+       adapter->ring_feature[RING_F_RSS].limit = rss;
+       adapter->ring_feature[RING_F_VMDQ].limit = 1;
+
+       ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs);
+}
+
 static int ixgbe_set_features(struct net_device *netdev,
                              netdev_features_t features)
 {
@@ -9540,7 +9539,9 @@ static int ixgbe_set_features(struct net_device *netdev,
                }
        }
 
-       if (need_reset)
+       if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1)
+               ixgbe_reset_l2fw_offload(adapter);
+       else if (need_reset)
                ixgbe_do_reset(netdev);
        else if (changed & (NETIF_F_HW_VLAN_CTAG_RX |
                            NETIF_F_HW_VLAN_CTAG_FILTER))
@@ -9803,71 +9804,98 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 
 static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
 {
-       struct ixgbe_fwd_adapter *fwd_adapter = NULL;
        struct ixgbe_adapter *adapter = netdev_priv(pdev);
-       int used_pools = adapter->num_vfs + adapter->num_rx_pools;
+       struct ixgbe_fwd_adapter *accel;
        int tcs = adapter->hw_tcs ? : 1;
-       unsigned int limit;
        int pool, err;
 
-       /* Hardware has a limited number of available pools. Each VF, and the
-        * PF require a pool. Check to ensure we don't attempt to use more
-        * then the available number of pools.
+       /* The hardware supported by ixgbe only filters on the destination MAC
+        * address. In order to avoid issues we only support offloading modes
+        * where the hardware can actually provide the functionality.
         */
-       if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
-               return ERR_PTR(-EINVAL);
+       if (!macvlan_supports_dest_filter(vdev))
+               return ERR_PTR(-EMEDIUMTYPE);
 
-       if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
-             adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
-           (adapter->num_rx_pools > IXGBE_MAX_MACVLANS))
-               return ERR_PTR(-EBUSY);
+       pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+       if (pool == adapter->num_rx_pools) {
+               u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
+               u16 reserved_pools;
+
+               if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+                    adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
+                   adapter->num_rx_pools > IXGBE_MAX_MACVLANS)
+                       return ERR_PTR(-EBUSY);
+
+               /* Hardware has a limited number of available pools. Each VF,
+                * and the PF require a pool. Check to ensure we don't
+                * attempt to use more than the available number of pools.
+                */
+               if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
+                       return ERR_PTR(-EBUSY);
 
-       fwd_adapter = kzalloc(sizeof(*fwd_adapter), GFP_KERNEL);
-       if (!fwd_adapter)
-               return ERR_PTR(-ENOMEM);
+               /* Enable VMDq flag so device will be set in VM mode */
+               adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED |
+                                 IXGBE_FLAG_SRIOV_ENABLED;
 
-       pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
-       set_bit(pool, adapter->fwd_bitmask);
-       limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools + 1);
+               /* Try to reserve as many queues per pool as possible,
+                * we start with the configurations that support 4 queues
+                * per pool, followed by 2, and then by just 1 per pool.
+                */
+               if (used_pools < 32 && adapter->num_rx_pools < 16)
+                       reserved_pools = min_t(u16,
+                                              32 - used_pools,
+                                              16 - adapter->num_rx_pools);
+               else if (adapter->num_rx_pools < 32)
+                       reserved_pools = min_t(u16,
+                                              64 - used_pools,
+                                              32 - adapter->num_rx_pools);
+               else
+                       reserved_pools = 64 - used_pools;
 
-       /* Enable VMDq flag so device will be set in VM mode */
-       adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED;
-       adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
 
-       fwd_adapter->pool = pool;
-       fwd_adapter->real_adapter = adapter;
+               if (!reserved_pools)
+                       return ERR_PTR(-EBUSY);
 
-       /* Force reinit of ring allocation with VMDQ enabled */
-       err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+               adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools;
 
-       if (!err && netif_running(pdev))
-               err = ixgbe_fwd_ring_up(vdev, fwd_adapter);
+               /* Force reinit of ring allocation with VMDQ enabled */
+               err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+               if (err)
+                       return ERR_PTR(err);
 
-       if (!err)
-               return fwd_adapter;
+               if (pool >= adapter->num_rx_pools)
+                       return ERR_PTR(-ENOMEM);
+       }
 
-       /* unwind counter and free adapter struct */
-       netdev_info(pdev,
-                   "%s: dfwd hardware acceleration failed\n", vdev->name);
-       clear_bit(pool, adapter->fwd_bitmask);
-       kfree(fwd_adapter);
-       return ERR_PTR(err);
+       accel = kzalloc(sizeof(*accel), GFP_KERNEL);
+       if (!accel)
+               return ERR_PTR(-ENOMEM);
+
+       set_bit(pool, adapter->fwd_bitmask);
+       accel->pool = pool;
+       accel->netdev = vdev;
+
+       if (!netif_running(pdev))
+               return accel;
+
+       err = ixgbe_fwd_ring_up(adapter, accel);
+       if (err)
+               return ERR_PTR(err);
+
+       return accel;
 }
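
The reservation ladder above works against the hardware's 64-queue budget: it grows the VMDq limit in steps that keep 4 queues per pool while fewer than 16 pools exist, 2 queues per pool up to 32 pools, and 1 queue per pool beyond that. A sketch reproducing just that arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint16_t min_u16(uint16_t a, uint16_t b)
{
        return a < b ? a : b;
}

/* Mirror of the ladder above: 4, then 2, then 1 queue(s) per pool. */
static uint16_t reserved_pools(uint16_t used_pools, uint16_t num_rx_pools)
{
        if (used_pools < 32 && num_rx_pools < 16)
                return min_u16(32 - used_pools, 16 - num_rx_pools);
        if (num_rx_pools < 32)
                return min_u16(64 - used_pools, 32 - num_rx_pools);
        return 64 - used_pools;
}

int main(void)
{
        printf("%u\n", reserved_pools(1, 1));   /* 15: fill out 16 pools */
        printf("%u\n", reserved_pools(20, 16)); /* 16: grow toward 32 pools */
        printf("%u\n", reserved_pools(40, 32)); /* 24: 1 queue per pool */
        return 0;
}
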
 
 static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 {
        struct ixgbe_fwd_adapter *accel = priv;
-       struct ixgbe_adapter *adapter = accel->real_adapter;
+       struct ixgbe_adapter *adapter = netdev_priv(pdev);
        unsigned int rxbase = accel->rx_base_queue;
-       unsigned int limit, i;
+       unsigned int i;
 
        /* delete unicast filter associated with offloaded interface */
        ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr,
                             VMDQ_P(accel->pool));
 
-       /* disable ability to receive packets for this pool */
-       IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(accel->pool), 0);
-
        /* Allow remaining Rx packets to get flushed out of the
         * Rx FIFO before we drop the netdev for the ring.
         */
@@ -9886,25 +9914,6 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
        }
 
        clear_bit(accel->pool, adapter->fwd_bitmask);
-       limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
-       adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
-
-       /* go back to full RSS if we're done with our VMQs */
-       if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
-               int rss = min_t(int, ixgbe_max_rss_indices(adapter),
-                               num_online_cpus());
-
-               adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
-               adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
-               adapter->ring_feature[RING_F_RSS].limit = rss;
-       }
-
-       ixgbe_setup_tc(pdev, adapter->hw_tcs);
-       netdev_dbg(pdev, "pool %i:%i queues %i:%i\n",
-                  accel->pool, adapter->num_rx_pools,
-                  accel->rx_base_queue,
-                  accel->rx_base_queue +
-                  adapter->num_rx_queues_per_pool);
        kfree(accel);
 }
 
@@ -9986,7 +9995,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
                }
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++)
-                       xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+                       (void)xchg(&adapter->rx_ring[i]->xdp_prog,
+                           adapter->xdp_prog);
        }
 
        if (old_prog)
@@ -10925,14 +10935,14 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
        rtnl_lock();
        netif_device_detach(netdev);
 
+       if (netif_running(netdev))
+               ixgbe_close_suspend(adapter);
+
        if (state == pci_channel_io_perm_failure) {
                rtnl_unlock();
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
-       if (netif_running(netdev))
-               ixgbe_close_suspend(adapter);
-
        if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
                pci_disable_device(pdev);
        rtnl_unlock();
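
The reordering above quiesces the interface before the permanent-failure early return, so that path no longer skips ixgbe_close_suspend(). A schematic sketch of the resulting control flow; the enum and helpers are illustrative, not the PCI AER API:

#include <stdbool.h>
#include <stdio.h>

enum channel_state { IO_NORMAL, IO_FROZEN, IO_PERM_FAILURE };

static bool running = true;

static void close_suspend(void)
{
        running = false;        /* stand-in for ixgbe_close_suspend() */
}

/* Quiesce first, then decide recoverability. */
static int error_detected(enum channel_state state)
{
        if (running)
                close_suspend();

        if (state == IO_PERM_FAILURE)
                return -1;      /* disconnect */

        return 0;               /* request a slot reset */
}

int main(void)
{
        int rc = error_detected(IO_PERM_FAILURE);

        printf("rc=%d running=%d\n", rc, running);
        return 0;
}
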
index a0cb84381cd060dfb2eae7d3f9e10eeef0cd8752..5679293e53f7afa56b86a17b67b389706d4d25c6 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/pci.h>
 #include <linux/delay.h>
index c4628b6635903f713ad1f5de012a5ff0398c39f3..e085b6520dac8871ebe4f117024aecd38c55b650 100644 (file)
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_MBX_H_
 #define _IXGBE_MBX_H_
index 72446644f9fa0efd45eb79c3fddd3567b8f51433..1e6cf220f54369f2f820b4aa0b168ed6bdcedd9b 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux drive
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_MODEL_H_
 #define _IXGBE_MODEL_H_
@@ -53,8 +29,8 @@ static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input,
                                     union ixgbe_atr_input *mask,
                                     u32 val, u32 m)
 {
-       input->filter.formatted.src_ip[0] = val;
-       mask->formatted.src_ip[0] = m;
+       input->filter.formatted.src_ip[0] = (__force __be32)val;
+       mask->formatted.src_ip[0] = (__force __be32)m;
        return 0;
 }
 
@@ -62,8 +38,8 @@ static inline int ixgbe_mat_prgm_dip(struct ixgbe_fdir_filter *input,
                                     union ixgbe_atr_input *mask,
                                     u32 val, u32 m)
 {
-       input->filter.formatted.dst_ip[0] = val;
-       mask->formatted.dst_ip[0] = m;
+       input->filter.formatted.dst_ip[0] = (__force __be32)val;
+       mask->formatted.dst_ip[0] = (__force __be32)m;
        return 0;
 }
 
@@ -79,10 +55,10 @@ static inline int ixgbe_mat_prgm_ports(struct ixgbe_fdir_filter *input,
                                       union ixgbe_atr_input *mask,
                                       u32 val, u32 m)
 {
-       input->filter.formatted.src_port = val & 0xffff;
-       mask->formatted.src_port = m & 0xffff;
-       input->filter.formatted.dst_port = val >> 16;
-       mask->formatted.dst_port = m >> 16;
+       input->filter.formatted.src_port = (__force __be16)(val & 0xffff);
+       mask->formatted.src_port = (__force __be16)(m & 0xffff);
+       input->filter.formatted.dst_port = (__force __be16)(val >> 16);
+       mask->formatted.dst_port = (__force __be16)(m >> 16);
 
        return 0;
 };
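
ixgbe_mat_prgm_ports() receives both L4 ports packed into a single 32-bit key word, taking src_port from the low half and dst_port from the high half; the __force casts then re-annotate the halves as big-endian filter fields. A sketch of the unpacking, with plain integer types standing in for __be16:

#include <stdint.h>
#include <stdio.h>

struct ports {
        uint16_t src, dst;      /* kernel: __be16 fields */
};

/* Split a packed 32-bit key into its two 16-bit halves. */
static struct ports unpack_ports(uint32_t val)
{
        struct ports p = {
                .src = (uint16_t)(val & 0xffff),
                .dst = (uint16_t)(val >> 16),
        };

        return p;
}

int main(void)
{
        struct ports p = unpack_ports(0x01bb0050);      /* illustrative */

        printf("src=0x%04x dst=0x%04x\n", p.src, p.dst);
        return 0;
}
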
index 91bde90f9265403224252722079fec40194a00da..919a7af84b423e83f563528509046a1a1394c3af 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2014 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/pci.h>
 #include <linux/delay.h>
index d6a7e77348c5409047e5e5918f752f06e64c4879..64e44e01c973fc4c047f04432c4694ac06271a25 100644 (file)
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_PHY_H_
 #define _IXGBE_PHY_H_
index f6cc9166082a13977004a48782c02471de4a009a..b3e0d8bb5cbd80f19852836c42512d3847215d41 100644 (file)
@@ -1,30 +1,6 @@
-/*******************************************************************************
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
 #include "ixgbe.h"
 #include <linux/ptp_classify.h>
 #include <linux/clocksource.h>
index 008aa073a679d27f3d907015c68acb8fd824fe65..6f59933cdff7d5ff13620ced894036ddeb3052c3 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/types.h>
 #include <linux/module.h>
@@ -266,7 +241,7 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 #endif
 
        /* Disable VMDq flag so device will be set in VM mode */
-       if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+       if (bitmap_weight(adapter->fwd_bitmask, adapter->num_rx_pools) == 1) {
                adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
                adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
                rss = min_t(int, ixgbe_max_rss_indices(adapter),
@@ -312,7 +287,8 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
         * other values out of range.
         */
        num_tc = adapter->hw_tcs;
-       num_rx_pools = adapter->num_rx_pools;
+       num_rx_pools = bitmap_weight(adapter->fwd_bitmask,
+                                    adapter->num_rx_pools);
        limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC :
                (num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC;
 
@@ -878,14 +854,11 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 
        /* reply to reset with ack and vf mac address */
        msgbuf[0] = IXGBE_VF_RESET;
-       if (!is_zero_ether_addr(vf_mac)) {
+       if (!is_zero_ether_addr(vf_mac) && adapter->vfinfo[vf].pf_set_mac) {
                msgbuf[0] |= IXGBE_VT_MSGTYPE_ACK;
                memcpy(addr, vf_mac, ETH_ALEN);
        } else {
                msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK;
-               dev_warn(&adapter->pdev->dev,
-                        "VF %d has no MAC address assigned, you may have to assign one manually\n",
-                        vf);
        }
 
        /*
index e30d1f07e8919f47addd362027987ad7c61b5fe0..3ec21923c89cd9e38cd39327445a184f68f661c9 100644 (file)
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_SRIOV_H_
 #define _IXGBE_SRIOV_H_
index 24766e125592e0278e4340ecf56d655cd1782be8..204844288c165980938255e5ddee7d6e9db6953a 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
index 2daa81e6e9b241392f2632b86110c164fbbd67a6..e8ed37749ab1c91be3223a9ff727cd33ca21a046 100644 (file)
@@ -1,31 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_TYPE_H_
 #define _IXGBE_TYPE_H_
index b8c5fd2a2115c91b749906ef3bcef66de757a39e..de563cfd294d49ccbcc98a1fa723930836cb85e6 100644 (file)
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2016 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  Linux NICS <linux.nics@intel.com>
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include <linux/pci.h>
 #include <linux/delay.h>
index 182d640e9f7ac04a0deaf56009b6d8af96a2ad5c..e246c0d2a42726038f82392f5d1971e2053e2a3f 100644 (file)
@@ -1,27 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux driver
- *  Copyright(c) 1999 - 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
- *  Contact Information:
- *  Linux NICS <linux.nics@intel.com>
- *  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- *  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- *****************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "ixgbe_type.h"
 
index 3123267dfba974049cc26496570cafe842eb81ac..a8148c7126e51d9757dde2f230901b9437adf116 100644 (file)
@@ -1,26 +1,6 @@
-/*******************************************************************************
- *
- *  Intel 10 Gigabit PCI Express Linux driver
- *  Copyright(c) 1999 - 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
- *  Contact Information:
- *  Linux NICS <linux.nics@intel.com>
- *  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- *  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
 #include "ixgbe_x540.h"
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
@@ -898,8 +878,9 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
                buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
 
                /* convert offset from words to bytes */
-               buffer.address = cpu_to_be32((offset + current_word) * 2);
-               buffer.length = cpu_to_be16(words_to_read * 2);
+               buffer.address = (__force u32)cpu_to_be32((offset +
+                                                          current_word) * 2);
+               buffer.length = (__force u16)cpu_to_be16(words_to_read * 2);
                buffer.pad2 = 0;
                buffer.pad3 = 0;
 
@@ -1109,9 +1090,9 @@ static s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data)
        buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
 
        /* convert offset from words to bytes */
-       buffer.address = cpu_to_be32(offset * 2);
+       buffer.address = (__force u32)cpu_to_be32(offset * 2);
        /* one word */
-       buffer.length = cpu_to_be16(sizeof(u16));
+       buffer.length = (__force u16)cpu_to_be16(sizeof(u16));
 
        status = hw->mac.ops.acquire_swfw_sync(hw, mask);
        if (status)
@@ -3427,6 +3408,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
                hw->phy.sfp_setup_needed = false;
        }
 
+       if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+               return status;
+
        /* Reset PHY */
        if (!hw->phy.reset_disable && hw->phy.ops.reset)
                hw->phy.ops.reset(hw);
index bb47814cfa9029a508176ec66fb51d063825875e..aba1e6a37a6a9e8fca900d94c9e6dc26fef3a632 100644 (file)
@@ -1,31 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel 82599 Virtual Function driver
-# Copyright(c) 1999 - 2012 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 1999 - 2018 Intel Corporation.
 #
 # Makefile for the Intel(R) 82599 VF ethernet driver
 #
index 71c828842b11b4f110f6352095737fcf78f7991b..700d8eb2f6f8a140d1162e7688a70b54f961c6e7 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBEVF_DEFINES_H_
 #define _IXGBEVF_DEFINES_H_
index 8e7d6c6f5c9242362b81ad98b7ee42f374433daf..e7813d76527cc4ff5e1b28a94f96a8682ecf5e3a 100644 (file)
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2018 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /* ethtool support for ixgbevf */
 
index 447ce1d5e0e39db594d8c404033833dce013bf6d..70c75681495fe61246ebbfe093def68332894fb3 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2018 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBEVF_H_
 #define _IXGBEVF_H_
index 3d9033f26effa743fc4142a6b99a64d7a5effcfd..083041129539b5769509588b7712f832d9fd5cf2 100644 (file)
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2018 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 /******************************************************************************
  Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
@@ -3420,7 +3397,7 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j);
-               break;
+               goto err_setup_tx;
        }
 
        return 0;
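
Review note: the one-line change above fixes the error path. On a failed XDP
queue allocation the loop used to break out and fall through to "return 0",
reporting success while leaking the rings allocated so far; jumping to
err_setup_tx instead propagates the failure and lets the existing label
unwind. A minimal standalone sketch of the same unwind idiom (setup_all(),
NQUEUES and queues[] are illustrative, not driver names):

	#include <stdio.h>
	#include <stdlib.h>

	#define NQUEUES 4
	static void *queues[NQUEUES];

	static int setup_all(void)
	{
		int i;

		for (i = 0; i < NQUEUES; i++) {
			queues[i] = malloc(64);
			if (queues[i])
				continue;
			fprintf(stderr, "allocation for queue %d failed\n", i);
			goto err_setup;		/* unwind instead of break */
		}
		return 0;

	err_setup:
		while (i--)			/* free what was set up so far */
			free(queues[i]);
		return -1;
	}

	int main(void)
	{
		return setup_all() ? 1 : 0;
	}
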
@@ -4137,7 +4114,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
        return NETDEV_TX_OK;
 }
 
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        struct ixgbevf_ring *tx_ring;
@@ -4187,6 +4164,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
                return -EPERM;
 
        ether_addr_copy(hw->mac.addr, addr->sa_data);
+       ether_addr_copy(hw->mac.perm_addr, addr->sa_data);
        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 
        return 0;
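
Review note: the added ether_addr_copy() keeps hw->mac.perm_addr in step with
the newly requested address. The motivation is an assumption here (the hunk
doesn't state it): reset/restore paths take the active address from
perm_addr, so leaving it stale could silently revert a user-set MAC. In
miniature (struct mac_sketch and set_mac() are illustrative):

	#include <string.h>

	#define ETH_ALEN 6

	struct mac_sketch {
		unsigned char addr[ETH_ALEN];		/* active address */
		unsigned char perm_addr[ETH_ALEN];	/* reset-restore source */
	};

	static void set_mac(struct mac_sketch *hw, const unsigned char *sa)
	{
		memcpy(hw->addr, sa, ETH_ALEN);
		memcpy(hw->perm_addr, sa, ETH_ALEN);	/* keep both in sync */
	}
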
@@ -4770,14 +4748,14 @@ static pci_ers_result_t ixgbevf_io_error_detected(struct pci_dev *pdev,
        rtnl_lock();
        netif_device_detach(netdev);
 
+       if (netif_running(netdev))
+               ixgbevf_close_suspend(adapter);
+
        if (state == pci_channel_io_perm_failure) {
                rtnl_unlock();
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
-       if (netif_running(netdev))
-               ixgbevf_close_suspend(adapter);
-
        if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state))
                pci_disable_device(pdev);
        rtnl_unlock();
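
Review note: hoisting the netif_running()/ixgbevf_close_suspend() pair above
the permanent-failure check closes a gap where the function returned
PCI_ERS_RESULT_DISCONNECT without ever quiescing the interface. A compilable
toy of the reordering (all names are stand-ins for the driver's):

	#include <stdbool.h>
	#include <stdio.h>

	enum chan_state { IO_FROZEN, IO_PERM_FAILURE };

	static void close_suspend(void)
	{
		puts("quiesce: stop queues, release resources");
	}

	static int error_detected(enum chan_state state, bool running)
	{
		if (running)
			close_suspend();	/* now runs on every path */

		if (state == IO_PERM_FAILURE)
			return 1;		/* DISCONNECT: no recovery */
		return 0;			/* recovery continues */
	}

	int main(void)
	{
		return error_detected(IO_PERM_FAILURE, true);
	}
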
index 2819abc454c7e71c024ab280ec3a5a1780a07a4b..6bc1953263b91602e29dc4247d5000c572a09219 100644 (file)
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "mbx.h"
 #include "ixgbevf.h"
index 5ec947fe3d09bdc5aef630009f8c145aab4f477c..bfd9ae150808810947e661907cae8e7e26cd5531 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBE_MBX_H_
 #define _IXGBE_MBX_H_
index 278f73980501f0f5a201333cd899900bac44536b..68d16ae5b65aa2fe0232a89c1d74d53253a4d446 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef _IXGBEVF_REGS_H_
 #define _IXGBEVF_REGS_H_
index 38d3a327c1bc9cdc641015b5c71dfe003d9284a0..bf0577e819e1c247811f9e1f9c0380e21584a219 100644 (file)
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #include "vf.h"
 #include "ixgbevf.h"
index 194fbdaa4519945ed0b22529a2761df0b3f9fc9d..d1e9e306653b87ca9ee6e73543ca918f8a3787af 100644 (file)
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
-  Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, see <http://www.gnu.org/licenses/>.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
 
 #ifndef __IXGBE_VF_H__
 #define __IXGBE_VF_H__
index ebe5c91489355c9e8ae3cb71cb1731a7db354094..cc2f7701e71e1b033c4bd7ceb78c970351f4d9ee 100644 (file)
@@ -86,6 +86,7 @@ config MVPP2
        depends on ARCH_MVEBU || COMPILE_TEST
        depends on HAS_DMA
        select MVMDIO
+       select PHYLINK
        ---help---
          This driver supports the network interface units in the
          Marvell ARMADA 375, 7K and 8K SoCs.
index 0495487f7b42e7e80d416a2212fad2d8ca786f71..c5dac6bd2be4d31b988df0f572deedff15ce18fc 100644 (file)
@@ -348,10 +348,7 @@ static int orion_mdio_probe(struct platform_device *pdev)
                goto out_mdio;
        }
 
-       if (pdev->dev.of_node)
-               ret = of_mdiobus_register(bus, pdev->dev.of_node);
-       else
-               ret = mdiobus_register(bus);
+       ret = of_mdiobus_register(bus, pdev->dev.of_node);
        if (ret < 0) {
                dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
                goto out_mdio;
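
Review note: collapsing the if/else into an unconditional
of_mdiobus_register() call relies on that helper degrading to plain
mdiobus_register() when handed a NULL of_node, behavior the OF MDIO core
gained around this series. A toy model of that contract (struct bodies and
prints are stand-ins):

	#include <stdio.h>

	struct mii_bus { const char *name; };
	struct device_node { const char *full_name; };

	static int mdiobus_register(struct mii_bus *bus)
	{
		printf("registered %s without DT\n", bus->name);
		return 0;
	}

	static int of_mdiobus_register(struct mii_bus *bus,
				       struct device_node *np)
	{
		if (!np)		/* the fallback the caller relies on */
			return mdiobus_register(bus);
		printf("registered %s from %s\n", bus->name, np->full_name);
		return 0;
	}

	int main(void)
	{
		struct mii_bus bus = { "orion-mdio" };

		return of_mdiobus_register(&bus, NULL);
	}
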
index 4202f9b5b966345f1aae651ff20377b9b2977b1a..6847cd431aa07cb17250e30bbd416a08c56d66e2 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/phy/phy.h>
 #include <linux/clk.h>
 #include <linux/hrtimer.h>
 #define     MVPP2_GMAC_FORCE_LINK_PASS         BIT(1)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG         BIT(2)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS  BIT(3)
+#define     MVPP2_GMAC_IN_BAND_RESTART_AN      BIT(4)
 #define     MVPP2_GMAC_CONFIG_MII_SPEED        BIT(5)
 #define     MVPP2_GMAC_CONFIG_GMII_SPEED       BIT(6)
 #define     MVPP2_GMAC_AN_SPEED_EN             BIT(7)
 #define     MVPP2_GMAC_FC_ADV_EN               BIT(9)
+#define     MVPP2_GMAC_FC_ADV_ASM_EN           BIT(10)
 #define     MVPP2_GMAC_FLOW_CTRL_AUTONEG       BIT(11)
 #define     MVPP2_GMAC_CONFIG_FULL_DUPLEX      BIT(12)
 #define     MVPP2_GMAC_AN_DUPLEX_EN            BIT(13)
 #define MVPP2_GMAC_STATUS0                     0x10
 #define     MVPP2_GMAC_STATUS0_LINK_UP         BIT(0)
+#define     MVPP2_GMAC_STATUS0_GMII_SPEED      BIT(1)
+#define     MVPP2_GMAC_STATUS0_MII_SPEED       BIT(2)
+#define     MVPP2_GMAC_STATUS0_FULL_DUPLEX     BIT(3)
+#define     MVPP2_GMAC_STATUS0_RX_PAUSE                BIT(6)
+#define     MVPP2_GMAC_STATUS0_TX_PAUSE                BIT(7)
+#define     MVPP2_GMAC_STATUS0_AN_COMPLETE     BIT(11)
 #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG         0x1c
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS     6
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK 0x1fc0
 #define     MVPP22_GMAC_INT_MASK_LINK_STAT     BIT(1)
 #define MVPP22_GMAC_CTRL_4_REG                 0x90
 #define     MVPP22_CTRL4_EXT_PIN_GMII_SEL      BIT(0)
+#define     MVPP22_CTRL4_RX_FC_EN              BIT(3)
+#define     MVPP22_CTRL4_TX_FC_EN              BIT(4)
 #define     MVPP22_CTRL4_DP_CLK_SEL            BIT(5)
 #define     MVPP22_CTRL4_SYNC_BYPASS_DIS       BIT(6)
 #define     MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE  BIT(7)
 #define     MVPP22_XLG_CTRL0_PORT_EN           BIT(0)
 #define     MVPP22_XLG_CTRL0_MAC_RESET_DIS     BIT(1)
 #define     MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN   BIT(7)
+#define     MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN   BIT(8)
 #define     MVPP22_XLG_CTRL0_MIB_CNT_DIS       BIT(14)
 #define MVPP22_XLG_CTRL1_REG                   0x104
 #define     MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS       0
 #define     MVPP22_XLG_CTRL4_FWD_FC            BIT(5)
 #define     MVPP22_XLG_CTRL4_FWD_PFC           BIT(6)
 #define     MVPP22_XLG_CTRL4_MACMODSELECT_GMAC BIT(12)
+#define     MVPP22_XLG_CTRL4_EN_IDLE_CHECK     BIT(14)
 
 /* SMI registers. PPv2.2 only, relative to priv->iface_base. */
 #define MVPP22_SMI_MISC_CFG_REG                        0x1204
@@ -942,6 +955,7 @@ struct mvpp2 {
        struct clk *pp_clk;
        struct clk *gop_clk;
        struct clk *mg_clk;
+       struct clk *mg_core_clk;
        struct clk *axi_clk;
 
        /* List of pointers to port structures */
@@ -1016,6 +1030,9 @@ struct mvpp2_port {
        /* Firmware node associated to the port */
        struct fwnode_handle *fwnode;
 
+       /* Is a PHY always connected to the port */
+       bool has_phy;
+
        /* Per-port registers' base address */
        void __iomem *base;
        void __iomem *stats_base;
@@ -1043,12 +1060,11 @@ struct mvpp2_port {
        struct mutex gather_stats_lock;
        struct delayed_work stats_work;
 
+       struct device_node *of_node;
+
        phy_interface_t phy_interface;
-       struct device_node *phy_node;
+       struct phylink *phylink;
        struct phy *comphy;
-       unsigned int link;
-       unsigned int duplex;
-       unsigned int speed;
 
        struct mvpp2_bm_pool *pool_long;
        struct mvpp2_bm_pool *pool_short;
@@ -1337,6 +1353,12 @@ struct mvpp2_bm_pool {
         (addr) < (txq_pcpu)->tso_headers_dma + \
         (txq_pcpu)->size * TSO_HEADER_SIZE)
 
+/* The prototype is added here to be used in start_dev when using ACPI. This
+ * will be removed once phylink is used for all modes (dt+ACPI).
+ */
+static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+                            const struct phylink_link_state *state);
+
 /* Queue modes */
 #define MVPP2_QDIST_SINGLE_MODE        0
 #define MVPP2_QDIST_MULTI_MODE 1
@@ -1734,7 +1756,6 @@ static void mvpp2_prs_tcam_ai_update(struct mvpp2_prs_entry *pe,
        int i, ai_idx = MVPP2_PRS_TCAM_AI_BYTE;
 
        for (i = 0; i < MVPP2_PRS_AI_BITS; i++) {
-
                if (!(enable & BIT(i)))
                        continue;
 
@@ -1818,7 +1839,6 @@ static void mvpp2_prs_sram_ai_update(struct mvpp2_prs_entry *pe,
        int ai_off = MVPP2_PRS_SRAM_AI_OFFS;
 
        for (i = 0; i < MVPP2_PRS_SRAM_AI_CTRL_BITS; i++) {
-
                if (!(mask & BIT(i)))
                        continue;
 
@@ -2108,6 +2128,9 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
                                mvpp2_prs_sram_ai_update(&pe, 0,
                                                        MVPP2_PRS_SRAM_AI_MASK);
 
+                       /* Set result info bits to 'single vlan' */
+                       mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_SINGLE,
+                                                MVPP2_PRS_RI_VLAN_MASK);
                        /* If packet is tagged continue check vid filtering */
                        mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
                } else {
@@ -4848,6 +4871,8 @@ static int mvpp22_gop_init(struct mvpp2_port *port)
                mvpp22_gop_init_rgmii(port);
                break;
        case PHY_INTERFACE_MODE_SGMII:
+       case PHY_INTERFACE_MODE_1000BASEX:
+       case PHY_INTERFACE_MODE_2500BASEX:
                mvpp22_gop_init_sgmii(port);
                break;
        case PHY_INTERFACE_MODE_10GKR:
@@ -4885,7 +4910,9 @@ static void mvpp22_gop_unmask_irq(struct mvpp2_port *port)
        u32 val;
 
        if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-           port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+           port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+           port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
+           port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
                /* Enable the GMAC link status irq for this port */
                val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
                val |= MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
@@ -4910,12 +4937,14 @@ static void mvpp22_gop_mask_irq(struct mvpp2_port *port)
        if (port->gop_id == 0) {
                val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
                val &= ~(MVPP22_XLG_EXT_INT_MASK_XLG |
-                        MVPP22_XLG_EXT_INT_MASK_GIG);
+                        MVPP22_XLG_EXT_INT_MASK_GIG);
                writel(val, port->base + MVPP22_XLG_EXT_INT_MASK);
        }
 
        if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-           port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+           port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+           port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
+           port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
                val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
                val &= ~MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
                writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
@@ -4927,7 +4956,9 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
        u32 val;
 
        if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-           port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+           port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+           port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
+           port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
                val = readl(port->base + MVPP22_GMAC_INT_MASK);
                val |= MVPP22_GMAC_INT_MASK_LINK_STAT;
                writel(val, port->base + MVPP22_GMAC_INT_MASK);
@@ -4942,6 +4973,16 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
        mvpp22_gop_unmask_irq(port);
 }
 
+/* Sets the PHY mode of the COMPHY (which configures the serdes lanes).
+ *
+ * The PHY mode used by the PPv2 driver comes from the network subsystem, while
+ * the one given to the COMPHY comes from the generic PHY subsystem. Hence they
+ * differ.
+ *
+ * The COMPHY configures the serdes lanes regardless of the actual use of the
+ * lanes by the physical layer. This is why configurations like
+ * "PPv2 (2500BaseX) - COMPHY (2500SGMII)" are valid.
+ */
 static int mvpp22_comphy_init(struct mvpp2_port *port)
 {
        enum phy_mode mode;
@@ -4952,8 +4993,12 @@ static int mvpp22_comphy_init(struct mvpp2_port *port)
 
        switch (port->phy_interface) {
        case PHY_INTERFACE_MODE_SGMII:
+       case PHY_INTERFACE_MODE_1000BASEX:
                mode = PHY_MODE_SGMII;
                break;
+       case PHY_INTERFACE_MODE_2500BASEX:
+               mode = PHY_MODE_2500SGMII;
+               break;
        case PHY_INTERFACE_MODE_10GKR:
                mode = PHY_MODE_10GKR;
                break;
@@ -4968,133 +5013,6 @@ static int mvpp22_comphy_init(struct mvpp2_port *port)
        return phy_power_on(port->comphy);
 }
 
-static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port)
-{
-       u32 val;
-
-       if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-               val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
-               val |= MVPP22_CTRL4_SYNC_BYPASS_DIS | MVPP22_CTRL4_DP_CLK_SEL |
-                      MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
-               val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
-               writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
-       } else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
-               val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
-               val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
-                      MVPP22_CTRL4_SYNC_BYPASS_DIS |
-                      MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
-               val &= ~MVPP22_CTRL4_DP_CLK_SEL;
-               writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
-       }
-
-       /* The port is connected to a copper PHY */
-       val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
-       val &= ~MVPP2_GMAC_PORT_TYPE_MASK;
-       writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
-
-       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       val |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS |
-              MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
-              MVPP2_GMAC_AN_DUPLEX_EN;
-       if (port->phy_interface == PHY_INTERFACE_MODE_SGMII)
-               val |= MVPP2_GMAC_IN_BAND_AUTONEG;
-       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-static void mvpp2_port_mii_gmac_configure(struct mvpp2_port *port)
-{
-       u32 val;
-
-       /* Force link down */
-       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
-       val |= MVPP2_GMAC_FORCE_LINK_DOWN;
-       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-
-       /* Set the GMAC in a reset state */
-       val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-       val |= MVPP2_GMAC_PORT_RESET_MASK;
-       writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-       /* Configure the PCS and in-band AN */
-       val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-       if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-               val |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
-       } else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
-               val &= ~MVPP2_GMAC_PCS_ENABLE_MASK;
-       }
-       writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-       mvpp2_port_mii_gmac_configure_mode(port);
-
-       /* Unset the GMAC reset state */
-       val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-       val &= ~MVPP2_GMAC_PORT_RESET_MASK;
-       writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-       /* Stop forcing link down */
-       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
-       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-static void mvpp2_port_mii_xlg_configure(struct mvpp2_port *port)
-{
-       u32 val;
-
-       if (port->gop_id != 0)
-               return;
-
-       val = readl(port->base + MVPP22_XLG_CTRL0_REG);
-       val |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
-       writel(val, port->base + MVPP22_XLG_CTRL0_REG);
-
-       val = readl(port->base + MVPP22_XLG_CTRL4_REG);
-       val &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
-       val |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC;
-       writel(val, port->base + MVPP22_XLG_CTRL4_REG);
-}
-
-static void mvpp22_port_mii_set(struct mvpp2_port *port)
-{
-       u32 val;
-
-       /* Only GOP port 0 has an XLG MAC */
-       if (port->gop_id == 0) {
-               val = readl(port->base + MVPP22_XLG_CTRL3_REG);
-               val &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
-
-               if (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-                   port->phy_interface == PHY_INTERFACE_MODE_10GKR)
-                       val |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
-               else
-                       val |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
-
-               writel(val, port->base + MVPP22_XLG_CTRL3_REG);
-       }
-}
-
-static void mvpp2_port_mii_set(struct mvpp2_port *port)
-{
-       if (port->priv->hw_version == MVPP22)
-               mvpp22_port_mii_set(port);
-
-       if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-           port->phy_interface == PHY_INTERFACE_MODE_SGMII)
-               mvpp2_port_mii_gmac_configure(port);
-       else if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
-               mvpp2_port_mii_xlg_configure(port);
-}
-
-static void mvpp2_port_fc_adv_enable(struct mvpp2_port *port)
-{
-       u32 val;
-
-       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       val |= MVPP2_GMAC_FC_ADV_EN;
-       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
 static void mvpp2_port_enable(struct mvpp2_port *port)
 {
        u32 val;
@@ -5125,8 +5043,11 @@ static void mvpp2_port_disable(struct mvpp2_port *port)
            (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
             port->phy_interface == PHY_INTERFACE_MODE_10GKR)) {
                val = readl(port->base + MVPP22_XLG_CTRL0_REG);
-               val &= ~(MVPP22_XLG_CTRL0_PORT_EN |
-                        MVPP22_XLG_CTRL0_MAC_RESET_DIS);
+               val &= ~MVPP22_XLG_CTRL0_PORT_EN;
+               writel(val, port->base + MVPP22_XLG_CTRL0_REG);
+
+               /* Disable & reset should be done separately */
+               val &= ~MVPP22_XLG_CTRL0_MAC_RESET_DIS;
                writel(val, port->base + MVPP22_XLG_CTRL0_REG);
        } else {
                val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
@@ -5146,18 +5067,21 @@ static void mvpp2_port_periodic_xon_disable(struct mvpp2_port *port)
 }
 
 /* Configure loopback port */
-static void mvpp2_port_loopback_set(struct mvpp2_port *port)
+static void mvpp2_port_loopback_set(struct mvpp2_port *port,
+                                   const struct phylink_link_state *state)
 {
        u32 val;
 
        val = readl(port->base + MVPP2_GMAC_CTRL_1_REG);
 
-       if (port->speed == 1000)
+       if (state->speed == 1000)
                val |= MVPP2_GMAC_GMII_LB_EN_MASK;
        else
                val &= ~MVPP2_GMAC_GMII_LB_EN_MASK;
 
-       if (port->phy_interface == PHY_INTERFACE_MODE_SGMII)
+       if (port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+           port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
+           port->phy_interface == PHY_INTERFACE_MODE_2500BASEX)
                val |= MVPP2_GMAC_PCS_LB_EN_MASK;
        else
                val &= ~MVPP2_GMAC_PCS_LB_EN_MASK;
@@ -5330,10 +5254,6 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
        int tx_port_num, val, queue, ptxq, lrxq;
 
        if (port->priv->hw_version == MVPP21) {
-               /* Configure port to loopback if needed */
-               if (port->flags & MVPP2_F_LOOPBACK)
-                       mvpp2_port_loopback_set(port);
-
                /* Update TX FIFO MIN Threshold */
                val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
                val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
@@ -5551,7 +5471,6 @@ static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending)
                           MVPP2_AGGR_TXQ_UPDATE_REG, pending);
 }
 
-
 /* Check if there are enough free descriptors in aggregated txq.
  * If not, update the number of occupied descriptors and repeat the check.
  *
@@ -5568,11 +5487,10 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
                                             MVPP2_AGGR_TXQ_STATUS_REG(cpu));
 
                aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
-       }
-
-       if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE)
-               return -ENOMEM;
 
+               if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE)
+                       return -ENOMEM;
+       }
        return 0;
 }
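
Review note: moving the second overflow test inside the branch is
behavior-preserving; the re-check only means anything after the hardware
counter has been re-read, and the re-read only happens when the cached count
already failed. The refactor in isolation (reserve() and refresh() are
illustrative):

	static int reserve(unsigned int *cached, unsigned int num,
			   unsigned int size, unsigned int (*refresh)(void))
	{
		if (*cached + num > size) {
			*cached = refresh();	/* e.g. re-read HW status */
			if (*cached + num > size)
				return -1;	/* -ENOMEM analogue */
		}
		return 0;			/* fast path: one compare */
	}
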
 
@@ -5632,7 +5550,7 @@ static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2 *priv,
 
        txq_pcpu->reserved_num += mvpp2_txq_alloc_reserved_desc(priv, txq, req);
 
-       /* OK, the descriptor cound has been updated: check again. */
+       /* OK, the descriptor could have been updated: check again. */
        if (txq_pcpu->reserved_num < num)
                return -ENOMEM;
        return 0;
@@ -6114,7 +6032,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        /* Calculate base address in prefetch buffer. We reserve 16 descriptors
         * for each existing TXQ.
         * TCONTS for PON port must be continuous from 0 to MVPP2_MAX_TCONT
-        * GBE ports assumed to be continious from 0 to MVPP2_MAX_PORTS
+        * GBE ports assumed to be continuous from 0 to MVPP2_MAX_PORTS
         */
        desc_per_txq = 16;
        desc = (port->id * MVPP2_MAX_TXQ * desc_per_txq) +
@@ -6371,7 +6289,9 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
                                link = true;
                }
        } else if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-                  port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+                  port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+                  port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
+                  port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
                val = readl(port->base + MVPP22_GMAC_INT_STAT);
                if (val & MVPP22_GMAC_INT_STAT_LINK) {
                        event = true;
@@ -6381,6 +6301,11 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
                }
        }
 
+       if (port->phylink) {
+               phylink_mac_change(port->phylink, link);
+               goto handled;
+       }
+
        if (!netif_running(dev) || !event)
                goto handled;
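
Review note: with phylink attached, the ISR's only job is to sample the link
bit and forward it via phylink_mac_change(); carrier and queue management
move out of the handler. The division of labor as a sketch (everything here
is illustrative, and phylink_mac_change() is stubbed locally rather than the
real kernel entry point):

	#include <stdbool.h>
	#include <stdio.h>

	struct phylink { const char *name; };

	/* local stub standing in for the kernel's phylink_mac_change() */
	static void phylink_mac_change(struct phylink *pl, bool up)
	{
		printf("%s: link %s\n", pl->name, up ? "up" : "down");
	}

	struct port_sketch { struct phylink *phylink; };

	static int link_isr(struct port_sketch *port, bool link)
	{
		if (port->phylink) {
			/* forward the event; phylink owns carrier/queues */
			phylink_mac_change(port->phylink, link);
			return 1;	/* IRQ_HANDLED analogue */
		}
		return 0;		/* legacy ACPI path handles it */
	}

	int main(void)
	{
		struct phylink pl = { "eth0" };
		struct port_sketch port = { &pl };

		return !link_isr(&port, true);
	}
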
 
@@ -6405,111 +6330,6 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port,
-                                  struct phy_device *phydev)
-{
-       u32 val;
-
-       if (port->phy_interface != PHY_INTERFACE_MODE_RGMII &&
-           port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID &&
-           port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID &&
-           port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID &&
-           port->phy_interface != PHY_INTERFACE_MODE_SGMII)
-               return;
-
-       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
-                MVPP2_GMAC_CONFIG_GMII_SPEED |
-                MVPP2_GMAC_CONFIG_FULL_DUPLEX |
-                MVPP2_GMAC_AN_SPEED_EN |
-                MVPP2_GMAC_AN_DUPLEX_EN);
-
-       if (phydev->duplex)
-               val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-
-       if (phydev->speed == SPEED_1000)
-               val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
-       else if (phydev->speed == SPEED_100)
-               val |= MVPP2_GMAC_CONFIG_MII_SPEED;
-
-       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-/* Adjust link */
-static void mvpp2_link_event(struct net_device *dev)
-{
-       struct mvpp2_port *port = netdev_priv(dev);
-       struct phy_device *phydev = dev->phydev;
-       bool link_reconfigured = false;
-       u32 val;
-
-       if (phydev->link) {
-               if (port->phy_interface != phydev->interface && port->comphy) {
-                       /* disable current port for reconfiguration */
-                       mvpp2_interrupts_disable(port);
-                       netif_carrier_off(port->dev);
-                       mvpp2_port_disable(port);
-                       phy_power_off(port->comphy);
-
-                       /* comphy reconfiguration */
-                       port->phy_interface = phydev->interface;
-                       mvpp22_comphy_init(port);
-
-                       /* gop/mac reconfiguration */
-                       mvpp22_gop_init(port);
-                       mvpp2_port_mii_set(port);
-
-                       link_reconfigured = true;
-               }
-
-               if ((port->speed != phydev->speed) ||
-                   (port->duplex != phydev->duplex)) {
-                       mvpp2_gmac_set_autoneg(port, phydev);
-
-                       port->duplex = phydev->duplex;
-                       port->speed  = phydev->speed;
-               }
-       }
-
-       if (phydev->link != port->link || link_reconfigured) {
-               port->link = phydev->link;
-
-               if (phydev->link) {
-                       if (port->phy_interface == PHY_INTERFACE_MODE_RGMII ||
-                           port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
-                           port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
-                           port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID ||
-                           port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-                               val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-                               val |= (MVPP2_GMAC_FORCE_LINK_PASS |
-                                       MVPP2_GMAC_FORCE_LINK_DOWN);
-                               writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-                       }
-
-                       mvpp2_interrupts_enable(port);
-                       mvpp2_port_enable(port);
-
-                       mvpp2_egress_enable(port);
-                       mvpp2_ingress_enable(port);
-                       netif_carrier_on(dev);
-                       netif_tx_wake_all_queues(dev);
-               } else {
-                       port->duplex = -1;
-                       port->speed = 0;
-
-                       netif_tx_stop_all_queues(dev);
-                       netif_carrier_off(dev);
-                       mvpp2_ingress_disable(port);
-                       mvpp2_egress_disable(port);
-
-                       mvpp2_port_disable(port);
-                       mvpp2_interrupts_disable(port);
-               }
-
-               phy_print_status(phydev);
-       }
-}
-
 static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu)
 {
        ktime_t interval;
@@ -6561,21 +6381,23 @@ static void mvpp2_rx_error(struct mvpp2_port *port,
 {
        u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
        size_t sz = mvpp2_rxdesc_size_get(port, rx_desc);
+       char *err_str = NULL;
 
        switch (status & MVPP2_RXD_ERR_CODE_MASK) {
        case MVPP2_RXD_ERR_CRC:
-               netdev_err(port->dev, "bad rx status %08x (crc error), size=%zu\n",
-                          status, sz);
+               err_str = "crc";
                break;
        case MVPP2_RXD_ERR_OVERRUN:
-               netdev_err(port->dev, "bad rx status %08x (overrun error), size=%zu\n",
-                          status, sz);
+               err_str = "overrun";
                break;
        case MVPP2_RXD_ERR_RESOURCE:
-               netdev_err(port->dev, "bad rx status %08x (resource error), size=%zu\n",
-                          status, sz);
+               err_str = "resource";
                break;
        }
+       if (err_str && net_ratelimit())
+               netdev_err(port->dev,
+                          "bad rx status %08x (%s error), size=%zu\n",
+                          status, err_str, sz);
 }
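
Review note: the rewrite folds three nearly identical netdev_err() calls into
one and gates it behind net_ratelimit(), so a storm of bad RX descriptors can
no longer flood the log. The classify-then-print shape, standalone
(ratelimit() is a stand-in for net_ratelimit()):

	#include <stdio.h>

	#define ERR_MASK	0x3
	#define ERR_CRC		0x1
	#define ERR_OVERRUN	0x2

	static int ratelimit(void)	/* stand-in for net_ratelimit() */
	{
		static int budget = 10;
		return budget-- > 0;
	}

	static void rx_error(unsigned int status, size_t sz)
	{
		const char *err_str = NULL;

		switch (status & ERR_MASK) {
		case ERR_CRC:		err_str = "crc";	break;
		case ERR_OVERRUN:	err_str = "overrun";	break;
		}
		if (err_str && ratelimit())
			fprintf(stderr,
				"bad rx status %08x (%s error), size=%zu\n",
				status, err_str, sz);
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++)
			rx_error(ERR_CRC, 64);
		return 0;
	}
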
 
 /* Handle RX checksum offload */
@@ -6780,8 +6602,7 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
                mvpp2_txdesc_size_set(port, tx_desc, frag->size);
 
                buf_dma_addr = dma_map_single(port->dev->dev.parent, addr,
-                                              frag->size,
-                                              DMA_TO_DEVICE);
+                                             frag->size, DMA_TO_DEVICE);
                if (dma_mapping_error(port->dev->dev.parent, buf_dma_addr)) {
                        mvpp2_txq_desc_put(txq);
                        goto cleanup;
@@ -7117,11 +6938,29 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
        return rx_done;
 }
 
-/* Set hw internals when starting port */
-static void mvpp2_start_dev(struct mvpp2_port *port)
+static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
 {
-       struct net_device *ndev = port->dev;
-       int i;
+       u32 ctrl3;
+
+       /* comphy reconfiguration */
+       mvpp22_comphy_init(port);
+
+       /* gop reconfiguration */
+       mvpp22_gop_init(port);
+
+       /* Only GOP port 0 has an XLG MAC */
+       if (port->gop_id == 0) {
+               ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG);
+               ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+
+               if (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
+                   port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+                       ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
+               else
+                       ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
+
+               writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG);
+       }
 
        if (port->gop_id == 0 &&
            (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
@@ -7129,6 +6968,12 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
                mvpp2_xlg_max_rx_size_set(port);
        else
                mvpp2_gmac_max_rx_size_set(port);
+}
+
+/* Set hw internals when starting port */
+static void mvpp2_start_dev(struct mvpp2_port *port)
+{
+       int i;
 
        mvpp2_txp_max_tx_size_set(port);
 
@@ -7138,42 +6983,39 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
        /* Enable interrupts on all CPUs */
        mvpp2_interrupts_enable(port);
 
-       if (port->priv->hw_version == MVPP22) {
-               mvpp22_comphy_init(port);
-               mvpp22_gop_init(port);
+       if (port->priv->hw_version == MVPP22)
+               mvpp22_mode_reconfigure(port);
+
+       if (port->phylink) {
+               phylink_start(port->phylink);
+       } else {
+               /* Phylink isn't used as of now for ACPI, so the MAC has to be
+                * configured manually when the interface is started. This will
+                * be removed as soon as the phylink ACPI support lands in.
+                */
+               struct phylink_link_state state = {
+                       .interface = port->phy_interface,
+                       .link = 1,
+               };
+               mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
        }
 
-       mvpp2_port_mii_set(port);
-       mvpp2_port_enable(port);
-       if (ndev->phydev)
-               phy_start(ndev->phydev);
        netif_tx_start_all_queues(port->dev);
 }
 
 /* Set hw internals when stopping port */
 static void mvpp2_stop_dev(struct mvpp2_port *port)
 {
-       struct net_device *ndev = port->dev;
        int i;
 
-       /* Stop new packets from arriving to RXQs */
-       mvpp2_ingress_disable(port);
-
-       mdelay(10);
-
        /* Disable interrupts on all CPUs */
        mvpp2_interrupts_disable(port);
 
        for (i = 0; i < port->nqvecs; i++)
                napi_disable(&port->qvecs[i].napi);
 
-       netif_carrier_off(port->dev);
-       netif_tx_stop_all_queues(port->dev);
-
-       mvpp2_egress_disable(port);
-       mvpp2_port_disable(port);
-       if (ndev->phydev)
-               phy_stop(ndev->phydev);
+       if (port->phylink)
+               phylink_stop(port->phylink);
        phy_power_off(port->comphy);
 }
 
@@ -7232,40 +7074,6 @@ static void mvpp21_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
        addr[5] = (mac_addr_l >> MVPP2_GMAC_SA_LOW_OFFS) & 0xFF;
 }
 
-static int mvpp2_phy_connect(struct mvpp2_port *port)
-{
-       struct phy_device *phy_dev;
-
-       /* No PHY is attached */
-       if (!port->phy_node)
-               return 0;
-
-       phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0,
-                                port->phy_interface);
-       if (!phy_dev) {
-               netdev_err(port->dev, "cannot connect to phy\n");
-               return -ENODEV;
-       }
-       phy_dev->supported &= PHY_GBIT_FEATURES;
-       phy_dev->advertising = phy_dev->supported;
-
-       port->link    = 0;
-       port->duplex  = 0;
-       port->speed   = 0;
-
-       return 0;
-}
-
-static void mvpp2_phy_disconnect(struct mvpp2_port *port)
-{
-       struct net_device *ndev = port->dev;
-
-       if (!ndev->phydev)
-               return;
-
-       phy_disconnect(ndev->phydev);
-}
-
 static int mvpp2_irqs_init(struct mvpp2_port *port)
 {
        int err, i;
@@ -7349,6 +7157,7 @@ static int mvpp2_open(struct net_device *dev)
        struct mvpp2 *priv = port->priv;
        unsigned char mac_bcast[ETH_ALEN] = {
                        0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+       bool valid = false;
        int err;
 
        err = mvpp2_prs_mac_da_accept(port, mac_bcast, true);
@@ -7391,7 +7200,19 @@ static int mvpp2_open(struct net_device *dev)
                goto err_cleanup_txqs;
        }
 
-       if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) {
+       /* Phylink isn't supported yet in ACPI mode */
+       if (port->of_node) {
+               err = phylink_of_phy_connect(port->phylink, port->of_node, 0);
+               if (err) {
+                       netdev_err(port->dev, "could not attach PHY (%d)\n",
+                                  err);
+                       goto err_free_irq;
+               }
+
+               valid = true;
+       }
+
+       if (priv->hw_version == MVPP22 && port->link_irq && !port->phylink) {
                err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
                                  dev->name, port);
                if (err) {
@@ -7401,14 +7222,20 @@ static int mvpp2_open(struct net_device *dev)
                }
 
                mvpp22_gop_setup_irq(port);
-       }
 
-       /* In default link is down */
-       netif_carrier_off(port->dev);
+               /* In default link is down */
+               netif_carrier_off(port->dev);
 
-       err = mvpp2_phy_connect(port);
-       if (err < 0)
-               goto err_free_link_irq;
+               valid = true;
+       } else {
+               port->link_irq = 0;
+       }
+
+       if (!valid) {
+               netdev_err(port->dev,
+                          "invalid configuration: no dt or link IRQ");
+               goto err_free_irq;
+       }
 
        /* Unmask interrupts on all CPUs */
        on_each_cpu(mvpp2_interrupts_unmask, port, 1);
@@ -7425,9 +7252,6 @@ static int mvpp2_open(struct net_device *dev)
 
        return 0;
 
-err_free_link_irq:
-       if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
-               free_irq(port->link_irq, port);
 err_free_irq:
        mvpp2_irqs_deinit(port);
 err_cleanup_txqs:
@@ -7441,17 +7265,17 @@ static int mvpp2_stop(struct net_device *dev)
 {
        struct mvpp2_port *port = netdev_priv(dev);
        struct mvpp2_port_pcpu *port_pcpu;
-       struct mvpp2 *priv = port->priv;
        int cpu;
 
        mvpp2_stop_dev(port);
-       mvpp2_phy_disconnect(port);
 
        /* Mask interrupts on all CPUs */
        on_each_cpu(mvpp2_interrupts_mask, port, 1);
        mvpp2_shared_interrupt_mask_unmask(port, true);
 
-       if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
+       if (port->phylink)
+               phylink_disconnect_phy(port->phylink);
+       if (port->link_irq)
                free_irq(port->link_irq, port);
 
        mvpp2_irqs_deinit(port);
@@ -7534,42 +7358,18 @@ static void mvpp2_set_rx_mode(struct net_device *dev)
 
 static int mvpp2_set_mac_address(struct net_device *dev, void *p)
 {
-       struct mvpp2_port *port = netdev_priv(dev);
        const struct sockaddr *addr = p;
        int err;
 
-       if (!is_valid_ether_addr(addr->sa_data)) {
-               err = -EADDRNOTAVAIL;
-               goto log_error;
-       }
-
-       if (!netif_running(dev)) {
-               err = mvpp2_prs_update_mac_da(dev, addr->sa_data);
-               if (!err)
-                       return 0;
-               /* Reconfigure parser to accept the original MAC address */
-               err = mvpp2_prs_update_mac_da(dev, dev->dev_addr);
-               if (err)
-                       goto log_error;
-       }
-
-       mvpp2_stop_dev(port);
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EADDRNOTAVAIL;
 
        err = mvpp2_prs_update_mac_da(dev, addr->sa_data);
-       if (!err)
-               goto out_start;
-
-       /* Reconfigure parser accept the original MAC address */
-       err = mvpp2_prs_update_mac_da(dev, dev->dev_addr);
-       if (err)
-               goto log_error;
-out_start:
-       mvpp2_start_dev(port);
-       mvpp2_egress_enable(port);
-       mvpp2_ingress_enable(port);
-       return 0;
-log_error:
-       netdev_err(dev, "failed to change MAC address\n");
+       if (err) {
+       /* Reconfigure parser to accept the original MAC address */
+               mvpp2_prs_update_mac_da(dev, dev->dev_addr);
+               netdev_err(dev, "failed to change MAC address\n");
+       }
        return err;
 }
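
Review note: the simplified mvpp2_set_mac_address() drops the old stop/start
dance (presumably unnecessary once phylink owns the MAC state) but keeps the
useful subtlety: if the parser rejects the new address, the previous one is
re-installed best-effort so the filter stays consistent with dev->dev_addr.
The rollback idiom on its own (set_addr() and filt_update() are
illustrative):

	static int set_addr(const unsigned char *new_addr,
			    const unsigned char *cur_addr,
			    int (*filt_update)(const unsigned char *))
	{
		int err = filt_update(new_addr);

		if (err)
			filt_update(cur_addr);	/* best-effort rollback */
		return err;
	}
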
 
@@ -7657,16 +7457,12 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-       int ret;
+       struct mvpp2_port *port = netdev_priv(dev);
 
-       if (!dev->phydev)
+       if (!port->phylink)
                return -ENOTSUPP;
 
-       ret = phy_mii_ioctl(dev->phydev, ifr, cmd);
-       if (!ret)
-               mvpp2_link_event(dev);
-
-       return ret;
+       return phylink_mii_ioctl(port->phylink, ifr, cmd);
 }
 
 static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
@@ -7713,6 +7509,16 @@ static int mvpp2_set_features(struct net_device *dev,
 
 /* Ethtool methods */
 
+static int mvpp2_ethtool_nway_reset(struct net_device *dev)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!port->phylink)
+               return -ENOTSUPP;
+
+       return phylink_ethtool_nway_reset(port->phylink);
+}
+
 /* Set interrupt coalescing for ethtools */
 static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
                                      struct ethtool_coalesce *c)
@@ -7841,6 +7647,50 @@ static int mvpp2_ethtool_set_ringparam(struct net_device *dev,
        return err;
 }
 
+static void mvpp2_ethtool_get_pause_param(struct net_device *dev,
+                                         struct ethtool_pauseparam *pause)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!port->phylink)
+               return;
+
+       phylink_ethtool_get_pauseparam(port->phylink, pause);
+}
+
+static int mvpp2_ethtool_set_pause_param(struct net_device *dev,
+                                        struct ethtool_pauseparam *pause)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!port->phylink)
+               return -ENOTSUPP;
+
+       return phylink_ethtool_set_pauseparam(port->phylink, pause);
+}
+
+static int mvpp2_ethtool_get_link_ksettings(struct net_device *dev,
+                                           struct ethtool_link_ksettings *cmd)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!port->phylink)
+               return -ENOTSUPP;
+
+       return phylink_ethtool_ksettings_get(port->phylink, cmd);
+}
+
+static int mvpp2_ethtool_set_link_ksettings(struct net_device *dev,
+                                           const struct ethtool_link_ksettings *cmd)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!port->phylink)
+               return -ENOTSUPP;
+
+       return phylink_ethtool_ksettings_set(port->phylink, cmd);
+}
+
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -7858,18 +7708,20 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
-       .nway_reset     = phy_ethtool_nway_reset,
-       .get_link       = ethtool_op_get_link,
-       .set_coalesce   = mvpp2_ethtool_set_coalesce,
-       .get_coalesce   = mvpp2_ethtool_get_coalesce,
-       .get_drvinfo    = mvpp2_ethtool_get_drvinfo,
-       .get_ringparam  = mvpp2_ethtool_get_ringparam,
-       .set_ringparam  = mvpp2_ethtool_set_ringparam,
-       .get_strings    = mvpp2_ethtool_get_strings,
-       .get_ethtool_stats = mvpp2_ethtool_get_stats,
-       .get_sset_count = mvpp2_ethtool_get_sset_count,
-       .get_link_ksettings = phy_ethtool_get_link_ksettings,
-       .set_link_ksettings = phy_ethtool_set_link_ksettings,
+       .nway_reset             = mvpp2_ethtool_nway_reset,
+       .get_link               = ethtool_op_get_link,
+       .set_coalesce           = mvpp2_ethtool_set_coalesce,
+       .get_coalesce           = mvpp2_ethtool_get_coalesce,
+       .get_drvinfo            = mvpp2_ethtool_get_drvinfo,
+       .get_ringparam          = mvpp2_ethtool_get_ringparam,
+       .set_ringparam          = mvpp2_ethtool_set_ringparam,
+       .get_strings            = mvpp2_ethtool_get_strings,
+       .get_ethtool_stats      = mvpp2_ethtool_get_stats,
+       .get_sset_count         = mvpp2_ethtool_get_sset_count,
+       .get_pauseparam         = mvpp2_ethtool_get_pause_param,
+       .set_pauseparam         = mvpp2_ethtool_set_pause_param,
+       .get_link_ksettings     = mvpp2_ethtool_get_link_ksettings,
+       .set_link_ksettings     = mvpp2_ethtool_set_link_ksettings,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -8171,18 +8023,361 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
        eth_hw_addr_random(dev);
 }
 
+static void mvpp2_phylink_validate(struct net_device *dev,
+                                  unsigned long *supported,
+                                  struct phylink_link_state *state)
+{
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+       phylink_set(mask, Autoneg);
+       phylink_set_port_modes(mask);
+       phylink_set(mask, Pause);
+       phylink_set(mask, Asym_Pause);
+
+       switch (state->interface) {
+       case PHY_INTERFACE_MODE_10GKR:
+               phylink_set(mask, 10000baseCR_Full);
+               phylink_set(mask, 10000baseSR_Full);
+               phylink_set(mask, 10000baseLR_Full);
+               phylink_set(mask, 10000baseLRM_Full);
+               phylink_set(mask, 10000baseER_Full);
+               phylink_set(mask, 10000baseKR_Full);
+               /* Fall-through */
+       default:
+               phylink_set(mask, 10baseT_Half);
+               phylink_set(mask, 10baseT_Full);
+               phylink_set(mask, 100baseT_Half);
+               phylink_set(mask, 100baseT_Full);
+               phylink_set(mask, 10000baseT_Full);
+               /* Fall-through */
+       case PHY_INTERFACE_MODE_1000BASEX:
+       case PHY_INTERFACE_MODE_2500BASEX:
+               phylink_set(mask, 1000baseT_Full);
+               phylink_set(mask, 1000baseX_Full);
+               phylink_set(mask, 2500baseX_Full);
+       }
+
+       bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+       bitmap_and(state->advertising, state->advertising, mask,
+                  __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void mvpp22_xlg_link_state(struct mvpp2_port *port,
+                                 struct phylink_link_state *state)
+{
+       u32 val;
+
+       state->speed = SPEED_10000;
+       state->duplex = 1;
+       state->an_complete = 1;
+
+       val = readl(port->base + MVPP22_XLG_STATUS);
+       state->link = !!(val & MVPP22_XLG_STATUS_LINK_UP);
+
+       state->pause = 0;
+       val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+       if (val & MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN)
+               state->pause |= MLO_PAUSE_TX;
+       if (val & MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN)
+               state->pause |= MLO_PAUSE_RX;
+}
+
+static void mvpp2_gmac_link_state(struct mvpp2_port *port,
+                                 struct phylink_link_state *state)
+{
+       u32 val;
+
+       val = readl(port->base + MVPP2_GMAC_STATUS0);
+
+       state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE);
+       state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
+       state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
+
+       switch (port->phy_interface) {
+       case PHY_INTERFACE_MODE_1000BASEX:
+               state->speed = SPEED_1000;
+               break;
+       case PHY_INTERFACE_MODE_2500BASEX:
+               state->speed = SPEED_2500;
+               break;
+       default:
+               if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+                       state->speed = SPEED_1000;
+               else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
+                       state->speed = SPEED_100;
+               else
+                       state->speed = SPEED_10;
+       }
+
+       state->pause = 0;
+       if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
+               state->pause |= MLO_PAUSE_RX;
+       if (val & MVPP2_GMAC_STATUS0_TX_PAUSE)
+               state->pause |= MLO_PAUSE_TX;
+}
+
+static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+                                       struct phylink_link_state *state)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
+               u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
+               mode &= MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+
+               if (mode == MVPP22_XLG_CTRL3_MACMODESELECT_10G) {
+                       mvpp22_xlg_link_state(port, state);
+                       return 1;
+               }
+       }
+
+       mvpp2_gmac_link_state(port, state);
+       return 1;
+}
+
+static void mvpp2_mac_an_restart(struct net_device *dev)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       u32 val;
+
+       if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+               return;
+
+       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       /* The RESTART_AN bit is cleared by the h/w after restarting the AN
+        * process.
+        */
+       val |= MVPP2_GMAC_IN_BAND_RESTART_AN | MVPP2_GMAC_IN_BAND_AUTONEG;
+       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
+                            const struct phylink_link_state *state)
+{
+       u32 ctrl0, ctrl4;
+
+       ctrl0 = readl(port->base + MVPP22_XLG_CTRL0_REG);
+       ctrl4 = readl(port->base + MVPP22_XLG_CTRL4_REG);
+
+       if (state->pause & MLO_PAUSE_TX)
+               ctrl0 |= MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN;
+       if (state->pause & MLO_PAUSE_RX)
+               ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
+
+       ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
+       ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC |
+                MVPP22_XLG_CTRL4_EN_IDLE_CHECK;
+
+       writel(ctrl0, port->base + MVPP22_XLG_CTRL0_REG);
+       writel(ctrl4, port->base + MVPP22_XLG_CTRL4_REG);
+}
+
+static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
+                             const struct phylink_link_state *state)
+{
+       u32 an, ctrl0, ctrl2, ctrl4;
+
+       an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
+       ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+       ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
+
+       /* Force link down */
+       an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+       an |= MVPP2_GMAC_FORCE_LINK_DOWN;
+       writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+       /* Set the GMAC in a reset state */
+       ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
+       writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+
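+       /* Start from a clean slate: clear the speed, duplex, AN and pause
+        * advertisement bits before applying the requested mode below.
+        */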
+       an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED |
+               MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN |
+               MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
+               MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN |
+               MVPP2_GMAC_FORCE_LINK_DOWN);
+       ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK;
+       ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK);
+
+       if (state->interface == PHY_INTERFACE_MODE_1000BASEX ||
+           state->interface == PHY_INTERFACE_MODE_2500BASEX) {
+               /* 1000BaseX and 2500BaseX ports cannot negotiate speed or
+                * duplex: they always run at a fixed 1000/2500 Mbps in full
+                * duplex, so force that speed and full duplex here.
+                */
+               ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
+               an |= MVPP2_GMAC_CONFIG_GMII_SPEED |
+                     MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+       } else if (!phy_interface_mode_is_rgmii(state->interface)) {
+               an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+       }
+
+       if (state->duplex)
+               an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+       if (phylink_test(state->advertising, Pause))
+               an |= MVPP2_GMAC_FC_ADV_EN;
+       if (phylink_test(state->advertising, Asym_Pause))
+               an |= MVPP2_GMAC_FC_ADV_ASM_EN;
+
+       if (state->interface == PHY_INTERFACE_MODE_SGMII ||
+           state->interface == PHY_INTERFACE_MODE_1000BASEX ||
+           state->interface == PHY_INTERFACE_MODE_2500BASEX) {
+               an |= MVPP2_GMAC_IN_BAND_AUTONEG;
+               ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
+
+               ctrl4 &= ~(MVPP22_CTRL4_EXT_PIN_GMII_SEL |
+                          MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN);
+               ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
+                        MVPP22_CTRL4_DP_CLK_SEL |
+                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+
+               if (state->pause & MLO_PAUSE_TX)
+                       ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+               if (state->pause & MLO_PAUSE_RX)
+                       ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+       } else if (phy_interface_mode_is_rgmii(state->interface)) {
+               an |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS;
+
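+               /* With in-band AN bypassed, force the MAC speed from the
+                * resolved link state.
+                */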
+               if (state->speed == SPEED_1000)
+                       an |= MVPP2_GMAC_CONFIG_GMII_SPEED;
+               else if (state->speed == SPEED_100)
+                       an |= MVPP2_GMAC_CONFIG_MII_SPEED;
+
+               ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL;
+               ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
+                        MVPP22_CTRL4_SYNC_BYPASS_DIS |
+                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+       }
+
+       writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG);
+       writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+       writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
+       writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+                            const struct phylink_link_state *state)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       /* Check for invalid configuration */
+       if (state->interface == PHY_INTERFACE_MODE_10GKR && port->gop_id != 0) {
+               netdev_err(dev, "Invalid mode on %s\n", dev->name);
+               return;
+       }
+
+       netif_tx_stop_all_queues(port->dev);
+       if (!port->has_phy)
+               netif_carrier_off(port->dev);
+
+       /* Make sure the port is disabled when reconfiguring the mode */
+       mvpp2_port_disable(port);
+
+       if (port->priv->hw_version == MVPP22 &&
+           port->phy_interface != state->interface) {
+               port->phy_interface = state->interface;
+
+               /* Reconfigure the serdes lanes */
+               phy_power_off(port->comphy);
+               mvpp22_mode_reconfigure(port);
+       }
+
+       /* mac (re)configuration */
+       if (state->interface == PHY_INTERFACE_MODE_10GKR)
+               mvpp2_xlg_config(port, mode, state);
+       else if (phy_interface_mode_is_rgmii(state->interface) ||
+                state->interface == PHY_INTERFACE_MODE_SGMII ||
+                state->interface == PHY_INTERFACE_MODE_1000BASEX ||
+                state->interface == PHY_INTERFACE_MODE_2500BASEX)
+               mvpp2_gmac_config(port, mode, state);
+
+       if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
+               mvpp2_port_loopback_set(port, state);
+
+       /* If the port already was up, make sure it's still in the same state */
+       if (state->link || !port->has_phy) {
+               mvpp2_port_enable(port);
+
+               mvpp2_egress_enable(port);
+               mvpp2_ingress_enable(port);
+               if (!port->has_phy)
+                       netif_carrier_on(dev);
+               netif_tx_wake_all_queues(dev);
+       }
+}
+
+static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+                             phy_interface_t interface, struct phy_device *phy)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       u32 val;
+
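+       /* Outside of in-band AN (and the 10G XLG path), the MAC must be
+        * told about the link directly: drop the forced-down bit and, for
+        * RGMII, force the link up.
+        */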
+       if (!phylink_autoneg_inband(mode) &&
+           interface != PHY_INTERFACE_MODE_10GKR) {
+               val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+               val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
+               if (phy_interface_mode_is_rgmii(interface))
+                       val |= MVPP2_GMAC_FORCE_LINK_PASS;
+               writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       }
+
+       mvpp2_port_enable(port);
+
+       mvpp2_egress_enable(port);
+       mvpp2_ingress_enable(port);
+       netif_tx_wake_all_queues(dev);
+}
+
+static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
+                               phy_interface_t interface)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       u32 val;
+
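+       /* Outside of in-band AN, force the GMAC link down so traffic stops
+        * immediately.
+        */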
+       if (!phylink_autoneg_inband(mode) &&
+           interface != PHY_INTERFACE_MODE_10GKR) {
+               val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+               val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+               val |= MVPP2_GMAC_FORCE_LINK_DOWN;
+               writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       }
+
+       netif_tx_stop_all_queues(dev);
+       mvpp2_egress_disable(port);
+       mvpp2_ingress_disable(port);
+
+       /* When using link interrupts to notify phylink of a MAC state change,
+        * we do not want the port to be disabled (we want to receive further
+        * interrupts, to be notified when the port will have a link later).
+        */
+       if (!port->has_phy)
+               return;
+
+       mvpp2_port_disable(port);
+}
+
+static const struct phylink_mac_ops mvpp2_phylink_ops = {
+       .validate = mvpp2_phylink_validate,
+       .mac_link_state = mvpp2_phylink_mac_link_state,
+       .mac_an_restart = mvpp2_mac_an_restart,
+       .mac_config = mvpp2_mac_config,
+       .mac_link_up = mvpp2_mac_link_up,
+       .mac_link_down = mvpp2_mac_link_down,
+};
+
 /* Ports initialization */
 static int mvpp2_port_probe(struct platform_device *pdev,
                            struct fwnode_handle *port_fwnode,
                            struct mvpp2 *priv)
 {
-       struct device_node *phy_node;
        struct phy *comphy = NULL;
        struct mvpp2_port *port;
        struct mvpp2_port_pcpu *port_pcpu;
        struct device_node *port_node = to_of_node(port_fwnode);
        struct net_device *dev;
        struct resource *res;
+       struct phylink *phylink;
        char *mac_from = "";
        unsigned int ntxqs, nrxqs;
        bool has_tx_irqs;
@@ -8211,11 +8406,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        if (!dev)
                return -ENOMEM;
 
-       if (port_node)
-               phy_node = of_parse_phandle(port_node, "phy", 0);
-       else
-               phy_node = NULL;
-
        phy_mode = fwnode_get_phy_mode(port_fwnode);
        if (phy_mode < 0) {
                dev_err(&pdev->dev, "incorrect phy mode\n");
@@ -8248,6 +8438,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        port = netdev_priv(dev);
        port->dev = dev;
        port->fwnode = port_fwnode;
+       port->has_phy = !!of_find_property(port_node, "phy", NULL);
        port->ntxqs = ntxqs;
        port->nrxqs = nrxqs;
        port->priv = priv;
@@ -8278,7 +8469,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        else
                port->first_rxq = port->id * priv->max_port_rxqs;
 
-       port->phy_node = phy_node;
+       port->of_node = port_node;
        port->phy_interface = phy_mode;
        port->comphy = comphy;
 
@@ -8339,9 +8530,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
        mvpp2_port_periodic_xon_disable(port);
 
-       if (priv->hw_version == MVPP21)
-               mvpp2_port_fc_adv_enable(port);
-
        mvpp2_port_reset(port);
 
        port->pcpu = alloc_percpu(struct mvpp2_port_pcpu);
@@ -8385,10 +8573,23 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        /* 9704 == 9728 - 20, rounded down to a multiple of 8 */
        dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
 
+       /* Phylink isn't used with ACPI for now */
+       if (port_node) {
+               phylink = phylink_create(dev, port_fwnode, phy_mode,
+                                        &mvpp2_phylink_ops);
+               if (IS_ERR(phylink)) {
+                       err = PTR_ERR(phylink);
+                       goto err_free_port_pcpu;
+               }
+               port->phylink = phylink;
+       } else {
+               port->phylink = NULL;
+       }
+
        err = register_netdev(dev);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to register netdev\n");
-               goto err_free_port_pcpu;
+               goto err_phylink;
        }
        netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr);
 
@@ -8396,6 +8597,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
        return 0;
 
+err_phylink:
+       if (port->phylink)
+               phylink_destroy(port->phylink);
 err_free_port_pcpu:
        free_percpu(port->pcpu);
 err_free_txq_pcpu:
@@ -8409,7 +8613,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 err_deinit_qvecs:
        mvpp2_queue_vectors_deinit(port);
 err_free_netdev:
-       of_node_put(phy_node);
        free_netdev(dev);
        return err;
 }
@@ -8420,7 +8623,8 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
        int i;
 
        unregister_netdev(port->dev);
-       of_node_put(port->phy_node);
+       if (port->phylink)
+               phylink_destroy(port->phylink);
        free_percpu(port->pcpu);
        free_percpu(port->stats);
        for (i = 0; i < port->ntxqs; i++)
@@ -8768,18 +8972,27 @@ static int mvpp2_probe(struct platform_device *pdev)
                        err = clk_prepare_enable(priv->mg_clk);
                        if (err < 0)
                                goto err_gop_clk;
+
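+                       /* mg_core_clk is optional: if the lookup fails,
+                        * treat the clock as simply not provided.
+                        */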
+                       priv->mg_core_clk = devm_clk_get(&pdev->dev, "mg_core_clk");
+                       if (IS_ERR(priv->mg_core_clk)) {
+                               priv->mg_core_clk = NULL;
+                       } else {
+                               err = clk_prepare_enable(priv->mg_core_clk);
+                               if (err < 0)
+                                       goto err_mg_clk;
+                       }
                }
 
                priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk");
                if (IS_ERR(priv->axi_clk)) {
                        err = PTR_ERR(priv->axi_clk);
                        if (err == -EPROBE_DEFER)
-                               goto err_gop_clk;
+                               goto err_mg_core_clk;
                        priv->axi_clk = NULL;
                } else {
                        err = clk_prepare_enable(priv->axi_clk);
                        if (err < 0)
-                               goto err_gop_clk;
+                               goto err_mg_core_clk;
                }
 
                /* Get system's tclk rate */
@@ -8793,7 +9006,7 @@ static int mvpp2_probe(struct platform_device *pdev)
        if (priv->hw_version == MVPP22) {
                err = dma_set_mask(&pdev->dev, MVPP2_DESC_DMA_MASK);
                if (err)
-                       goto err_mg_clk;
+                       goto err_axi_clk;
                /* Sadly, the BM pools all share the same register to
                 * store the high 32 bits of their address. So they
                 * must all have the same high 32 bits, which forces
@@ -8801,14 +9014,14 @@ static int mvpp2_probe(struct platform_device *pdev)
                 */
                err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
                if (err)
-                       goto err_mg_clk;
+                       goto err_axi_clk;
        }
 
        /* Initialize network controller */
        err = mvpp2_init(pdev, priv);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to initialize controller\n");
-               goto err_mg_clk;
+               goto err_axi_clk;
        }
 
        /* Initialize ports */
@@ -8821,7 +9034,7 @@ static int mvpp2_probe(struct platform_device *pdev)
        if (priv->port_count == 0) {
                dev_err(&pdev->dev, "no ports enabled\n");
                err = -ENODEV;
-               goto err_mg_clk;
+               goto err_axi_clk;
        }
 
        /* Statistics must be gathered regularly because some of them (like
@@ -8849,8 +9062,13 @@ static int mvpp2_probe(struct platform_device *pdev)
                        mvpp2_port_remove(priv->port_list[i]);
                i++;
        }
-err_mg_clk:
+err_axi_clk:
        clk_disable_unprepare(priv->axi_clk);
+
+err_mg_core_clk:
+       if (priv->hw_version == MVPP22)
+               clk_disable_unprepare(priv->mg_core_clk);
+err_mg_clk:
        if (priv->hw_version == MVPP22)
                clk_disable_unprepare(priv->mg_clk);
 err_gop_clk:
@@ -8897,6 +9115,7 @@ static int mvpp2_remove(struct platform_device *pdev)
                return 0;
 
        clk_disable_unprepare(priv->axi_clk);
+       clk_disable_unprepare(priv->mg_core_clk);
        clk_disable_unprepare(priv->mg_clk);
        clk_disable_unprepare(priv->pp_clk);
        clk_disable_unprepare(priv->gop_clk);
index a30a2e95d13f56ed58055ac119d8359dd3052fd2..f11b45001cad8c5635684e820a03f183e12d6ef5 100644 (file)
@@ -1027,6 +1027,22 @@ static int mlx4_en_set_coalesce(struct net_device *dev,
        if (!coal->tx_max_coalesced_frames_irq)
                return -EINVAL;
 
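+       /* Bounds-check the user-supplied moderation values up front rather
+        * than silently truncating them.
+        */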
+       if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME ||
+           coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME ||
+           coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME ||
+           coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) {
+               netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n",
+                           __func__, MLX4_EN_MAX_COAL_TIME);
+               return -ERANGE;
+       }
+
+       if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS ||
+           coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) {
+               netdev_info(dev, "%s: maximum coalesced frames supported is %d\n",
+                           __func__, MLX4_EN_MAX_COAL_PKTS);
+               return -ERANGE;
+       }
+
        priv->rx_frames = (coal->rx_max_coalesced_frames ==
                           MLX4_EN_AUTO_CONF) ?
                                MLX4_EN_RX_COAL_TARGET :
index e0adac4a9a191f923e68d896293c04dce5b67fc7..9670b33fc9b1ffd64a160bb1186af2f47419ab38 100644 (file)
@@ -3324,12 +3324,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
                                           MAX_TX_RINGS, GFP_KERNEL);
                if (!priv->tx_ring[t]) {
                        err = -ENOMEM;
-                       goto err_free_tx;
+                       goto out;
                }
                priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
                                         MAX_TX_RINGS, GFP_KERNEL);
                if (!priv->tx_cq[t]) {
-                       kfree(priv->tx_ring[t]);
                        err = -ENOMEM;
                        goto out;
                }
@@ -3582,11 +3581,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 
        return 0;
 
-err_free_tx:
-       while (t--) {
-               kfree(priv->tx_ring[t]);
-               kfree(priv->tx_cq[t]);
-       }
 out:
        mlx4_en_destroy_netdev(dev);
        return err;
index efc55feddc5c36086c670fd873401dd0c5832c63..9f54ccbddea74b57973ee724acf360fa23434a3e 100644 (file)
@@ -593,30 +593,25 @@ static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-/* In IPv6 packets, besides subtracting the pseudo header checksum,
- * we also compute/add the IP header checksum which
- * is not added by the HW.
+/* In IPv6 packets, hw_checksum is missing 6 bytes of the IPv6 header:
+ * the first 4 bytes (priority, version, flow_lbl) and two more bytes
+ * (nexthdr, hop_limit); add them back so the full header is covered.
  */
 static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
                               struct ipv6hdr *ipv6h)
 {
        __u8 nexthdr = ipv6h->nexthdr;
-       __wsum csum_pseudo_hdr = 0;
+       __wsum temp;
 
        if (unlikely(nexthdr == IPPROTO_FRAGMENT ||
                     nexthdr == IPPROTO_HOPOPTS ||
                     nexthdr == IPPROTO_SCTP))
                return -1;
-       hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(nexthdr));
 
-       csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
-                                      sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
-       csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len);
-       csum_pseudo_hdr = csum_add(csum_pseudo_hdr,
-                                  (__force __wsum)htons(nexthdr));
-
-       skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
-       skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0));
+       /* priority, version, flow_lbl */
+       temp = csum_add(hw_checksum, *(__wsum *)ipv6h);
+       /* nexthdr and hop_limit */
+       skb->csum = csum_add(temp, (__force __wsum)*(__be16 *)&ipv6h->nexthdr);
        return 0;
 }
 #endif
index 6b68537738480eb649b962647fe8b52c65f4cb32..0227786308af5d70bdfbb19da3fb8d5760d0651f 100644 (file)
@@ -694,7 +694,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
        u16 rings_p_up = priv->num_tx_rings_p_up;
 
        if (netdev_get_num_tc(dev))
-               return skb_tx_hash(dev, skb);
+               return fallback(dev, skb);
 
        return fallback(dev, skb) % rings_p_up;
 }
index de6b3d41614887264ef5a8c45c84fd8f04056aa0..46dcbfbe4c5eb0133ce49c2d222ebd7a2c8e7d44 100644 (file)
@@ -165,6 +165,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                [36] = "QinQ VST mode support",
                [37] = "sl to vl mapping table change event support",
                [38] = "user MAC support",
+               [39] = "Report driver version to FW support",
        };
        int i;
 
@@ -1038,6 +1039,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
        if (field32 & (1 << 7))
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
+       if (field32 & (1 << 8))
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW;
        MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT);
        if (field32 & (1 << 17))
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT;
@@ -1860,6 +1863,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define  INIT_HCA_UC_STEERING_OFFSET    (INIT_HCA_MCAST_OFFSET + 0x18)
 #define         INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
 #define  INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN      0x6
+#define  INIT_HCA_DRIVER_VERSION_OFFSET   0x140
+#define  INIT_HCA_DRIVER_VERSION_SZ       0x40
 #define  INIT_HCA_FS_PARAM_OFFSET         0x1d0
 #define  INIT_HCA_FS_BASE_OFFSET          (INIT_HCA_FS_PARAM_OFFSET + 0x00)
 #define  INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x12)
@@ -1950,6 +1955,13 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
                *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31);
 
+       if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) {
+               u8 *dst = (u8 *)(inbox + INIT_HCA_DRIVER_VERSION_OFFSET / 4);
+
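+               /* Copying at most SZ - 1 bytes keeps the string
+                * NUL-terminated, assuming the mailbox buffer was zeroed at
+                * allocation.
+                */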
+               strncpy(dst, DRV_NAME_FOR_FW, INIT_HCA_DRIVER_VERSION_SZ - 1);
+               mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n", dst);
+       }
+
        /* QPC/EEC/CQC/EQC/RDMARC attributes */
 
        MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
index bfef69235d716bcf9d929a291957fc0f3ad3dc3e..0a30d81aab3ba3c94754a54e69e5051e2d562233 100644 (file)
@@ -73,7 +73,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 
 static int msi_x = 1;
 module_param(msi_x, int, 0444);
-MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
+MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x");
 
 #else /* CONFIG_PCI_MSI */
 
@@ -1317,7 +1317,7 @@ static int mlx4_mf_unbond(struct mlx4_dev *dev)
 
        ret = mlx4_unbond_fs_rules(dev);
        if (ret)
-               mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret);
+               mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
        ret1 = mlx4_unbond_mac_table(dev);
        if (ret1) {
                mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
@@ -2815,6 +2815,9 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
                                dev->caps.num_eqs - dev->caps.reserved_eqs,
                                MAX_MSIX);
 
+               if (msi_x > 1)
+                       nreq = min_t(int, nreq, msi_x);
+
                entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
                if (!entries)
                        goto no_msi;
@@ -2929,6 +2932,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
                mlx4_err(dev, "Failed to create file for port %d\n", port);
                devlink_port_unregister(&info->devlink_port);
                info->port = -1;
+               return err;
        }
 
        sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
@@ -2950,9 +2954,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
                                   &info->port_attr);
                devlink_port_unregister(&info->devlink_port);
                info->port = -1;
+               return err;
        }
 
-       return err;
+       return 0;
 }
 
 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
@@ -4125,17 +4130,68 @@ static const struct pci_error_handlers mlx4_err_handler = {
        .resume         = mlx4_pci_resume,
 };
 
+static int mlx4_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+       struct mlx4_dev *dev = persist->dev;
+
+       mlx4_err(dev, "suspend was called\n");
+       mutex_lock(&persist->interface_state_mutex);
+       if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+               mlx4_unload_one(pdev);
+       mutex_unlock(&persist->interface_state_mutex);
+
+       return 0;
+}
+
+static int mlx4_resume(struct pci_dev *pdev)
+{
+       struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+       struct mlx4_dev *dev = persist->dev;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+       int total_vfs;
+       int ret = 0;
+
+       mlx4_err(dev, "resume was called\n");
+       total_vfs = dev->persist->num_vfs;
+       memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
+
+       mutex_lock(&persist->interface_state_mutex);
+       if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
+               ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs,
+                                   nvfs, priv, 1);
+               if (!ret) {
+                       ret = restore_current_port_types(dev,
+                                       dev->persist->curr_port_type,
+                                       dev->persist->curr_port_poss_type);
+                       if (ret)
+                               mlx4_err(dev, "resume: could not restore original port types (%d)\n", ret);
+               }
+       }
+       mutex_unlock(&persist->interface_state_mutex);
+
+       return ret;
+}
+
 static struct pci_driver mlx4_driver = {
        .name           = DRV_NAME,
        .id_table       = mlx4_pci_table,
        .probe          = mlx4_init_one,
        .shutdown       = mlx4_shutdown,
        .remove         = mlx4_remove_one,
+       .suspend        = mlx4_suspend,
+       .resume         = mlx4_resume,
        .err_handler    = &mlx4_err_handler,
 };
 
 static int __init mlx4_verify_params(void)
 {
+       if (msi_x < 0) {
+               pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
+               return -1;
+       }
+
        if ((log_num_mac < 0) || (log_num_mac > 7)) {
                pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
                return -1;
index c68da1986e51d46fbb03878f176e7afc8ba3f676..cb9e923e83996499c2d98c8023b5ebc069bb0731 100644 (file)
@@ -55,8 +55,8 @@
 #include "fw_qos.h"
 
 #define DRV_NAME       "mlx4_core"
-#define PFX            DRV_NAME ": "
 #define DRV_VERSION    "4.0-0"
+#define DRV_NAME_FOR_FW                "Linux," DRV_NAME "," DRV_VERSION
 
 #define MLX4_FS_UDP_UC_EN              (1 << 1)
 #define MLX4_FS_TCP_UC_EN              (1 << 2)
index f7c81133594f341cfb0a6e2b1dbb1cc54c3d72ac..ace6545f82e6b343d26acd6d0bb4c55cd6ae4809 100644 (file)
 #define MLX4_EN_TX_COAL_PKTS   16
 #define MLX4_EN_TX_COAL_TIME   0x10
 
+#define MLX4_EN_MAX_COAL_PKTS  U16_MAX
+#define MLX4_EN_MAX_COAL_TIME  U16_MAX
+
 #define MLX4_EN_RX_RATE_LOW            400000
 #define MLX4_EN_RX_COAL_TIME_LOW       0
 #define MLX4_EN_RX_RATE_HIGH           450000
@@ -552,8 +555,8 @@ struct mlx4_en_priv {
        u16 rx_usecs_low;
        u32 pkt_rate_high;
        u16 rx_usecs_high;
-       u16 sample_interval;
-       u16 adaptive_rx_coal;
+       u32 sample_interval;
+       u32 adaptive_rx_coal;
        u32 msg_enable;
        u32 loopback_ok;
        u32 validate_loopback;
index 12257034131eca50701733fc4e33e3673a788392..ee6684779d112478ee4061960f1cf24fd918560f 100644 (file)
@@ -86,3 +86,14 @@ config MLX5_EN_IPSEC
          Build support for IPsec cryptography-offload acceleration in the NIC.
          Note: Support for hardware with this capability needs to be selected
          for this option to become available.
+
+config MLX5_EN_TLS
+       bool "TLS cryptography-offload accelaration"
+       depends on MLX5_CORE_EN
+       depends on TLS_DEVICE
+       depends on MLX5_ACCEL
+       default n
+       ---help---
+         Build support for TLS cryptography-offload acceleration in the NIC.
+         Note: Support for hardware with this capability needs to be selected
+         for this option to become available.
index c805769d92a9e8f26663b0c0c4f2623ebcc1208e..a7135f5d5cf6976cfa034d1eb8ce9ab5b06db7b4 100644 (file)
@@ -8,10 +8,10 @@ mlx5_core-y :=        main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \
                diag/fs_tracepoint.o
 
-mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
 
 mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
-               fpga/ipsec.o
+               fpga/ipsec.o fpga/tls.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
                en_tx.o en_rx.o en_dim.o en_txrx.o en_stats.o vxlan.o \
@@ -28,4 +28,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
 mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
                en_accel/ipsec_stats.o
 
+mlx5_core-$(CONFIG_MLX5_EN_TLS) +=  en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+
 CFLAGS_tracepoint.o := -I$(src)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
new file mode 100644 (file)
index 0000000..77ac19f
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/tls.h"
+#include "mlx5_core.h"
+#include "fpga/tls.h"
+
+int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+                              struct tls_crypto_info *crypto_info,
+                              u32 start_offload_tcp_sn, u32 *p_swid)
+{
+       return mlx5_fpga_tls_add_tx_flow(mdev, flow, crypto_info,
+                                        start_offload_tcp_sn, p_swid);
+}
+
+void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid)
+{
+       mlx5_fpga_tls_del_tx_flow(mdev, swid, GFP_KERNEL);
+}
+
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+       return mlx5_fpga_is_tls_device(mdev);
+}
+
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+       return mlx5_fpga_tls_device_caps(mdev);
+}
+
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
+{
+       return mlx5_fpga_tls_init(mdev);
+}
+
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+       mlx5_fpga_tls_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
new file mode 100644 (file)
index 0000000..6f9c9f4
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_TLS_H__
+#define __MLX5_ACCEL_TLS_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/tls.h>
+
+#ifdef CONFIG_MLX5_ACCEL
+
+enum {
+       MLX5_ACCEL_TLS_TX = BIT(0),
+       MLX5_ACCEL_TLS_RX = BIT(1),
+       MLX5_ACCEL_TLS_V12 = BIT(2),
+       MLX5_ACCEL_TLS_V13 = BIT(3),
+       MLX5_ACCEL_TLS_LRO = BIT(4),
+       MLX5_ACCEL_TLS_IPV6 = BIT(5),
+       MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
+       MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
+};
+
+struct mlx5_ifc_tls_flow_bits {
+       u8         src_port[0x10];
+       u8         dst_port[0x10];
+       union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+       union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+       u8         ipv6[0x1];
+       u8         direction_sx[0x1];
+       u8         reserved_at_2[0x1e];
+};
+
+int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+                              struct tls_crypto_info *crypto_info,
+                              u32 start_offload_tcp_sn, u32 *p_swid);
+void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid);
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline int
+mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+                          struct tls_crypto_info *crypto_info,
+                          u32 start_offload_tcp_sn, u32 *p_swid) { return 0; }
+static inline void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) { }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
+static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
+
+#endif
+
+#endif /* __MLX5_ACCEL_TLS_H__ */
index 21cd1703a86207787fc3b39ecb6653bc6c14fae5..487388aed98f22cc9ae814fd60d27b48d5105458 100644 (file)
@@ -135,6 +135,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
        return cmd->cmd_buf + (idx << cmd->log_stride);
 }
 
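+/* A command message holds up to sizeof(msg->first.data) bytes inline; the
+ * rest is spread over chained mailbox blocks of MLX5_CMD_DATA_BLOCK_SIZE
+ * bytes each.
+ */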
+static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg)
+{
+       int size = msg->len;
+       int blen = size - min_t(int, sizeof(msg->first.data), size);
+
+       return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE);
+}
+
 static u8 xor8_buf(void *buf, size_t offset, int len)
 {
        u8 *ptr = buf;
@@ -174,10 +182,7 @@ static void calc_block_sig(struct mlx5_cmd_prot_block *block)
 static void calc_chain_sig(struct mlx5_cmd_msg *msg)
 {
        struct mlx5_cmd_mailbox *next = msg->next;
-       int size = msg->len;
-       int blen = size - min_t(int, sizeof(msg->first.data), size);
-       int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
-               / MLX5_CMD_DATA_BLOCK_SIZE;
+       int n = mlx5_calc_cmd_blocks(msg);
        int i = 0;
 
        for (i = 0; i < n && next; i++)  {
@@ -220,12 +225,9 @@ static void free_cmd(struct mlx5_cmd_work_ent *ent)
 static int verify_signature(struct mlx5_cmd_work_ent *ent)
 {
        struct mlx5_cmd_mailbox *next = ent->out->next;
+       int n = mlx5_calc_cmd_blocks(ent->out);
        int err;
        u8 sig;
-       int size = ent->out->len;
-       int blen = size - min_t(int, sizeof(ent->out->first.data), size);
-       int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
-               / MLX5_CMD_DATA_BLOCK_SIZE;
        int i = 0;
 
        sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
@@ -720,9 +722,11 @@ static void dump_command(struct mlx5_core_dev *dev,
        struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
        u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode);
        struct mlx5_cmd_mailbox *next = msg->next;
+       int n = mlx5_calc_cmd_blocks(msg);
        int data_only;
        u32 offset = 0;
        int dump_len;
+       int i;
 
        data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
 
@@ -749,7 +753,7 @@ static void dump_command(struct mlx5_core_dev *dev,
                offset += sizeof(*ent->lay);
        }
 
-       while (next && offset < msg->len) {
+       for (i = 0; i < n && next; i++)  {
                if (data_only) {
                        dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
                        dump_buf(next->buf, dump_len, 1, offset);
@@ -1137,7 +1141,6 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
        struct mlx5_cmd_mailbox *tmp, *head = NULL;
        struct mlx5_cmd_prot_block *block;
        struct mlx5_cmd_msg *msg;
-       int blen;
        int err;
        int n;
        int i;
@@ -1146,8 +1149,8 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
        if (!msg)
                return ERR_PTR(-ENOMEM);
 
-       blen = size - min_t(int, sizeof(msg->first.data), size);
-       n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) / MLX5_CMD_DATA_BLOCK_SIZE;
+       msg->len = size;
+       n = mlx5_calc_cmd_blocks(msg);
 
        for (i = 0; i < n; i++) {
                tmp = alloc_cmd_box(dev, flags);
@@ -1165,7 +1168,6 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
                head = tmp;
        }
        msg->next = head;
-       msg->len = size;
        return msg;
 
 err_alloc:
index d93ff567b40d95ef5faebafae64730ce690fde8e..b3820a34e773a37370c322c530667c71c9992b25 100644 (file)
@@ -235,7 +235,7 @@ const char *parse_fs_dst(struct trace_seq *p,
 
        switch (dst->type) {
        case MLX5_FLOW_DESTINATION_TYPE_VPORT:
-               trace_seq_printf(p, "vport=%u\n", dst->vport_num);
+               trace_seq_printf(p, "vport=%u\n", dst->vport.num);
                break;
        case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
                trace_seq_printf(p, "ft=%p\n", dst->ft);
index 3317a4da87cbab3a82fab214e808c9f2c6ad563a..bc91a7335c93dab73100f385aa64b881ba093cc5 100644 (file)
@@ -55,6 +55,9 @@
 
 struct page_pool;
 
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
 #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
 
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
@@ -241,6 +244,7 @@ struct mlx5e_params {
        bool vlan_strip_disable;
        bool scatter_fcs_en;
        bool rx_dim_enabled;
+       bool tx_dim_enabled;
        u32 lro_timeout;
        u32 pflags;
        struct bpf_prog *xdp_prog;
@@ -284,8 +288,6 @@ enum {
        MLX5E_RQ_STATE_AM,
 };
 
-#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr))
-
 struct mlx5e_cq {
        /* data path - accessed per cqe */
        struct mlx5_cqwq           wq;
@@ -330,6 +332,8 @@ enum {
        MLX5E_SQ_STATE_ENABLED,
        MLX5E_SQ_STATE_RECOVERING,
        MLX5E_SQ_STATE_IPSEC,
+       MLX5E_SQ_STATE_AM,
+       MLX5E_SQ_STATE_TLS,
 };
 
 struct mlx5e_sq_wqe_info {
@@ -342,6 +346,7 @@ struct mlx5e_txqsq {
        /* dirtied @completion */
        u16                        cc;
        u32                        dma_fifo_cc;
+       struct net_dim             dim; /* Adaptive Moderation */
 
        /* dirtied @xmit */
        u16                        pc ____cacheline_aligned_in_smp;
@@ -629,7 +634,6 @@ struct mlx5e_flow_table {
 struct mlx5e_tc_table {
        struct mlx5_flow_table          *t;
 
-       struct rhashtable_params        ht_params;
        struct rhashtable               ht;
 
        DECLARE_HASHTABLE(mod_hdr_tbl, 8);
@@ -794,6 +798,9 @@ struct mlx5e_priv {
 #ifdef CONFIG_MLX5_EN_IPSEC
        struct mlx5e_ipsec        *ipsec;
 #endif
+#ifdef CONFIG_MLX5_EN_TLS
+       struct mlx5e_tls          *tls;
+#endif
 };
 
 struct mlx5e_profile {
@@ -824,6 +831,8 @@ void mlx5e_build_ptys2ethtool_map(void);
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                       void *accel_priv, select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                         struct mlx5e_tx_wqe *wqe, u16 pi);
 
 void mlx5e_completion_event(struct mlx5_core_cq *mcq);
 void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
@@ -939,6 +948,18 @@ static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
                MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
 }
 
+static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
+                                     struct mlx5e_tx_wqe **wqe,
+                                     u16 *pi)
+{
+       struct mlx5_wq_cyc *wq;
+
+       wq = &sq->wq;
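+       /* The SQ is a cyclic work queue: mask the producer counter with the
+        * size mask to get the slot, and hand back a zeroed WQE.
+        */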
+       *pi = sq->pc & wq->sz_m1;
+       *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+       memset(*wqe, 0, sizeof(**wqe));
+}
+
 static inline
 struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
 {
@@ -1096,9 +1117,6 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
                               struct ethtool_flash *flash);
 
-int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
-                           void *cb_priv);
-
 /* mlx5e generic netdev management API */
 struct net_device*
 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
@@ -1111,4 +1129,5 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
                            u16 max_channels, u16 mtu);
 u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
 void mlx5e_rx_dim_work(struct work_struct *work);
+void mlx5e_tx_dim_work(struct work_struct *work);
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
new file mode 100644 (file)
index 0000000..f20074d
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_EN_ACCEL_H__
+#define __MLX5E_EN_ACCEL_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
+#include "en.h"
+
+static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb,
+                                                   struct mlx5e_txqsq *sq,
+                                                   struct net_device *dev,
+                                                   struct mlx5e_tx_wqe **wqe,
+                                                   u16 *pi)
+{
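+       /* Give each enabled offload a chance to transform the skb before it
+        * is placed on the ring; a NULL return means the skb was consumed.
+        */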
+#ifdef CONFIG_MLX5_EN_TLS
+       if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
+               skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi);
+               if (unlikely(!skb))
+                       return NULL;
+       }
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+       if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
+               skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb);
+               if (unlikely(!skb))
+                       return NULL;
+       }
+#endif
+
+       return skb;
+}
+
+#endif /* CONFIG_MLX5_ACCEL */
+
+#endif /* __MLX5E_EN_ACCEL_H__ */
index 1198fc1eba4c8a869f2cd232aefe04974f212b0d..93bf10e6508c4956eabf66cda15fcc6568ba1698 100644 (file)
@@ -45,9 +45,6 @@
 #define MLX5E_IPSEC_SADB_RX_BITS 10
 #define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
 
-#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
-#define MLX5E_METADATA_ETHER_LEN 8
-
 struct mlx5e_priv;
 
 struct mlx5e_ipsec_sw_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
new file mode 100644 (file)
index 0000000..d167845
--- /dev/null
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include "en_accel/tls.h"
+#include "accel/tls.h"
+
+static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
+{
+       struct inet_sock *inet = inet_sk(sk);
+
+       MLX5_SET(tls_flow, flow, ipv6, 0);
+       memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+              &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+       memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+              &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
+{
+       struct ipv6_pinfo *np = inet6_sk(sk);
+
+       MLX5_SET(tls_flow, flow, ipv6, 1);
+       memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+              &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+       memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+              &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+}
+#endif
+
+static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
+{
+       struct inet_sock *inet = inet_sk(sk);
+
+       memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
+              MLX5_FLD_SZ_BYTES(tls_flow, src_port));
+       memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
+              MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
+}
+
+static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
+{
+       switch (sk->sk_family) {
+       case AF_INET:
+               mlx5e_tls_set_ipv4_flow(flow, sk);
+               break;
+#if IS_ENABLED(CONFIG_IPV6)
+       case AF_INET6:
+               if (!sk->sk_ipv6only &&
+                   ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+                       mlx5e_tls_set_ipv4_flow(flow, sk);
+                       break;
+               }
+               if (!(caps & MLX5_ACCEL_TLS_IPV6))
+                       goto error_out;
+
+               mlx5e_tls_set_ipv6_flow(flow, sk);
+               break;
+#endif
+       default:
+               goto error_out;
+       }
+
+       mlx5e_tls_set_flow_tcp_ports(flow, sk);
+       return 0;
+error_out:
+       return -EINVAL;
+}
+
+static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
+                        enum tls_offload_ctx_dir direction,
+                        struct tls_crypto_info *crypto_info,
+                        u32 start_offload_tcp_sn)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       u32 caps = mlx5_accel_tls_device_caps(mdev);
+       int ret = -ENOMEM;
+       void *flow;
+
+       if (direction != TLS_OFFLOAD_CTX_DIR_TX)
+               return -EINVAL;
+
+       flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
+       if (!flow)
+               return ret;
+
+       ret = mlx5e_tls_set_flow(flow, sk, caps);
+       if (ret)
+               goto free_flow;
+
+       if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+               struct mlx5e_tls_offload_context *tx_ctx =
+                   mlx5e_get_tls_tx_context(tls_ctx);
+               u32 swid;
+
+               ret = mlx5_accel_tls_add_tx_flow(mdev, flow, crypto_info,
+                                                start_offload_tcp_sn, &swid);
+               if (ret < 0)
+                       goto free_flow;
+
+               tx_ctx->swid = htonl(swid);
+               tx_ctx->expected_seq = start_offload_tcp_sn;
+       }
+
+       return 0;
+free_flow:
+       kfree(flow);
+       return ret;
+}
+
+static void mlx5e_tls_del(struct net_device *netdev,
+                         struct tls_context *tls_ctx,
+                         enum tls_offload_ctx_dir direction)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+
+       if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+               u32 swid = ntohl(mlx5e_get_tls_tx_context(tls_ctx)->swid);
+
+               mlx5_accel_tls_del_tx_flow(priv->mdev, swid);
+       } else {
+               netdev_err(netdev, "unsupported direction %d\n", direction);
+       }
+}
+
+static const struct tlsdev_ops mlx5e_tls_ops = {
+       .tls_dev_add = mlx5e_tls_add,
+       .tls_dev_del = mlx5e_tls_del,
+};
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+       struct net_device *netdev = priv->netdev;
+
+       if (!mlx5_accel_is_tls_device(priv->mdev))
+               return;
+
+       netdev->features |= NETIF_F_HW_TLS_TX;
+       netdev->hw_features |= NETIF_F_HW_TLS_TX;
+       netdev->tlsdev_ops = &mlx5e_tls_ops;
+}
+
+int mlx5e_tls_init(struct mlx5e_priv *priv)
+{
+       struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+
+       if (!tls)
+               return -ENOMEM;
+
+       priv->tls = tls;
+       return 0;
+}
+
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
+{
+       struct mlx5e_tls *tls = priv->tls;
+
+       if (!tls)
+               return;
+
+       kfree(tls);
+       priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
new file mode 100644 (file)
index 0000000..b616217
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef __MLX5E_TLS_H__
+#define __MLX5E_TLS_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <net/tls.h>
+#include "en.h"
+
+struct mlx5e_tls_sw_stats {
+       atomic64_t tx_tls_drop_metadata;
+       atomic64_t tx_tls_drop_resync_alloc;
+       atomic64_t tx_tls_drop_no_sync_data;
+       atomic64_t tx_tls_drop_bypass_required;
+};
+
+struct mlx5e_tls {
+       struct mlx5e_tls_sw_stats sw_stats;
+};
+
+struct mlx5e_tls_offload_context {
+       struct tls_offload_context base;
+       u32 expected_seq;
+       __be32 swid;
+};
+
+static inline struct mlx5e_tls_offload_context *
+mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
+{
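+       /* The driver's TLS context is embedded in the generic offload
+        * context buffer, so it must never outgrow it.
+        */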
+       BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context) >
+                    TLS_OFFLOAD_CONTEXT_SIZE);
+       return container_of(tls_offload_ctx(tls_ctx),
+                           struct mlx5e_tls_offload_context,
+                           base);
+}
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_tls_init(struct mlx5e_priv *priv);
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv);
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
+static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
+
+#endif
+
+#endif /* __MLX5E_TLS_H__ */
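
The #else branch above is the usual kernel pattern for optional features: with CONFIG_MLX5_EN_TLS disabled, every entry point collapses to an empty static inline stub, so call sites (e.g. mlx5e_nic_init() further down in this series) stay free of #ifdef clutter and the compiler elides the calls entirely. A compilable sketch of the pattern outside the kernel (FEATURE_X and the function names are illustrative):

#include <stdio.h>

/* #define FEATURE_X 1    flip on to build the real implementation */

#ifdef FEATURE_X
int feature_init(void)     { puts("feature up");   return 0; }
void feature_cleanup(void) { puts("feature down"); }
#else
/* Stubs: zero runtime cost, and call sites need no #ifdef. */
static inline int feature_init(void)     { return 0; }
static inline void feature_cleanup(void) { }
#endif

int main(void)
{
        if (feature_init())
                return 1;
        feature_cleanup();
        return 0;
}
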
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
new file mode 100644 (file)
index 0000000..ad2790f
--- /dev/null
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "en_accel/tls.h"
+#include "en_accel/tls_rxtx.h"
+
+#define SYNDROME_OFFLOAD_REQUIRED 32
+#define SYNDROME_SYNC 33
+
+struct sync_info {
+       u64 rcd_sn;
+       s32 sync_len;
+       int nr_frags;
+       skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct mlx5e_tls_metadata {
+       /* One byte of syndrome followed by 3 bytes of swid */
+       __be32 syndrome_swid;
+       __be16 first_seq;
+       /* packet type ID field */
+       __be16 ethertype;
+} __packed;
+
+static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
+{
+       struct mlx5e_tls_metadata *pet;
+       struct ethhdr *eth;
+
+       if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
+               return -ENOMEM;
+
+       eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
+       skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
+       pet = (struct mlx5e_tls_metadata *)(eth + 1);
+
+       memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
+               2 * ETH_ALEN);
+
+       eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+       pet->syndrome_swid = htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
+
+       return 0;
+}
+
+static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context *context,
+                                  u32 tcp_seq, struct sync_info *info)
+{
+       int remaining, i = 0, ret = -EINVAL;
+       struct tls_record_info *record;
+       unsigned long flags;
+       s32 sync_size;
+
+       spin_lock_irqsave(&context->base.lock, flags);
+       record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
+
+       if (unlikely(!record))
+               goto out;
+
+       sync_size = tcp_seq - tls_record_start_seq(record);
+       info->sync_len = sync_size;
+       if (unlikely(sync_size < 0)) {
+               if (tls_record_is_start_marker(record))
+                       goto done;
+
+               goto out;
+       }
+
+       remaining = sync_size;
+       while (remaining > 0) {
+               info->frags[i] = record->frags[i];
+               __skb_frag_ref(&info->frags[i]);
+               remaining -= skb_frag_size(&info->frags[i]);
+
+               if (remaining < 0)
+                       skb_frag_size_add(&info->frags[i], remaining);
+
+               i++;
+       }
+       info->nr_frags = i;
+done:
+       ret = 0;
+out:
+       spin_unlock_irqrestore(&context->base.lock, flags);
+       return ret;
+}
+
+static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
+                                       struct sk_buff *nskb, u32 tcp_seq,
+                                       int headln, __be64 rcd_sn)
+{
+       struct mlx5e_tls_metadata *pet;
+       u8 syndrome = SYNDROME_SYNC;
+       struct iphdr *iph;
+       struct tcphdr *th;
+       int data_len, mss;
+
+       nskb->dev = skb->dev;
+       skb_reset_mac_header(nskb);
+       skb_set_network_header(nskb, skb_network_offset(skb));
+       skb_set_transport_header(nskb, skb_transport_offset(skb));
+       memcpy(nskb->data, skb->data, headln);
+       memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
+
+       iph = ip_hdr(nskb);
+       iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
+       th = tcp_hdr(nskb);
+       data_len = nskb->len - headln;
+       tcp_seq -= data_len;
+       th->seq = htonl(tcp_seq);
+
+       mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
+       skb_shinfo(nskb)->gso_size = 0;
+       if (data_len > mss) {
+               skb_shinfo(nskb)->gso_size = mss;
+               skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
+       }
+       skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+
+       pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
+       memcpy(pet, &syndrome, sizeof(syndrome));
+       pet->first_seq = htons(tcp_seq);
+
+       /* MLX5 devices don't care about the checksum partial start, offset
+        * and pseudo header
+        */
+       nskb->ip_summed = CHECKSUM_PARTIAL;
+
+       nskb->xmit_more = 1;
+       nskb->queue_mapping = skb->queue_mapping;
+}
+
+static struct sk_buff *
+mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context *context,
+                    struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                    struct mlx5e_tx_wqe **wqe,
+                    u16 *pi,
+                    struct mlx5e_tls *tls)
+{
+       u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+       struct sync_info info;
+       struct sk_buff *nskb;
+       int linear_len = 0;
+       int headln;
+       int i;
+
+       sq->stats.tls_ooo++;
+
+       if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
+               /* We might get here if a retransmission reaches the driver
+                * after the relevant record is acked.
+                * It should be safe to drop the packet in this case
+                */
+               atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
+               goto err_out;
+       }
+
+       if (unlikely(info.sync_len < 0)) {
+               u32 payload;
+
+               headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+               payload = skb->len - headln;
+               if (likely(payload <= -info.sync_len))
+                       /* SKB payload doesn't require offload
+                        */
+                       return skb;
+
+               atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
+               goto err_out;
+       }
+
+       if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+               atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
+               goto err_out;
+       }
+
+       headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       linear_len += headln + sizeof(info.rcd_sn);
+       nskb = alloc_skb(linear_len, GFP_ATOMIC);
+       if (unlikely(!nskb)) {
+               atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
+               goto err_out;
+       }
+
+       context->expected_seq = tcp_seq + skb->len - headln;
+       skb_put(nskb, linear_len);
+       for (i = 0; i < info.nr_frags; i++)
+               skb_shinfo(nskb)->frags[i] = info.frags[i];
+
+       skb_shinfo(nskb)->nr_frags = info.nr_frags;
+       nskb->data_len = info.sync_len;
+       nskb->len += info.sync_len;
+       sq->stats.tls_resync_bytes += nskb->len;
+       mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
+                                   cpu_to_be64(info.rcd_sn));
+       mlx5e_sq_xmit(sq, nskb, *wqe, *pi);
+       mlx5e_sq_fetch_wqe(sq, wqe, pi);
+       return skb;
+
+err_out:
+       dev_kfree_skb_any(skb);
+       return NULL;
+}
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+                                       struct mlx5e_txqsq *sq,
+                                       struct sk_buff *skb,
+                                       struct mlx5e_tx_wqe **wqe,
+                                       u16 *pi)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_tls_offload_context *context;
+       struct tls_context *tls_ctx;
+       u32 expected_seq;
+       int datalen;
+       u32 skb_seq;
+
+       if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+               goto out;
+
+       datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+       if (!datalen)
+               goto out;
+
+       tls_ctx = tls_get_ctx(skb->sk);
+       if (unlikely(tls_ctx->netdev != netdev))
+               goto out;
+
+       skb_seq = ntohl(tcp_hdr(skb)->seq);
+       context = mlx5e_get_tls_tx_context(tls_ctx);
+       expected_seq = context->expected_seq;
+
+       if (unlikely(expected_seq != skb_seq)) {
+               skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
+               goto out;
+       }
+
+       if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+               atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
+               dev_kfree_skb_any(skb);
+               skb = NULL;
+               goto out;
+       }
+
+       context->expected_seq = skb_seq + datalen;
+out:
+       return skb;
+}
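
mlx5e_tls_handle_tx_skb() is the per-packet gatekeeper for the offload: it ignores SKBs that do not belong to a device-offloaded TLS socket, carry no TLS payload, or were configured on another netdev; in-order packets get the metadata pseudo-header and advance expected_seq, while a sequence mismatch is handed to the out-of-order resync path. A toy model of that sequence-tracking decision (illustrative only; the real driver also builds and transmits a resync SKB):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model: track the next expected TCP sequence per offloaded
 * stream and flag everything else for the resync path. */
struct tls_tx_ctx { uint32_t expected_seq; };

enum tx_verdict { TX_PASSTHROUGH, TX_ADD_METADATA, TX_RESYNC };

static enum tx_verdict classify(struct tls_tx_ctx *ctx, uint32_t seq,
                                uint32_t datalen, bool offloaded_sock)
{
        if (!offloaded_sock || datalen == 0)
                return TX_PASSTHROUGH;          /* not ours, or a pure ACK */
        if (seq != ctx->expected_seq)
                return TX_RESYNC;               /* retransmission/reorder */
        ctx->expected_seq = seq + datalen;      /* in-order: advance */
        return TX_ADD_METADATA;
}

int main(void)
{
        struct tls_tx_ctx ctx = { .expected_seq = 1000 };

        printf("%d\n", classify(&ctx, 1000, 100, true)); /* ADD_METADATA */
        printf("%d\n", classify(&ctx, 1100, 100, true)); /* ADD_METADATA */
        printf("%d\n", classify(&ctx, 1100, 100, true)); /* RESYNC */
        return 0;
}
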
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
new file mode 100644 (file)
index 0000000..405dfd3
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_TLS_RXTX_H__
+#define __MLX5E_TLS_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+                                       struct mlx5e_txqsq *sq,
+                                       struct sk_buff *skb,
+                                       struct mlx5e_tx_wqe **wqe,
+                                       u16 *pi);
+
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
new file mode 100644 (file)
index 0000000..01468ec
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/tls.h"
+#include "fpga/sdk.h"
+#include "en_accel/tls.h"
+
+static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
+       { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+       atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+{
+       if (!priv->tls)
+               return 0;
+
+       return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+       unsigned int i, idx = 0;
+
+       if (!priv->tls)
+               return 0;
+
+       for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+               strcpy(data + (idx++) * ETH_GSTRING_LEN,
+                      mlx5e_tls_sw_stats_desc[i].format);
+
+       return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+       int i, idx = 0;
+
+       if (!priv->tls)
+               return 0;
+
+       for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+               data[idx++] =
+                   MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+                                           mlx5e_tls_sw_stats_desc, i);
+
+       return NUM_TLS_SW_COUNTERS;
+}
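
tls_stats.c reads its counters generically: each counter_desc entry pairs an ethtool format string with a byte offset into struct mlx5e_tls_sw_stats (presumably recorded via offsetof inside MLX5E_DECLARE_STAT), and MLX5E_READ_CTR_ATOMIC64 adds that offset to the struct base before performing the atomic64_read. A userspace sketch of such an offset-driven counter table (types and names are stand-ins, not the mlx5 definitions):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sw_stats { uint64_t drop_metadata; uint64_t drop_no_sync; };

struct counter_desc { const char *format; size_t offset; };

#define DECLARE_STAT(type, fld) { #fld, offsetof(type, fld) }

static const struct counter_desc desc[] = {
        DECLARE_STAT(struct sw_stats, drop_metadata),
        DECLARE_STAT(struct sw_stats, drop_no_sync),
};

/* Read counter i without naming the field at the call site. */
static uint64_t read_ctr(const struct sw_stats *s, size_t i)
{
        return *(const uint64_t *)((const char *)s + desc[i].offset);
}

int main(void)
{
        struct sw_stats s = { .drop_metadata = 3, .drop_no_sync = 7 };

        for (size_t i = 0; i < sizeof(desc) / sizeof(desc[0]); i++)
                printf("%s = %llu\n", desc[i].format,
                       (unsigned long long)read_ctr(&s, i));
        return 0;
}
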
index 610d485c4b038864273aa382592aedaff405af69..f64b5e78519bb8a750a160411bc92cf41c3bdfec 100644 (file)
@@ -565,7 +565,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
        err =  mlx5_modify_rule_destination(rule, &dst, NULL);
        if (err)
                netdev_warn(priv->netdev,
-                           "Failed to modfiy aRFS rule destination to rq=%d\n", rxq);
+                           "Failed to modify aRFS rule destination to rq=%d\n", rxq);
 }
 
 static void arfs_handle_work(struct work_struct *work)
index 3d46ef48d5b82ef0d1f85e93a0ac0694080173b4..c641d5656b2de3c8028b435057cf39b202cd5b6b 100644 (file)
@@ -1007,12 +1007,14 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
 
        mutex_lock(&priv->state_lock);
 
-       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-               goto out;
-
        new_channels.params = priv->channels.params;
        mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params);
 
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
+               goto out;
+       }
+
        /* Skip if tx_min_inline is the same */
        if (new_channels.params.tx_min_inline_mode ==
            priv->channels.params.tx_min_inline_mode)
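
This hunk fixes an ordering bug in mlx5e_trust_update_sq_inline_mode(): previously the function bailed out while the interface was down, before the new inline mode was computed, so the setting was silently lost; now the parameters are always computed, and when the channels are closed they are stored for the next open instead. A sketch of that store-or-apply pattern (hypothetical names):

#include <stdbool.h>
#include <stdio.h>

struct params { int inline_mode; };
struct device { bool opened; struct params params; };

static void switch_channels(struct device *dev, const struct params *p)
{
        dev->params = *p;       /* stand-in for a full channel switch */
}

static void update_inline_mode(struct device *dev, int new_mode)
{
        struct params next = dev->params;

        next.inline_mode = new_mode;    /* always compute the new state */

        if (!dev->opened) {
                dev->params = next;     /* down: remember it for next open */
                return;
        }
        if (next.inline_mode == dev->params.inline_mode)
                return;                 /* nothing changed */
        switch_channels(dev, &next);
}

int main(void)
{
        struct device dev = { .opened = false };

        update_inline_mode(&dev, 2);
        printf("stored mode: %d\n", dev.params.inline_mode); /* 2 */
        return 0;
}
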
index 602851ab5b14546cca29aef71a3344641bc230c2..d67adf70a97bf082d9e1f677d77af7d0371d1a59 100644 (file)
 #include <linux/net_dim.h>
 #include "en.h"
 
+static void
+mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+                       struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
+{
+       mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
+       dim->state = NET_DIM_START_MEASURE;
+}
+
 void mlx5e_rx_dim_work(struct work_struct *work)
 {
-       struct net_dim *dim = container_of(work, struct net_dim,
-                                          work);
+       struct net_dim *dim = container_of(work, struct net_dim, work);
        struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
-       struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode,
-                                                                 dim->profile_ix);
+       struct net_dim_cq_moder cur_moder =
+               net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
-       mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq,
-                                      cur_profile.usec, cur_profile.pkts);
+       mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
+}
 
-       dim->state = NET_DIM_START_MEASURE;
+void mlx5e_tx_dim_work(struct work_struct *work)
+{
+       struct net_dim *dim = container_of(work, struct net_dim, work);
+       struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
+       struct net_dim_cq_moder cur_moder =
+               net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+       mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
 }
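
The RX DIM worker is refactored so the new TX variant can share it: each work function recovers its own queue from the embedded net_dim via container_of, then mlx5e_complete_dim_work() applies the chosen moderation profile and re-arms the next measurement. A self-contained sketch of the container_of recovery step (a minimal reimplementation; the kernel's macro lives in <linux/kernel.h>):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct dim { int profile_ix; };
struct rx_queue { int id; struct dim dim; };

/* A deferred-work callback typically receives only the embedded
 * member and climbs back to the enclosing object to act on it. */
static void dim_work(struct dim *d)
{
        struct rx_queue *rq = container_of(d, struct rx_queue, dim);

        printf("queue %d -> profile %d\n", rq->id, d->profile_ix);
}

int main(void)
{
        struct rx_queue rq = { .id = 3, .dim = { .profile_ix = 1 } };

        dim_work(&rq.dim);
        return 0;
}
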
index 37fd0245b6c12567745ec0d9d0f6ebcfb555b7b4..2b786c4d3dabed29a4d7813c09dd9c0d41f49af7 100644 (file)
@@ -389,14 +389,20 @@ static int mlx5e_set_channels(struct net_device *dev,
 int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
                               struct ethtool_coalesce *coal)
 {
+       struct net_dim_cq_moder *rx_moder, *tx_moder;
+
        if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
                return -EOPNOTSUPP;
 
-       coal->rx_coalesce_usecs       = priv->channels.params.rx_cq_moderation.usec;
-       coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts;
-       coal->tx_coalesce_usecs       = priv->channels.params.tx_cq_moderation.usec;
-       coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts;
-       coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+       rx_moder = &priv->channels.params.rx_cq_moderation;
+       coal->rx_coalesce_usecs         = rx_moder->usec;
+       coal->rx_max_coalesced_frames   = rx_moder->pkts;
+       coal->use_adaptive_rx_coalesce  = priv->channels.params.rx_dim_enabled;
+
+       tx_moder = &priv->channels.params.tx_cq_moderation;
+       coal->tx_coalesce_usecs         = tx_moder->usec;
+       coal->tx_max_coalesced_frames   = tx_moder->pkts;
+       coal->use_adaptive_tx_coalesce  = priv->channels.params.tx_dim_enabled;
 
        return 0;
 }
@@ -438,6 +444,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
 int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
                               struct ethtool_coalesce *coal)
 {
+       struct net_dim_cq_moder *rx_moder, *tx_moder;
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_channels new_channels = {};
        int err = 0;
@@ -463,11 +470,15 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
        mutex_lock(&priv->state_lock);
        new_channels.params = priv->channels.params;
 
-       new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs;
-       new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames;
-       new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs;
-       new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames;
-       new_channels.params.rx_dim_enabled        = !!coal->use_adaptive_rx_coalesce;
+       rx_moder          = &new_channels.params.rx_cq_moderation;
+       rx_moder->usec    = coal->rx_coalesce_usecs;
+       rx_moder->pkts    = coal->rx_max_coalesced_frames;
+       new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+
+       tx_moder          = &new_channels.params.tx_cq_moderation;
+       tx_moder->usec    = coal->tx_coalesce_usecs;
+       tx_moder->pkts    = coal->tx_max_coalesced_frames;
+       new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@ -475,7 +486,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
        }
        /* we are opened */
 
-       reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
+       reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) ||
+               (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled);
+
        if (!reset) {
                mlx5e_set_priv_channels_coalesce(priv, coal);
                priv->channels.params = new_channels.params;
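
With TX adaptive moderation in the picture, set_coalesce must reopen the channels whenever the adaptive (DIM) flag flips in either direction, while plain usec/frame changes still take the cheaper mlx5e_set_priv_channels_coalesce() path. The reset predicate reduces to the following (sketch, names abbreviated):

#include <stdbool.h>
#include <stdio.h>

/* Channels must be rebuilt only when an adaptive (DIM) flag flips;
 * pure usec/packet-count tweaks are applied to the live channels. */
static bool needs_reset(bool rx_dim_new, bool rx_dim_cur,
                        bool tx_dim_new, bool tx_dim_cur)
{
        return rx_dim_new != rx_dim_cur || tx_dim_new != tx_dim_cur;
}

int main(void)
{
        printf("%d\n", needs_reset(true, true, false, true)); /* 1: reset */
        printf("%d\n", needs_reset(true, true, true, true));  /* 0: in place */
        return 0;
}
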
index f64dda2bed316e3bbedf22d280713e9874034ffe..76cc10e44080b012d50da503cd6ed60520aa6154 100644 (file)
@@ -277,7 +277,6 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
                }
                break;
        case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
-               mlx5e_vport_context_update_vlans(priv);
                if (priv->fs.vlan.active_cvlans_rule[vid]) {
                        mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]);
                        priv->fs.vlan.active_cvlans_rule[vid] = NULL;
index f100374199784a2f0892dd0a4e4ff31aa8ec9d73..b5a7580b12fe60c1c5d97e1b5510ee75d5f0dc5a 100644 (file)
@@ -42,7 +42,9 @@
 #include "en_rep.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls.h"
 #include "accel/ipsec.h"
+#include "accel/tls.h"
 #include "vxlan.h"
 
 struct mlx5e_rq_param {
@@ -745,23 +747,24 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
        mlx5_core_destroy_rq(rq->mdev, rq->rqn);
 }
 
-static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
+static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
 {
-       unsigned long exp_time = jiffies + msecs_to_jiffies(20000);
+       unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
        struct mlx5e_channel *c = rq->channel;
 
        struct mlx5_wq_ll *wq = &rq->wq;
        u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5_wq_ll_get_size(wq));
 
-       while (time_before(jiffies, exp_time)) {
+       do {
                if (wq->cur_sz >= min_wqes)
                        return 0;
 
                msleep(20);
-       }
+       } while (time_before(jiffies, exp_time));
+
+       netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
+                   c->ix, rq->rqn, wq->cur_sz, min_wqes);
 
-       netdev_warn(c->netdev, "Failed to get min RX wqes on RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
-                   rq->rqn, wq->cur_sz, min_wqes);
        return -ETIMEDOUT;
 }
 
@@ -817,7 +820,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
                goto err_destroy_rq;
 
        if (params->rx_dim_enabled)
-               c->rq.state |= BIT(MLX5E_RQ_STATE_AM);
+               __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
        return 0;
 
@@ -1014,6 +1017,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+       if (mlx5_accel_is_tls_device(c->priv->mdev))
+               set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
 
        param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -1025,6 +1030,9 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        if (err)
                goto err_sq_wq_destroy;
 
+       INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
+       sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
+
        sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
 
        return 0;
@@ -1188,6 +1196,9 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
        if (tx_rate)
                mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
 
+       if (params->tx_dim_enabled)
+               sq->state |= BIT(MLX5E_SQ_STATE_AM);
+
        return 0;
 
 err_free_txqsq:
@@ -1931,7 +1942,6 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
        MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
 
        param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
-       param->wq.linear = 1;
 }
 
 static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
@@ -2119,13 +2129,11 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
        int err = 0;
        int i;
 
-       for (i = 0; i < chs->num; i++) {
-               err = mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq);
-               if (err)
-                       break;
-       }
+       for (i = 0; i < chs->num; i++)
+               err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq,
+                                                 err ? 0 : 20000);
 
-       return err;
+       return err ? -ETIMEDOUT : 0;
 }
 
 static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
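
Two cooperating changes here: mlx5e_wait_for_min_rx_wqes() becomes a do/while, so a ring is examined at least once even with a zero timeout, and mlx5e_wait_channels_min_rx_wqes() exploits that by granting the full 20-second budget only until the first failure; once err is set, every remaining channel gets a zero budget but still one look, so a dead link no longer costs 20 seconds per channel. The OR-ed errors collapse to a single -ETIMEDOUT. A sketch of the shape (stand-in wall clock instead of jiffies; POSIX nanosleep in place of msleep):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool ready;      /* stand-in for wq->cur_sz >= min_wqes */

/* Check at least once, then keep polling until the deadline. */
static int wait_ready(int timeout_s)
{
        struct timespec nap = { .tv_nsec = 20 * 1000000L }; /* ~msleep(20) */
        time_t deadline = time(NULL) + timeout_s;

        do {
                if (ready)
                        return 0;
                nanosleep(&nap, NULL);
        } while (time(NULL) < deadline);

        return -1;      /* -ETIMEDOUT in the driver */
}

int main(void)
{
        int err = 0, nchannels = 4;

        /* Full budget until the first failure, then zero (one check each). */
        for (int i = 0; i < nchannels; i++)
                err |= wait_ready(err ? 0 : 1);

        printf("%s\n", err ? "timed out" : "ok");
        return 0;
}
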
@@ -3128,22 +3136,23 @@ static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
 
 #ifdef CONFIG_MLX5_ESWITCH
 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
-                                    struct tc_cls_flower_offload *cls_flower)
+                                    struct tc_cls_flower_offload *cls_flower,
+                                    int flags)
 {
        switch (cls_flower->command) {
        case TC_CLSFLOWER_REPLACE:
-               return mlx5e_configure_flower(priv, cls_flower);
+               return mlx5e_configure_flower(priv, cls_flower, flags);
        case TC_CLSFLOWER_DESTROY:
-               return mlx5e_delete_flower(priv, cls_flower);
+               return mlx5e_delete_flower(priv, cls_flower, flags);
        case TC_CLSFLOWER_STATS:
-               return mlx5e_stats_flower(priv, cls_flower);
+               return mlx5e_stats_flower(priv, cls_flower, flags);
        default:
                return -EOPNOTSUPP;
        }
 }
 
-int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
-                           void *cb_priv)
+static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                  void *cb_priv)
 {
        struct mlx5e_priv *priv = cb_priv;
 
@@ -3152,7 +3161,7 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 
        switch (type) {
        case TC_SETUP_CLSFLOWER:
-               return mlx5e_setup_tc_cls_flower(priv, type_data);
+               return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
        default:
                return -EOPNOTSUPP;
        }
@@ -4084,18 +4093,48 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
                link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
 }
 
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
 {
-       params->tx_cq_moderation.cq_period_mode = cq_period_mode;
+       struct net_dim_cq_moder moder;
 
-       params->tx_cq_moderation.pkts =
-               MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
-       params->tx_cq_moderation.usec =
-               MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+       moder.cq_period_mode = cq_period_mode;
+       moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+       moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+       if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+               moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+       return moder;
+}
 
+static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+       struct net_dim_cq_moder moder;
+
+       moder.cq_period_mode = cq_period_mode;
+       moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+       moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
        if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
-               params->tx_cq_moderation.usec =
-                       MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+               moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+       return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+       return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+               NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+               NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+       if (params->tx_dim_enabled) {
+               u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+               params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+       } else {
+               params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+       }
 
        MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
                        params->tx_cq_moderation.cq_period_mode ==
@@ -4104,28 +4143,12 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 {
-       params->rx_cq_moderation.cq_period_mode = cq_period_mode;
-
-       params->rx_cq_moderation.pkts =
-               MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
-       params->rx_cq_moderation.usec =
-               MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
-
-       if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
-               params->rx_cq_moderation.usec =
-                       MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
-
        if (params->rx_dim_enabled) {
-               switch (cq_period_mode) {
-               case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
-                       params->rx_cq_moderation =
-                               net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE);
-                       break;
-               case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
-               default:
-                       params->rx_cq_moderation =
-                               net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE);
-               }
+               u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+               params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+       } else {
+               params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
        }
 
        MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
@@ -4189,6 +4212,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
                        MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
                        MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
        params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+       params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
        mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
        mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 
@@ -4355,6 +4379,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 #endif
 
        mlx5e_ipsec_build_netdev(priv);
+       mlx5e_tls_build_netdev(priv);
 }
 
 static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
@@ -4396,12 +4421,16 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
        err = mlx5e_ipsec_init(priv);
        if (err)
                mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
+       err = mlx5e_tls_init(priv);
+       if (err)
+               mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
        mlx5e_build_nic_netdev(netdev);
        mlx5e_vxlan_init(priv);
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+       mlx5e_tls_cleanup(priv);
        mlx5e_ipsec_cleanup(priv);
        mlx5e_vxlan_cleanup(priv);
 }
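
mlx5e_nic_init() brings features up in order (IPSec, then TLS) and mlx5e_nic_cleanup() tears them down in reverse, the usual constructor/destructor mirroring. Note that a TLS init failure is only logged, not propagated: mlx5e_nic_init() is void, so the netdev simply comes up without TLS offload. A trivial sketch of the mirrored ordering (names hypothetical):

#include <stdio.h>

static int  init_a(void)    { puts("A up");   return 0; }
static int  init_b(void)    { puts("B up");   return 0; }
static void cleanup_a(void) { puts("A down"); }
static void cleanup_b(void) { puts("B down"); }

int main(void)
{
        /* Bring-up order A then B; tear-down mirrors it, B then A. */
        init_a();
        if (init_b())
                fprintf(stderr, "B failed, continuing without it\n");

        cleanup_b();
        cleanup_a();
        return 0;
}
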
@@ -4433,7 +4462,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
                goto err_destroy_direct_tirs;
        }
 
-       err = mlx5e_tc_init(priv);
+       err = mlx5e_tc_nic_init(priv);
        if (err)
                goto err_destroy_flow_steering;
 
@@ -4454,7 +4483,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 
 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
 {
-       mlx5e_tc_cleanup(priv);
+       mlx5e_tc_nic_cleanup(priv);
        mlx5e_destroy_flow_steering(priv);
        mlx5e_destroy_direct_tirs(priv);
        mlx5e_destroy_indirect_tirs(priv);
index d8f68e4d1018f3bda079ef2dd19a82fc85621792..c3034f58aa339a16c2ba616a14480b10ebf48c08 100644 (file)
@@ -66,18 +66,36 @@ static const struct counter_desc sw_rep_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
 };
 
-#define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
+struct vport_stats {
+       u64 vport_rx_packets;
+       u64 vport_tx_packets;
+       u64 vport_rx_bytes;
+       u64 vport_tx_bytes;
+};
+
+static const struct counter_desc vport_rep_stats_desc[] = {
+       { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) },
+       { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) },
+       { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) },
+       { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) },
+};
+
+#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
+#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc)
 
 static void mlx5e_rep_get_strings(struct net_device *dev,
                                  u32 stringset, uint8_t *data)
 {
-       int i;
+       int i, j;
 
        switch (stringset) {
        case ETH_SS_STATS:
-               for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
+               for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
                        strcpy(data + (i * ETH_GSTRING_LEN),
                               sw_rep_stats_desc[i].format);
+               for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++)
+                       strcpy(data + (i * ETH_GSTRING_LEN),
+                              vport_rep_stats_desc[j].format);
                break;
        }
 }
@@ -140,7 +158,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
                                        struct ethtool_stats *stats, u64 *data)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       int i;
+       int i, j;
 
        if (!data)
                return;
@@ -148,18 +166,23 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
        mutex_lock(&priv->state_lock);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_rep_update_sw_counters(priv);
+       mlx5e_rep_update_hw_counters(priv);
        mutex_unlock(&priv->state_lock);
 
-       for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
+       for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
                data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
                                               sw_rep_stats_desc, i);
+
+       for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++)
+               data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport,
+                                              vport_rep_stats_desc, j);
 }
 
 static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
 {
        switch (sset) {
        case ETH_SS_STATS:
-               return NUM_VPORT_REP_COUNTERS;
+               return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS;
        default:
                return -EOPNOTSUPP;
        }
@@ -681,8 +704,8 @@ static int mlx5e_rep_open(struct net_device *dev)
                goto unlock;
 
        if (!mlx5_modify_vport_admin_state(priv->mdev,
-                       MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-                       rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP))
+                                          MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+                                          rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP))
                netif_carrier_on(dev);
 
 unlock:
@@ -699,8 +722,8 @@ static int mlx5e_rep_close(struct net_device *dev)
 
        mutex_lock(&priv->state_lock);
        mlx5_modify_vport_admin_state(priv->mdev,
-                       MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-                       rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+                                     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+                                     rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
        ret = mlx5e_close_locked(dev);
        mutex_unlock(&priv->state_lock);
        return ret;
@@ -723,15 +746,31 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 
 static int
 mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
-                             struct tc_cls_flower_offload *cls_flower)
+                             struct tc_cls_flower_offload *cls_flower, int flags)
 {
        switch (cls_flower->command) {
        case TC_CLSFLOWER_REPLACE:
-               return mlx5e_configure_flower(priv, cls_flower);
+               return mlx5e_configure_flower(priv, cls_flower, flags);
        case TC_CLSFLOWER_DESTROY:
-               return mlx5e_delete_flower(priv, cls_flower);
+               return mlx5e_delete_flower(priv, cls_flower, flags);
        case TC_CLSFLOWER_STATS:
-               return mlx5e_stats_flower(priv, cls_flower);
+               return mlx5e_stats_flower(priv, cls_flower, flags);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
+                                      void *cb_priv)
+{
+       struct mlx5e_priv *priv = cb_priv;
+
+       if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
        default:
                return -EOPNOTSUPP;
        }
@@ -747,7 +786,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
 
        switch (type) {
        case TC_SETUP_CLSFLOWER:
-               return mlx5e_rep_setup_tc_cls_flower(priv, type_data);
+               return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
        default:
                return -EOPNOTSUPP;
        }
@@ -877,13 +916,14 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 };
 
 static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
-                                  struct mlx5e_params *params)
+                                  struct mlx5e_params *params, u16 mtu)
 {
        u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
                                         MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
                                         MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
        params->hard_mtu    = MLX5E_ETH_HARD_MTU;
+       params->sw_mtu      = mtu;
        params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
        params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
        params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
@@ -931,7 +971,7 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
 
        priv->channels.params.num_channels = profile->max_nch(mdev);
 
-       mlx5e_build_rep_params(mdev, &priv->channels.params);
+       mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
        mlx5e_build_rep_netdev(netdev);
 
        mlx5e_timestamp_init(priv);
@@ -964,14 +1004,8 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
        }
        rpriv->vport_rx_rule = flow_rule;
 
-       err = mlx5e_tc_init(priv);
-       if (err)
-               goto err_del_flow_rule;
-
        return 0;
 
-err_del_flow_rule:
-       mlx5_del_flow_rules(rpriv->vport_rx_rule);
 err_destroy_direct_tirs:
        mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -983,7 +1017,6 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 {
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
 
-       mlx5e_tc_cleanup(priv);
        mlx5_del_flow_rules(rpriv->vport_rx_rule);
        mlx5e_destroy_direct_tirs(priv);
        mlx5e_destroy_direct_rqts(priv);
@@ -1041,8 +1074,15 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        if (err)
                goto err_remove_sqs;
 
+       /* init shared tc flow table */
+       err = mlx5e_tc_esw_init(&rpriv->tc_ht);
+       if (err)
+               goto  err_neigh_cleanup;
+
        return 0;
 
+err_neigh_cleanup:
+       mlx5e_rep_neigh_cleanup(rpriv);
 err_remove_sqs:
        mlx5e_remove_sqs_fwd_rules(priv);
        return err;
@@ -1057,9 +1097,8 @@ mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_remove_sqs_fwd_rules(priv);
 
-       /* clean (and re-init) existing uplink offloaded TC rules */
-       mlx5e_tc_cleanup(priv);
-       mlx5e_tc_init(priv);
+       /* clean uplink offloaded TC rules, delete shared tc flow table */
+       mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
 
        mlx5e_rep_neigh_cleanup(rpriv);
 }
@@ -1106,7 +1145,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 
        uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
        upriv = netdev_priv(uplink_rpriv->netdev);
-       err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb,
+       err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
                                         upriv);
        if (err)
                goto err_neigh_cleanup;
@@ -1121,7 +1160,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        return 0;
 
 err_egdev_cleanup:
-       tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
+       tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
                                     upriv);
 
 err_neigh_cleanup:
@@ -1150,7 +1189,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
                                                    REP_ETH);
        upriv = netdev_priv(uplink_rpriv->netdev);
-       tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
+       tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
                                     upriv);
        mlx5e_rep_neigh_cleanup(rpriv);
        mlx5e_detach_netdev(priv);
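
The representor now registers a dedicated egress-device callback, mlx5e_rep_setup_tc_cb_egdev(), rather than reusing the generic block callback, and every flower operation carries MLX5E_TC_INGRESS or MLX5E_TC_EGRESS so the shared TC code can tell which direction a rule arrived from. A sketch of that flag-tagged dispatch (flag values and names illustrative):

#include <stdio.h>

#define TC_INGRESS (1 << 0)
#define TC_EGRESS  (1 << 1)

/* One worker; two thin entry points that tag the direction. */
static int configure_flower(unsigned long cookie, int flags)
{
        printf("rule %#lx installed from %s\n", cookie,
               (flags & TC_EGRESS) ? "egress" : "ingress");
        return 0;
}

static int setup_tc_cb(unsigned long cookie)
{
        return configure_flower(cookie, TC_INGRESS);
}

static int setup_tc_cb_egdev(unsigned long cookie)
{
        return configure_flower(cookie, TC_EGRESS);
}

int main(void)
{
        setup_tc_cb(0x1UL);
        setup_tc_cb_egdev(0x2UL);
        return 0;
}
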
index b9b481f2833a4fe8f825cb397b8fc539434cf09d..844d32d5c29f051386c525c0a8f89063340c7550 100644 (file)
@@ -59,6 +59,7 @@ struct mlx5e_rep_priv {
        struct net_device      *netdev;
        struct mlx5_flow_handle *vport_rx_rule;
        struct list_head       vport_sqs_list;
+       struct rhashtable      tc_ht; /* valid for uplink rep */
 };
 
 static inline
index 7bbf0db27a01eead64452e5aa9f328acee5fe658..53f72923b1642b8ca9158ba768c87a86885062f8 100644 (file)
@@ -450,7 +450,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
        struct mlx5_wq_ll *wq = &rq->wq;
        int err;
 
-       if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
+       if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
                return false;
 
        if (mlx5_wq_ll_is_full(wq))
@@ -508,7 +508,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
        struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
        struct mlx5_cqe64 *cqe;
 
-       if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                return;
 
        cqe = mlx5_cqwq_get_cqe(&cq->wq);
@@ -525,7 +525,7 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 {
        struct mlx5_wq_ll *wq = &rq->wq;
 
-       if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
+       if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
                return false;
 
        mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq);
@@ -681,11 +681,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
                                      struct mlx5e_rq *rq,
                                      struct sk_buff *skb)
 {
+       u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
        struct net_device *netdev = rq->netdev;
-       int lro_num_seg;
 
        skb->mac_len = ETH_HLEN;
-       lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
        if (lro_num_seg > 1) {
                mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
                skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
@@ -808,9 +807,9 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 }
 
 /* returns true if packet was consumed by xdp */
-static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
-                                  struct mlx5e_dma_info *di,
-                                  void *va, u16 *rx_headroom, u32 *len)
+static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
+                                   struct mlx5e_dma_info *di,
+                                   void *va, u16 *rx_headroom, u32 *len)
 {
        struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
        struct xdp_buff xdp;
@@ -1133,7 +1132,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        struct mlx5_cqe64 *cqe;
        int work_done = 0;
 
-       if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
+       if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
                return 0;
 
        if (cq->decmprs_left)
@@ -1186,7 +1185,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
        sq = container_of(cq, struct mlx5e_xdpsq, cq);
 
-       if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                return false;
 
        cqe = mlx5_cqwq_get_cqe(&cq->wq);
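
Throughout en_rx.c the driver-private MLX5E_TEST_BIT macro gives way to the generic test_bit() (with the matching __set_bit change visible in the en_main.c hunk above), keeping the RQ/SQ state words as standard unsigned long bitmaps. The underlying arithmetic, as a non-atomic sketch of the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Non-atomic sketches of the kernel's set_bit()/test_bit(). */
static void set_bit_ul(unsigned int nr, unsigned long *addr)
{
        addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static bool test_bit_ul(unsigned int nr, const unsigned long *addr)
{
        return addr[nr / BITS_PER_LONG] & (1UL << (nr % BITS_PER_LONG));
}

int main(void)
{
        enum { RQ_STATE_ENABLED, RQ_STATE_AM };
        unsigned long state = 0;

        set_bit_ul(RQ_STATE_AM, &state);
        printf("enabled=%d am=%d\n",
               test_bit_ul(RQ_STATE_ENABLED, &state),
               test_bit_ul(RQ_STATE_AM, &state));
        return 0;
}
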
index 707976482c0987c33b5183804e152309cc57d6d4..027f54ac1ca2c82b4c7a3797bcb7d0b022429a8c 100644 (file)
@@ -290,7 +290,7 @@ static int mlx5e_test_loopback(struct mlx5e_priv *priv)
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                netdev_err(priv->netdev,
-                          "\tCan't perform loobpack test while device is down\n");
+                          "\tCan't perform loopback test while device is down\n");
                return -ENODEV;
        }
 
index b08c94422907e42f8f916c6bdefbb66bc3904091..e17919c0af088a0ad2f70d2a8230fa786724ab47 100644 (file)
@@ -32,6 +32,7 @@
 
 #include "en.h"
 #include "en_accel/ipsec.h"
+#include "en_accel/tls.h"
 
 static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
@@ -43,6 +44,12 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+
+#ifdef CONFIG_MLX5_EN_TLS
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+#endif
+
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
@@ -161,6 +168,10 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
                        s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
                        s->tx_csum_none         += sq_stats->csum_none;
                        s->tx_csum_partial      += sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+                       s->tx_tls_ooo           += sq_stats->tls_ooo;
+                       s->tx_tls_resync_bytes  += sq_stats->tls_resync_bytes;
+#endif
                }
        }
 
@@ -1065,6 +1076,22 @@ static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv)
        mlx5e_ipsec_update_stats(priv);
 }
 
+static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv)
+{
+       return mlx5e_tls_get_count(priv);
+}
+
+static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data,
+                                     int idx)
+{
+       return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+       return idx + mlx5e_tls_get_stats(priv, data + idx);
+}
+
 static const struct counter_desc rq_stats_desc[] = {
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
@@ -1267,6 +1294,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
                .fill_stats = mlx5e_grp_ipsec_fill_stats,
                .update_stats = mlx5e_grp_ipsec_update_stats,
        },
+       {
+               .get_num_stats = mlx5e_grp_tls_get_num_stats,
+               .fill_strings = mlx5e_grp_tls_fill_strings,
+               .fill_stats = mlx5e_grp_tls_fill_stats,
+       },
        {
                .get_num_stats = mlx5e_grp_channels_get_num_stats,
                .fill_strings = mlx5e_grp_channels_fill_strings,
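
The new TLS group slots into mlx5e_stats_grps[] under the driver's usual contract: fill_strings and fill_stats receive a running index, append their own entries, and return the updated index, so groups compose without knowing about one another, and a group whose feature is absent just reports zero from get_num_stats. A sketch of that index-chaining contract (names hypothetical):

#include <stdio.h>

struct stats_grp {
        int (*get_num_stats)(void);
        int (*fill_stats)(unsigned long long *data, int idx);
};

static int grp_a_num(void) { return 2; }
static int grp_a_fill(unsigned long long *d, int idx)
{
        d[idx++] = 11;
        d[idx++] = 12;
        return idx;             /* hand the cursor to the next group */
}

static int grp_b_num(void) { return 1; }
static int grp_b_fill(unsigned long long *d, int idx)
{
        d[idx++] = 21;
        return idx;
}

static const struct stats_grp grps[] = {
        { grp_a_num, grp_a_fill },
        { grp_b_num, grp_b_fill },
};

int main(void)
{
        unsigned long long data[8];
        int idx = 0;

        for (unsigned int i = 0; i < sizeof(grps) / sizeof(grps[0]); i++)
                idx = grps[i].fill_stats(data, idx);

        printf("filled %d stats (expected %d)\n", idx,
               grps[0].get_num_stats() + grps[1].get_num_stats());
        return 0;
}
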
index 53111a2df5871cea5196c1055b0518376dbc322f..a36e6a87066b15925a31f9a67b66afb7900181d1 100644 (file)
@@ -93,6 +93,11 @@ struct mlx5e_sw_stats {
        u64 rx_cache_waive;
        u64 ch_eq_rearm;
 
+#ifdef CONFIG_MLX5_EN_TLS
+       u64 tx_tls_ooo;
+       u64 tx_tls_resync_bytes;
+#endif
+
        /* Special handling counters */
        u64 link_down_events_phy;
 };
@@ -194,6 +199,10 @@ struct mlx5e_sq_stats {
        u64 csum_partial_inner;
        u64 added_vlan_packets;
        u64 nop;
+#ifdef CONFIG_MLX5_EN_TLS
+       u64 tls_ooo;
+       u64 tls_resync_bytes;
+#endif
        /* less likely accessed in data path */
        u64 csum_none;
        u64 stopped;
index 4197001f98015377a7bd534f192616a13aead70d..674f1d7d273785ad6ba72fbee3404738982bd89d 100644 (file)
@@ -58,19 +58,25 @@ struct mlx5_nic_flow_attr {
        u32 flow_tag;
        u32 mod_hdr_id;
        u32 hairpin_tirn;
+       u8 match_level;
        struct mlx5_flow_table  *hairpin_ft;
 };
 
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
+
 enum {
-       MLX5E_TC_FLOW_ESWITCH   = BIT(0),
-       MLX5E_TC_FLOW_NIC       = BIT(1),
-       MLX5E_TC_FLOW_OFFLOADED = BIT(2),
-       MLX5E_TC_FLOW_HAIRPIN   = BIT(3),
-       MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(4),
+       MLX5E_TC_FLOW_INGRESS   = MLX5E_TC_INGRESS,
+       MLX5E_TC_FLOW_EGRESS    = MLX5E_TC_EGRESS,
+       MLX5E_TC_FLOW_ESWITCH   = BIT(MLX5E_TC_FLOW_BASE),
+       MLX5E_TC_FLOW_NIC       = BIT(MLX5E_TC_FLOW_BASE + 1),
+       MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
+       MLX5E_TC_FLOW_HAIRPIN   = BIT(MLX5E_TC_FLOW_BASE + 3),
+       MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
 };
 
 struct mlx5e_tc_flow {
        struct rhash_head       node;
+       struct mlx5e_priv       *priv;
        u64                     cookie;
        u8                      flags;
        struct mlx5_flow_handle *rule;
@@ -97,7 +103,7 @@ enum {
 };
 
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
-#define MLX5E_TC_TABLE_MAX_GROUP_SIZE (1 << 16)
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
 
 struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;
@@ -753,7 +759,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                table_created = true;
        }
 
-       parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       if (attr->match_level != MLX5_MATCH_NONE)
+               parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
        rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                   &flow_act, dest, dest_ix);
 
@@ -789,7 +797,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
        mlx5_del_flow_rules(flow->rule);
        mlx5_fc_destroy(priv->mdev, counter);
 
-       if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
+       if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
@@ -836,6 +844,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                attr->out_rep = rpriv->rep;
+               attr->out_mdev = out_priv->mdev;
        }
 
        err = mlx5_eswitch_add_vlan_action(esw, attr);
@@ -982,6 +991,8 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
                                }
                        }
                }
+               if (neigh_used)
+                       break;
        }
 
        if (neigh_used) {
@@ -1190,7 +1201,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 static int __parse_cls_flower(struct mlx5e_priv *priv,
                              struct mlx5_flow_spec *spec,
                              struct tc_cls_flower_offload *f,
-                             u8 *min_inline)
+                             u8 *match_level)
 {
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
@@ -1199,7 +1210,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
        u16 addr_type = 0;
        u8 ip_proto = 0;
 
-       *min_inline = MLX5_INLINE_MODE_L2;
+       *match_level = MLX5_MATCH_NONE;
 
        if (f->dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -1249,54 +1260,6 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                         inner_headers);
        }
 
-       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-               struct flow_dissector_key_control *key =
-                       skb_flow_dissector_target(f->dissector,
-                                                 FLOW_DISSECTOR_KEY_CONTROL,
-                                                 f->key);
-
-               struct flow_dissector_key_control *mask =
-                       skb_flow_dissector_target(f->dissector,
-                                                 FLOW_DISSECTOR_KEY_CONTROL,
-                                                 f->mask);
-               addr_type = key->addr_type;
-
-               if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
-                                key->flags & FLOW_DIS_IS_FRAGMENT);
-
-                       /* the HW doesn't need L3 inline to match on frag=no */
-                       if (key->flags & FLOW_DIS_IS_FRAGMENT)
-                               *min_inline = MLX5_INLINE_MODE_IP;
-               }
-       }
-
-       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-               struct flow_dissector_key_basic *key =
-                       skb_flow_dissector_target(f->dissector,
-                                                 FLOW_DISSECTOR_KEY_BASIC,
-                                                 f->key);
-               struct flow_dissector_key_basic *mask =
-                       skb_flow_dissector_target(f->dissector,
-                                                 FLOW_DISSECTOR_KEY_BASIC,
-                                                 f->mask);
-               ip_proto = key->ip_proto;
-
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-                        ntohs(mask->n_proto));
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-                        ntohs(key->n_proto));
-
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
-                        mask->ip_proto);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
-                        key->ip_proto);
-
-               if (mask->ip_proto)
-                       *min_inline = MLX5_INLINE_MODE_IP;
-       }
-
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
                struct flow_dissector_key_eth_addrs *key =
                        skb_flow_dissector_target(f->dissector,
@@ -1320,6 +1283,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                             smac_47_16),
                                key->src);
+
+               if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+                       *match_level = MLX5_MATCH_L2;
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
@@ -1340,9 +1306,79 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
+
+                       *match_level = MLX5_MATCH_L2;
                }
        }
 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->key);
+               struct flow_dissector_key_basic *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->mask);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+                        ntohs(mask->n_proto));
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+                        ntohs(key->n_proto));
+
+               if (mask->n_proto)
+                       *match_level = MLX5_MATCH_L2;
+       }
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+               struct flow_dissector_key_control *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
+                                                 f->key);
+
+               struct flow_dissector_key_control *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
+                                                 f->mask);
+               addr_type = key->addr_type;
+
+               /* the HW doesn't support frag first/later */
+               if (mask->flags & FLOW_DIS_FIRST_FRAG)
+                       return -EOPNOTSUPP;
+
+               if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+                                key->flags & FLOW_DIS_IS_FRAGMENT);
+
+                       /* the HW doesn't need L3 inline to match on frag=no */
+                       if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
+                               *match_level = MLX5_MATCH_L2;
+                       else
+                               *match_level = MLX5_MATCH_L3;
+               }
+       }
+
+       /* ***  L2 attributes parsing up to here *** */
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->key);
+               struct flow_dissector_key_basic *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->mask);
+               ip_proto = key->ip_proto;
+
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+                        mask->ip_proto);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+                        key->ip_proto);
+
+               if (mask->ip_proto)
+                       *match_level = MLX5_MATCH_L3;
+       }
+
        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_dissector_key_ipv4_addrs *key =
                        skb_flow_dissector_target(f->dissector,
@@ -1367,7 +1403,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                       &key->dst, sizeof(key->dst));
 
                if (mask->src || mask->dst)
-                       *min_inline = MLX5_INLINE_MODE_IP;
+                       *match_level = MLX5_MATCH_L3;
        }
 
        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
@@ -1396,7 +1432,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 
                if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
                    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
-                       *min_inline = MLX5_INLINE_MODE_IP;
+                       *match_level = MLX5_MATCH_L3;
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
@@ -1424,9 +1460,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                        return -EOPNOTSUPP;
 
                if (mask->tos || mask->ttl)
-                       *min_inline = MLX5_INLINE_MODE_IP;
+                       *match_level = MLX5_MATCH_L3;
        }
 
+       /* ***  L3 attributes parsing up to here *** */
+
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_dissector_key_ports *key =
                        skb_flow_dissector_target(f->dissector,
@@ -1467,7 +1505,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                }
 
                if (mask->src || mask->dst)
-                       *min_inline = MLX5_INLINE_MODE_TCP_UDP;
+                       *match_level = MLX5_MATCH_L4;
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
@@ -1486,7 +1524,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                         ntohs(key->flags));
 
                if (mask->flags)
-                       *min_inline = MLX5_INLINE_MODE_TCP_UDP;
+                       *match_level = MLX5_MATCH_L4;
        }
 
        return 0;
@@ -1501,23 +1539,28 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep;
-       u8 min_inline;
+       u8 match_level;
        int err;
 
-       err = __parse_cls_flower(priv, spec, f, &min_inline);
+       err = __parse_cls_flower(priv, spec, f, &match_level);
 
        if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
                rep = rpriv->rep;
                if (rep->vport != FDB_UPLINK_VPORT &&
                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
-                   esw->offloads.inline_mode < min_inline)) {
+                   esw->offloads.inline_mode < match_level)) {
                        netdev_warn(priv->netdev,
                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
-                                   min_inline, esw->offloads.inline_mode);
+                                   match_level, esw->offloads.inline_mode);
                        return -EOPNOTSUPP;
                }
        }
 
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+               flow->esw_attr->match_level = match_level;
+       else
+               flow->nic_attr->match_level = match_level;
+
        return err;
 }
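
The check above still works after the rename because the new match levels are numerically aliased to the old inline modes (see the mlx5_flow_match_level enum added to eswitch.h later in this patch), so the ordered comparison against esw->offloads.inline_mode is preserved. A minimal standalone model of that refusal logic (enum values and helper are local to this sketch, not driver code):

#include <stdio.h>

/* Mirrors the aliasing of MLX5_MATCH_* onto MLX5_INLINE_MODE_* that
 * the driver's ordered comparison relies on. */
enum { MODE_NONE, MODE_L2, MODE_IP, MODE_TCP_UDP };

static int flow_offloadable(int inline_mode, int match_level)
{
	/* MODE_NONE means no inline restriction; otherwise the eswitch
	 * inline mode must be at least as deep as the required match. */
	return inline_mode == MODE_NONE || inline_mode >= match_level;
}

int main(void)
{
	printf("%d\n", flow_offloadable(MODE_L2, MODE_TCP_UDP)); /* 0: refused */
	printf("%d\n", flow_offloadable(MODE_IP, MODE_L2));      /* 1: offloaded */
	return 0;
}
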
 
@@ -1573,7 +1616,6 @@ struct mlx5_fields {
                {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
 
 static struct mlx5_fields fields[] = {
-       OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
        OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
        OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0),
        OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
@@ -1760,12 +1802,12 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,
                err = -EOPNOTSUPP; /* can't be all optimistic */
 
                if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
-                       printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n");
+                       netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
                        goto out_err;
                }
 
                if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
-                       printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd);
+                       netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
                        goto out_err;
                }
 
@@ -1789,8 +1831,7 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,
        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
                cmd_masks = &masks[cmd];
                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
-                       printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n",
-                              cmd);
+                       netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
                                       16, 1, cmd_masks, sizeof(zero_masks), true);
                        err = -EOPNOTSUPP;
@@ -1864,7 +1905,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
        }
 
        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
-       if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+       if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+           ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
                pr_info("can't offload re-write of ip proto %d\n", ip_proto);
                return false;
        }
@@ -1912,21 +1954,21 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        const struct tc_action *a;
        LIST_HEAD(actions);
+       u32 action = 0;
        int err;
 
        if (!tcf_exts_has_actions(exts))
                return -EINVAL;
 
        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
-       attr->action = 0;
 
        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
                if (is_tcf_gact_shot(a)) {
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
                                               flow_table_properties_nic_receive.flow_counter))
-                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                               action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        continue;
                }
 
@@ -1936,13 +1978,13 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        if (err)
                                return err;
 
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
-                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+                                 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                        continue;
                }
 
                if (is_tcf_csum(a)) {
-                       if (csum_offload_supported(priv, attr->action,
+                       if (csum_offload_supported(priv, action,
                                                   tcf_csum_update_flags(a)))
                                continue;
 
@@ -1956,8 +1998,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                            same_hw_devs(priv, netdev_priv(peer_dev))) {
                                parse_attr->mirred_ifindex = peer_dev->ifindex;
                                flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
-                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
-                                               MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                               action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        } else {
                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
                                            peer_dev->name);
@@ -1976,13 +2018,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        }
 
                        attr->flow_tag = mark;
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                        continue;
                }
 
                return -EINVAL;
        }
 
+       attr->action = action;
        if (!actions_match_supported(priv, exts, parse_attr, flow))
                return -EOPNOTSUPP;
 
@@ -2039,6 +2082,20 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
        return 0;
 }
 
+static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
+                                 struct net_device *peer_netdev)
+{
+       struct mlx5e_priv *peer_priv;
+
+       peer_priv = netdev_priv(peer_netdev);
+
+       return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
+               (priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
+               same_hw_devs(priv, peer_priv) &&
+               MLX5_VPORT_MANAGER(peer_priv->mdev) &&
+               (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
+}
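
is_merged_eswitch_dev() allows forwarding between representors that hang off two PFs sharing one merged eswitch; all five conditions must hold at once. A standalone model of the predicate (struct and names local to this sketch):

#include <stdbool.h>
#include <stdio.h>

struct dev_model {
	bool merged_eswitch_cap; /* MLX5_CAP_ESW(..., merged_eswitch) */
	bool same_netdev_ops;    /* peer is driven by the same driver */
	bool same_hw;            /* same_hw_devs() */
	bool vport_manager;      /* MLX5_VPORT_MANAGER(peer mdev) */
	bool offloads_mode;      /* peer eswitch in SRIOV_OFFLOADS */
};

static bool merged_eswitch_fwd_allowed(const struct dev_model *d)
{
	return d->merged_eswitch_cap && d->same_netdev_ops &&
	       d->same_hw && d->vport_manager && d->offloads_mode;
}

int main(void)
{
	struct dev_model d = { true, true, true, true, true };

	printf("%d\n", merged_eswitch_fwd_allowed(&d)); /* 1 */
	d.offloads_mode = false;
	printf("%d\n", merged_eswitch_fwd_allowed(&d)); /* 0 */
	return 0;
}
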
+
 static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
                                   struct net_device *mirred_dev,
                                   struct net_device **out_dev,
@@ -2454,34 +2511,36 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
        const struct tc_action *a;
        LIST_HEAD(actions);
        bool encap = false;
-       int err = 0;
+       u32 action = 0;
 
        if (!tcf_exts_has_actions(exts))
                return -EINVAL;
 
-       memset(attr, 0, sizeof(*attr));
        attr->in_rep = rpriv->rep;
+       attr->in_mdev = priv->mdev;
 
        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
                if (is_tcf_gact_shot(a)) {
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
-                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+                                 MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        continue;
                }
 
                if (is_tcf_pedit(a)) {
+                       int err;
+
                        err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
                                                    parse_attr);
                        if (err)
                                return err;
 
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                        continue;
                }
 
                if (is_tcf_csum(a)) {
-                       if (csum_offload_supported(priv, attr->action,
+                       if (csum_offload_supported(priv, action,
                                                   tcf_csum_update_flags(a)))
                                continue;
 
@@ -2495,19 +2554,21 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        out_dev = tcf_mirred_dev(a);
 
                        if (switchdev_port_same_parent_id(priv->netdev,
-                                                         out_dev)) {
-                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
-                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                                                         out_dev) ||
+                           is_merged_eswitch_dev(priv, out_dev)) {
+                               action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
                                out_priv = netdev_priv(out_dev);
                                rpriv = out_priv->ppriv;
                                attr->out_rep = rpriv->rep;
+                               attr->out_mdev = out_priv->mdev;
                        } else if (encap) {
                                parse_attr->mirred_ifindex = out_dev->ifindex;
                                parse_attr->tun_info = *info;
                                attr->parse_attr = parse_attr;
-                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
-                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
-                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                               action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+                                         MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
                                /* attr->out_rep is resolved when we handle encap */
                        } else {
                                pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
@@ -2528,9 +2589,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 
                if (is_tcf_vlan(a)) {
                        if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
-                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+                               action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
                        } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
-                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+                               action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
                                attr->vlan_vid = tcf_vlan_push_vid(a);
                                if (mlx5_eswitch_vlan_actions_supported(priv->mdev)) {
                                        attr->vlan_prio = tcf_vlan_push_prio(a);
@@ -2548,34 +2609,74 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                }
 
                if (is_tcf_tunnel_release(a)) {
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
                        continue;
                }
 
                return -EINVAL;
        }
 
+       attr->action = action;
        if (!actions_match_supported(priv, exts, parse_attr, flow))
                return -EOPNOTSUPP;
 
-       return err;
+       return 0;
+}
+
+static void get_flags(int flags, u8 *flow_flags)
+{
+       u8 __flow_flags = 0;
+
+       if (flags & MLX5E_TC_INGRESS)
+               __flow_flags |= MLX5E_TC_FLOW_INGRESS;
+       if (flags & MLX5E_TC_EGRESS)
+               __flow_flags |= MLX5E_TC_FLOW_EGRESS;
+
+       *flow_flags = __flow_flags;
+}
+
+static const struct rhashtable_params tc_ht_params = {
+       .head_offset = offsetof(struct mlx5e_tc_flow, node),
+       .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
+       .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
+       .automatic_shrinking = true,
+};
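
tc_ht_params keys the shared flow table directly on the TC cookie inside struct mlx5e_tc_flow. The sizeof(((struct mlx5e_tc_flow *)0)->cookie) expression is the usual sizeof-of-a-member idiom: the null pointer is never dereferenced because sizeof is an unevaluated, compile-time context. A standalone illustration with a stand-in struct:

#include <stddef.h>
#include <stdio.h>

struct flow {
	unsigned long cookie;
	int node; /* stand-in for the rhash_head member */
};

int main(void)
{
	/* sizeof on a "dereference" of a null pointer is unevaluated,
	 * so both values are compile-time constants, not a crash. */
	printf("key_offset=%zu key_len=%zu\n",
	       offsetof(struct flow, cookie),
	       sizeof(((struct flow *)0)->cookie));
	return 0;
}
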
+
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5e_rep_priv *uplink_rpriv;
+
+       if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+               uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+               return &uplink_rpriv->tc_ht;
+       } else {
+               return &priv->fs.tc.ht;
+       }
 }
 
 int mlx5e_configure_flower(struct mlx5e_priv *priv,
-                          struct tc_cls_flower_offload *f)
+                          struct tc_cls_flower_offload *f, int flags)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
-       struct mlx5e_tc_table *tc = &priv->fs.tc;
+       struct rhashtable *tc_ht = get_tc_ht(priv);
        struct mlx5e_tc_flow *flow;
        int attr_size, err = 0;
        u8 flow_flags = 0;
 
+       get_flags(flags, &flow_flags);
+
+       flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+       if (flow) {
+               netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
+               return 0;
+       }
+
        if (esw && esw->mode == SRIOV_OFFLOADS) {
-               flow_flags = MLX5E_TC_FLOW_ESWITCH;
+               flow_flags |= MLX5E_TC_FLOW_ESWITCH;
                attr_size  = sizeof(struct mlx5_esw_flow_attr);
        } else {
-               flow_flags = MLX5E_TC_FLOW_NIC;
+               flow_flags |= MLX5E_TC_FLOW_NIC;
                attr_size  = sizeof(struct mlx5_nic_flow_attr);
        }
 
@@ -2588,6 +2689,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
 
        flow->cookie = f->cookie;
        flow->flags = flow_flags;
+       flow->priv = priv;
 
        err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
        if (err < 0)
@@ -2618,8 +2720,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
            !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
                kvfree(parse_attr);
 
-       err = rhashtable_insert_fast(&tc->ht, &flow->node,
-                                    tc->ht_params);
+       err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
        if (err) {
                mlx5e_tc_del_flow(priv, flow);
                kfree(flow);
@@ -2633,18 +2734,28 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
        return err;
 }
 
+#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
+#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
+
+static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
+{
+       return (flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK);
+}
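
same_flow_direction() compares the two masked values directly; that is only valid because the internal MLX5E_TC_FLOW_INGRESS/EGRESS flags alias the exported MLX5E_TC_INGRESS/EGRESS bit positions (BIT(0)/BIT(1), per the tc.h enum further down), with get_flags() copying them across. A standalone model of the direction check:

#include <stdbool.h>
#include <stdio.h>

#define TC_INGRESS (1u << 0)
#define TC_EGRESS  (1u << 1)

/* The internal flow flags alias the exported bits, which is what lets
 * same_flow_direction() compare the masked values directly. */
#define FLOW_INGRESS TC_INGRESS
#define FLOW_EGRESS  TC_EGRESS

#define DIRECTION_MASK (TC_INGRESS | TC_EGRESS)

static bool same_direction(unsigned int flow_flags, unsigned int flags)
{
	return (flow_flags & DIRECTION_MASK) == (flags & DIRECTION_MASK);
}

int main(void)
{
	unsigned int flow_flags = FLOW_INGRESS; /* installed on ingress */

	printf("%d\n", same_direction(flow_flags, TC_INGRESS)); /* 1 */
	printf("%d\n", same_direction(flow_flags, TC_EGRESS));  /* 0 */
	return 0;
}
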
+
 int mlx5e_delete_flower(struct mlx5e_priv *priv,
-                       struct tc_cls_flower_offload *f)
+                       struct tc_cls_flower_offload *f, int flags)
 {
+       struct rhashtable *tc_ht = get_tc_ht(priv);
        struct mlx5e_tc_flow *flow;
-       struct mlx5e_tc_table *tc = &priv->fs.tc;
 
-       flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
-                                     tc->ht_params);
-       if (!flow)
+       flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+       if (!flow || !same_flow_direction(flow, flags))
                return -EINVAL;
 
-       rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
+       rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
 
        mlx5e_tc_del_flow(priv, flow);
 
@@ -2654,18 +2765,17 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 }
 
 int mlx5e_stats_flower(struct mlx5e_priv *priv,
-                      struct tc_cls_flower_offload *f)
+                      struct tc_cls_flower_offload *f, int flags)
 {
-       struct mlx5e_tc_table *tc = &priv->fs.tc;
+       struct rhashtable *tc_ht = get_tc_ht(priv);
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        u64 bytes;
        u64 packets;
        u64 lastuse;
 
-       flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
-                                     tc->ht_params);
-       if (!flow)
+       flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+       if (!flow || !same_flow_direction(flow, flags))
                return -EINVAL;
 
        if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
@@ -2682,41 +2792,43 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
        return 0;
 }
 
-static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
-       .head_offset = offsetof(struct mlx5e_tc_flow, node),
-       .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
-       .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
-       .automatic_shrinking = true,
-};
-
-int mlx5e_tc_init(struct mlx5e_priv *priv)
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 {
        struct mlx5e_tc_table *tc = &priv->fs.tc;
 
        hash_init(tc->mod_hdr_tbl);
        hash_init(tc->hairpin_tbl);
 
-       tc->ht_params = mlx5e_tc_flow_ht_params;
-       return rhashtable_init(&tc->ht, &tc->ht_params);
+       return rhashtable_init(&tc->ht, &tc_ht_params);
 }
 
 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
 {
        struct mlx5e_tc_flow *flow = ptr;
-       struct mlx5e_priv *priv = arg;
+       struct mlx5e_priv *priv = flow->priv;
 
        mlx5e_tc_del_flow(priv, flow);
        kfree(flow);
 }
 
-void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 {
        struct mlx5e_tc_table *tc = &priv->fs.tc;
 
-       rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);
+       rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
 
        if (!IS_ERR_OR_NULL(tc->t)) {
                mlx5_destroy_flow_table(tc->t);
                tc->t = NULL;
        }
 }
+
+int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+{
+       return rhashtable_init(tc_ht, &tc_ht_params);
+}
+
+void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+{
+       rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+}
index c14c263a739bddfc339744f99a0a5e5c117c19bd..59e52b845beb3e395210b4af3d92196d1b040de4 100644 (file)
 #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
 
 #ifdef CONFIG_MLX5_ESWITCH
-int mlx5e_tc_init(struct mlx5e_priv *priv);
-void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
+
+enum {
+       MLX5E_TC_INGRESS = BIT(0),
+       MLX5E_TC_EGRESS  = BIT(1),
+       MLX5E_TC_LAST_EXPORTED_BIT = 1,
+};
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
+void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
 
 int mlx5e_configure_flower(struct mlx5e_priv *priv,
-                          struct tc_cls_flower_offload *f);
+                          struct tc_cls_flower_offload *f, int flags);
 int mlx5e_delete_flower(struct mlx5e_priv *priv,
-                       struct tc_cls_flower_offload *f);
+                       struct tc_cls_flower_offload *f, int flags);
 
 int mlx5e_stats_flower(struct mlx5e_priv *priv,
-                      struct tc_cls_flower_offload *f);
+                      struct tc_cls_flower_offload *f, int flags);
 
 struct mlx5e_encap_entry;
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -64,8 +74,8 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
 }
 
 #else /* CONFIG_MLX5_ESWITCH */
-static inline int  mlx5e_tc_init(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_tc_cleanup(struct mlx5e_priv *priv) {}
+static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
 static inline int  mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
 #endif
 
index 20297108528a78681a2a6c393a9886b50621490f..2d3f17da5f5c3b54410917df494ff6860382a680 100644 (file)
 #include <net/dsfield.h>
 #include "en.h"
 #include "ipoib/ipoib.h"
-#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/en_accel.h"
 #include "lib/clock.h"
 
 #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
+
+#ifndef CONFIG_MLX5_EN_TLS
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
                            MLX5E_SQ_NOPS_ROOM)
+#else
+/* With TLS offload, MLX5E_SQ_STOP_ROOM must leave enough
+ * room for a resync SKB, a normal SKB and a NOP.
+ */
+#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
+                           MLX5E_SQ_NOPS_ROOM)
+#endif
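
The stop-room constant is the worst-case WQEBB budget a single transmit may consume before the queue must be stopped; with TLS a resync SKB can be sent ahead of the payload SKB, doubling the data part. A standalone arithmetic sketch, assuming MLX5_SEND_WQE_MAX_WQEBBS is 16 (1024-byte max WQE over 64-byte basic blocks) in this hardware generation:

#include <stdio.h>

#define SEND_WQE_MAX_WQEBBS 16 /* assumed: 1024B max WQE / 64B basic block */
#define SQ_NOPS_ROOM        SEND_WQE_MAX_WQEBBS

int main(void)
{
	int plain = SEND_WQE_MAX_WQEBBS + SQ_NOPS_ROOM;
	int tls   = 2 * SEND_WQE_MAX_WQEBBS + SQ_NOPS_ROOM;

	/* one worst-case WQE plus wrap-around NOPs, vs. resync + payload */
	printf("stop room: plain=%d tls=%d WQEBBs\n", plain, tls);
	return 0;
}
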
 
 static inline void mlx5e_tx_dma_unmap(struct device *pdev,
                                      struct mlx5e_sq_dma *dma)
@@ -255,7 +264,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
                                          DMA_TO_DEVICE);
                if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
-                       return -ENOMEM;
+                       goto dma_unmap_wqe_err;
 
                dseg->addr       = cpu_to_be64(dma_addr);
                dseg->lkey       = sq->mkey_be;
@@ -273,7 +282,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
                                            DMA_TO_DEVICE);
                if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
-                       return -ENOMEM;
+                       goto dma_unmap_wqe_err;
 
                dseg->addr       = cpu_to_be64(dma_addr);
                dseg->lkey       = sq->mkey_be;
@@ -285,6 +294,10 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 
        return num_dma;
+
+dma_unmap_wqe_err:
+       mlx5e_dma_unmap_wqe_err(sq, num_dma);
+       return -ENOMEM;
 }
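
This hunk fixes an unwind bug: the old error path returned -ENOMEM and left the caller to unmap wi->num_dma entries, but wi is not populated until mlx5e_txwqe_complete() runs, so a stale count could be unmapped and already-mapped fragments could leak. The builder now unwinds exactly the num_dma mappings it made itself. The pattern, as a standalone sketch:

#include <stdio.h>

#define NFRAGS 4

static int map_frag(int i)
{
	return i == 2 ? -1 : 0; /* simulate a failure on the 3rd fragment */
}

static int build_dsegs(void)
{
	int num_dma = 0, i;

	for (i = 0; i < NFRAGS; i++) {
		if (map_frag(i) < 0)
			goto unmap_err;
		num_dma++;
	}
	return num_dma;

unmap_err:
	/* unwind exactly the mappings made so far, newest first */
	while (num_dma--)
		printf("unmap frag %d\n", num_dma);
	return -1;
}

int main(void)
{
	printf("build_dsegs() = %d\n", build_dsegs());
	return 0;
}
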
 
 static inline void
@@ -325,8 +338,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 }
 
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-                                struct mlx5e_tx_wqe *wqe, u16 pi)
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                         struct mlx5e_tx_wqe *wqe, u16 pi)
 {
        struct mlx5e_tx_wqe_info *wi   = &sq->db.wqe_info[pi];
 
@@ -380,17 +393,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen,
                                          (struct mlx5_wqe_data_seg *)cseg + ds_cnt);
        if (unlikely(num_dma < 0))
-               goto dma_unmap_wqe_err;
+               goto err_drop;
 
        mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma,
                             num_bytes, num_dma, wi, cseg);
 
        return NETDEV_TX_OK;
 
-dma_unmap_wqe_err:
+err_drop:
        sq->stats.dropped++;
-       mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
-
        dev_kfree_skb_any(skb);
 
        return NETDEV_TX_OK;
@@ -399,21 +410,19 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
-       struct mlx5_wq_cyc *wq = &sq->wq;
-       u16 pi = sq->pc & wq->sz_m1;
-       struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+       struct mlx5e_tx_wqe *wqe;
+       struct mlx5e_txqsq *sq;
+       u16 pi;
 
-       memset(wqe, 0, sizeof(*wqe));
+       sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+       mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
 
-#ifdef CONFIG_MLX5_EN_IPSEC
-       if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
-               skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
-               if (unlikely(!skb))
-                       return NETDEV_TX_OK;
-       }
+#ifdef CONFIG_MLX5_ACCEL
+       /* might send skbs and update wqe and pi */
+       skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
+       if (unlikely(!skb))
+               return NETDEV_TX_OK;
 #endif
-
        return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
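
mlx5e_accel_handle_tx() folds the previous IPsec-only hook and the new TLS hook behind one entry point; it may transmit frames of its own (hence wqe and pi are passed by reference and can be refreshed) and returns NULL once it has consumed the SKB. A standalone model of that ownership contract (types and names local to this sketch):

#include <stdio.h>
#include <stdlib.h>

struct skb { int consumed; };

/* Model of an accel hook: it may take ownership and return NULL,
 * in which case the caller must not touch the skb again. */
static struct skb *accel_handle_tx(struct skb *skb)
{
	if (skb->consumed) {
		free(skb);
		return NULL;
	}
	return skb;
}

int main(void)
{
	struct skb *skb = calloc(1, sizeof(*skb));

	skb = accel_handle_tx(skb);
	if (!skb) {
		puts("consumed by offload path");
		return 0;
	}
	puts("proceed to regular xmit");
	free(skb);
	return 0;
}
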
 
@@ -441,7 +450,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
        sq = container_of(cq, struct mlx5e_txqsq, cq);
 
-       if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                return false;
 
        cqe = mlx5_cqwq_get_cqe(&cq->wq);
@@ -645,17 +654,15 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen,
                                          (struct mlx5_wqe_data_seg *)cseg + ds_cnt);
        if (unlikely(num_dma < 0))
-               goto dma_unmap_wqe_err;
+               goto err_drop;
 
        mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma,
                             num_bytes, num_dma, wi, cseg);
 
        return NETDEV_TX_OK;
 
-dma_unmap_wqe_err:
+err_drop:
        sq->stats.dropped++;
-       mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
-
        dev_kfree_skb_any(skb);
 
        return NETDEV_TX_OK;
index f292bb346985b7a8a84fad690a6005452e1ae22f..5d6f9ce2bf80ff65e3ff697341fc0e388046e10b 100644 (file)
@@ -44,6 +44,30 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
        return cpumask_test_cpu(current_cpu, aff);
 }
 
+static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+{
+       struct net_dim_sample dim_sample;
+
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
+               return;
+
+       net_dim_sample(sq->cq.event_ctr, sq->stats.packets, sq->stats.bytes,
+                      &dim_sample);
+       net_dim(&sq->dim, dim_sample);
+}
+
+static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
+{
+       struct net_dim_sample dim_sample;
+
+       if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
+               return;
+
+       net_dim_sample(rq->cq.event_ctr, rq->stats.packets, rq->stats.bytes,
+                      &dim_sample);
+       net_dim(&rq->dim, dim_sample);
+}
+
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
        struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -75,18 +99,13 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
        if (unlikely(!napi_complete_done(napi, work_done)))
                return work_done;
 
-       for (i = 0; i < c->num_tc; i++)
+       for (i = 0; i < c->num_tc; i++) {
+               mlx5e_handle_tx_dim(&c->sq[i]);
                mlx5e_cq_arm(&c->sq[i].cq);
-
-       if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) {
-               struct net_dim_sample dim_sample;
-               net_dim_sample(c->rq.cq.event_ctr,
-                              c->rq.stats.packets,
-                              c->rq.stats.bytes,
-                              &dim_sample);
-               net_dim(&c->rq.dim, dim_sample);
        }
 
+       mlx5e_handle_rx_dim(&c->rq);
+
        mlx5e_cq_arm(&c->rq.cq);
        mlx5e_cq_arm(&c->icosq.cq);
 
index c1c94974e16b13b1d1e2cf7961601ffa4632d9f7..1814f803bd2cbf8b00baa4b9091cc4e954dd2304 100644 (file)
@@ -34,6 +34,9 @@
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
 #include "mlx5_core.h"
 #include "fpga/core.h"
 #include "eswitch.h"
@@ -923,3 +926,28 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
        MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       struct mlx5_eq *eq;
+
+#ifdef CONFIG_RFS_ACCEL
+       if (dev->rmap) {
+               free_irq_cpu_rmap(dev->rmap);
+               dev->rmap = NULL;
+       }
+#endif
+       list_for_each_entry(eq, &table->comp_eqs_list, list)
+               free_irq(eq->irqn, eq);
+
+       free_irq(table->pages_eq.irqn, &table->pages_eq);
+       free_irq(table->async_eq.irqn, &table->async_eq);
+       free_irq(table->cmd_eq.irqn, &table->cmd_eq);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       if (MLX5_CAP_GEN(dev, pg))
+               free_irq(table->pfault_eq.irqn, &table->pfault_eq);
+#endif
+       pci_free_irq_vectors(dev->pdev);
+}
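
Per the comment above, freeing the vectors is only safe once the device can no longer raise interrupts. A hedged sketch of the assumed call order on the forced-unload path (illustrative only, not part of this hunk):

	/* sketch: quiesce the HCA first, then release the vectors */
	err = mlx5_cmd_force_teardown_hca(dev);
	if (!err)
		mlx5_core_eq_free_irqs(dev);
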
index 332bc56306bf5cb22b2a7283f80a3b2c1f4e9a10..09f0e11c6ffc9085a24b0f7d0e9034f0ef5323c1 100644 (file)
@@ -192,7 +192,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
        }
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-       dest.vport_num = vport;
+       dest.vport.num = vport;
 
        esw_debug(esw->dev,
                  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
@@ -2175,26 +2175,35 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
        memset(vf_stats, 0, sizeof(*vf_stats));
        vf_stats->rx_packets =
                MLX5_GET_CTR(out, received_eth_unicast.packets) +
+               MLX5_GET_CTR(out, received_ib_unicast.packets) +
                MLX5_GET_CTR(out, received_eth_multicast.packets) +
+               MLX5_GET_CTR(out, received_ib_multicast.packets) +
                MLX5_GET_CTR(out, received_eth_broadcast.packets);
 
        vf_stats->rx_bytes =
                MLX5_GET_CTR(out, received_eth_unicast.octets) +
+               MLX5_GET_CTR(out, received_ib_unicast.octets) +
                MLX5_GET_CTR(out, received_eth_multicast.octets) +
+               MLX5_GET_CTR(out, received_ib_multicast.octets) +
                MLX5_GET_CTR(out, received_eth_broadcast.octets);
 
        vf_stats->tx_packets =
                MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+               MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
                MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+               MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
                MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
 
        vf_stats->tx_bytes =
                MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+               MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
                MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+               MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
                MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
 
        vf_stats->multicast =
-               MLX5_GET_CTR(out, received_eth_multicast.packets);
+               MLX5_GET_CTR(out, received_eth_multicast.packets) +
+               MLX5_GET_CTR(out, received_ib_multicast.packets);
 
        vf_stats->broadcast =
                MLX5_GET_CTR(out, received_eth_broadcast.packets);
index 4cd773fa55e333f20dad15619a0509bdb3006f1a..f47a14e31b7d43a29f10d05fadd5a906bfd418d7 100644 (file)
@@ -227,9 +227,18 @@ enum {
        SET_VLAN_INSERT = BIT(1)
 };
 
+enum mlx5_flow_match_level {
+       MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE,
+       MLX5_MATCH_L2   = MLX5_INLINE_MODE_L2,
+       MLX5_MATCH_L3   = MLX5_INLINE_MODE_IP,
+       MLX5_MATCH_L4   = MLX5_INLINE_MODE_TCP_UDP,
+};
+
 struct mlx5_esw_flow_attr {
        struct mlx5_eswitch_rep *in_rep;
        struct mlx5_eswitch_rep *out_rep;
+       struct mlx5_core_dev    *out_mdev;
+       struct mlx5_core_dev    *in_mdev;
 
        int     action;
        __be16  vlan_proto;
@@ -238,6 +247,7 @@ struct mlx5_esw_flow_attr {
        bool    vlan_handled;
        u32     encap_id;
        u32     mod_hdr_id;
+       u8      match_level;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
 };
 
index 35e256eb2f6e40de5cc4a78fed19327fafacec7c..b9ea464bcfa9afbbcf8f2d8aa4080cdab43ab619 100644 (file)
@@ -71,7 +71,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-               dest[i].vport_num = attr->out_rep->vport;
+               dest[i].vport.num = attr->out_rep->vport;
+               if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+                       dest[i].vport.vhca_id =
+                               MLX5_CAP_GEN(attr->out_mdev, vhca_id);
+                       dest[i].vport.vhca_id_valid = 1;
+               }
                i++;
        }
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
@@ -88,11 +93,23 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
 
+       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+               MLX5_SET(fte_match_set_misc, misc,
+                        source_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                                source_eswitch_owner_vhca_id);
+
+       if (attr->match_level == MLX5_MATCH_NONE)
+               spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+       else
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+                                             MLX5_MATCH_MISC_PARAMETERS;
 
-       spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
-                                     MLX5_MATCH_MISC_PARAMETERS;
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
                spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
@@ -343,7 +360,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 
        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-       dest.vport_num = vport;
+       dest.vport.num = vport;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
        flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
@@ -387,7 +404,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
        dmac_c[0] = 0x01;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-       dest.vport_num = 0;
+       dest.vport.num = 0;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
        flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
@@ -663,7 +680,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
 
        esw->offloads.vport_rx_group = g;
 out:
-       kfree(flow_group_in);
+       kvfree(flow_group_in);
        return err;
 }
 
index 82405ed847255030d1c042420569d4c61b7a1c47..3e2355c8df3ffd3e6b2b5da4cb734d976f011528 100644 (file)
@@ -53,6 +53,7 @@ struct mlx5_fpga_device {
        } conn_res;
 
        struct mlx5_fpga_ipsec *ipsec;
+       struct mlx5_fpga_tls *tls;
 };
 
 #define mlx5_fpga_dbg(__adev, format, ...) \
index 0f5da499a22339fa11eb30fa73334b5d8ec4c039..3c4f1f326e132d655307ed22ad34d8acaa471060 100644 (file)
@@ -43,9 +43,6 @@
 #include "fpga/sdk.h"
 #include "fpga/core.h"
 
-#define SBU_QP_QUEUE_SIZE 8
-#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC       (60 * 1000)
-
 enum mlx5_fpga_ipsec_cmd_status {
        MLX5_FPGA_IPSEC_CMD_PENDING,
        MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
@@ -258,7 +255,7 @@ static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
 {
        struct mlx5_fpga_ipsec_cmd_context *context = ctx;
        unsigned long timeout =
-               msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
+               msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
        int res;
 
        res = wait_for_completion_timeout(&context->complete, timeout);
index baa537e54a49296e3dd4658f45e1f23e0cb3dabd..a0573cc2fc9bc799fde4267e5707c2d140e3fcc8 100644 (file)
@@ -41,6 +41,8 @@
  * DOC: Innova SDK
  * This header defines the in-kernel API for Innova FPGA client drivers.
  */
+#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000)
 
 enum mlx5_fpga_access_type {
        MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
new file mode 100644 (file)
index 0000000..2104801
--- /dev/null
@@ -0,0 +1,562 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+#include "fpga/tls.h"
+#include "fpga/cmd.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+#include "accel/tls.h"
+
+struct mlx5_fpga_tls_command_context;
+
+typedef void (*mlx5_fpga_tls_command_complete)
+       (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
+        struct mlx5_fpga_tls_command_context *ctx,
+        struct mlx5_fpga_dma_buf *resp);
+
+struct mlx5_fpga_tls_command_context {
+       struct list_head list;
+       /* There is no guarantee on the order between the TX completion
+        * and the command response.
+        * The TX completion is going to touch cmd->buf even in
+        * the case of successful transmission.
+        * So instead of requiring separate allocations for cmd
+        * and cmd->buf we've decided to use a reference counter
+        */
+       refcount_t ref;
+       struct mlx5_fpga_dma_buf buf;
+       mlx5_fpga_tls_command_complete complete;
+};
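
The refcount of 2 set by the sender covers the two independent owners named in the comment: the TX completion, which touches cmd->buf even on success, and the response (or send-failure) path; whichever drops last frees. A standalone model of that lifecycle (a plain int stands in for refcount_t, so the atomicity of the real code is omitted):

#include <stdio.h>
#include <stdlib.h>

struct cmd_ctx {
	int ref;
	char payload[16];
};

static void put_ctx(struct cmd_ctx *ctx, const char *who)
{
	if (--ctx->ref == 0) {
		printf("%s frees the context\n", who);
		free(ctx);
	} else {
		printf("%s drops its reference\n", who);
	}
}

int main(void)
{
	struct cmd_ctx *ctx = calloc(1, sizeof(*ctx));

	ctx->ref = 2; /* one for TX completion, one for the response */
	put_ctx(ctx, "tx completion");
	put_ctx(ctx, "response handler");
	return 0;
}
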
+
+static void
+mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
+{
+       if (refcount_dec_and_test(&ctx->ref))
+               kfree(ctx);
+}
+
+static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
+                                      struct mlx5_fpga_dma_buf *resp)
+{
+       struct mlx5_fpga_conn *conn = fdev->tls->conn;
+       struct mlx5_fpga_tls_command_context *ctx;
+       struct mlx5_fpga_tls *tls = fdev->tls;
+       unsigned long flags;
+
+       spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+       ctx = list_first_entry(&tls->pending_cmds,
+                              struct mlx5_fpga_tls_command_context, list);
+       list_del(&ctx->list);
+       spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+       ctx->complete(conn, fdev, ctx, resp);
+}
+
+static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
+                                       struct mlx5_fpga_device *fdev,
+                                       struct mlx5_fpga_dma_buf *buf,
+                                       u8 status)
+{
+       struct mlx5_fpga_tls_command_context *ctx =
+           container_of(buf, struct mlx5_fpga_tls_command_context, buf);
+
+       mlx5_fpga_tls_put_command_ctx(ctx);
+
+       if (unlikely(status))
+               mlx5_fpga_tls_cmd_complete(fdev, NULL);
+}
+
+static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
+                                  struct mlx5_fpga_tls_command_context *cmd,
+                                  mlx5_fpga_tls_command_complete complete)
+{
+       struct mlx5_fpga_tls *tls = fdev->tls;
+       unsigned long flags;
+       int ret;
+
+       refcount_set(&cmd->ref, 2);
+       cmd->complete = complete;
+       cmd->buf.complete = mlx5_fpga_cmd_send_complete;
+
+       spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+       /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
+        * to make sure commands are inserted to the tls->pending_cmds list
+        * and the command QP in the same order.
+        */
+       ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
+       if (likely(!ret))
+               list_add_tail(&cmd->list, &tls->pending_cmds);
+       else
+               complete(tls->conn, fdev, cmd, NULL);
+       spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+}
+
+/* Start of context identifiers range (inclusive) */
+#define SWID_START     0
+/* End of context identifiers range (exclusive) */
+#define SWID_END       BIT(24)
+
+static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
+                                   void *ptr)
+{
+       int ret;
+
+       /* TLS metadata format is 1 byte of syndrome followed by
+        * 3 bytes of swid (software ID), so a swid must not exceed
+        * 3 bytes. See tls_rxtx.c:insert_pet() for details.
+        */
+       BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
+
+       idr_preload(GFP_KERNEL);
+       spin_lock_irq(idr_spinlock);
+       ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
+       spin_unlock_irq(idr_spinlock);
+       idr_preload_end();
+
+       return ret;
+}
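
The BUILD_BUG_ON above ties the idr range to the metadata layout: a 1-byte syndrome sits in front of the 3-byte swid, so SWID_END must not spill into the fourth byte. A standalone packing sketch (big-endian wire layout assumed, as described by the comment):

#include <stdint.h>
#include <stdio.h>

/* 1 byte of syndrome | 3 bytes of swid, most significant byte first */
static uint32_t pack_metadata(uint8_t syndrome, uint32_t swid)
{
	return ((uint32_t)syndrome << 24) | (swid & 0x00FFFFFFu);
}

int main(void)
{
	uint32_t md = pack_metadata(0x01, 0x00ABCDEF);

	printf("metadata = 0x%08X\n", md); /* 0x01ABCDEF */
	return 0;
}
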
+
+static void mlx5_fpga_tls_release_swid(struct idr *idr,
+                                      spinlock_t *idr_spinlock, u32 swid)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(idr_spinlock, flags);
+       idr_remove(idr, swid);
+       spin_unlock_irqrestore(idr_spinlock, flags);
+}
+
+struct mlx5_teardown_stream_context {
+       struct mlx5_fpga_tls_command_context cmd;
+       u32 swid;
+};
+
+static void
+mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
+                                 struct mlx5_fpga_device *fdev,
+                                 struct mlx5_fpga_tls_command_context *cmd,
+                                 struct mlx5_fpga_dma_buf *resp)
+{
+       struct mlx5_teardown_stream_context *ctx =
+                   container_of(cmd, struct mlx5_teardown_stream_context, cmd);
+
+       if (resp) {
+               u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+
+               if (syndrome)
+                       mlx5_fpga_err(fdev,
+                                     "Teardown stream failed with syndrome = %d",
+                                     syndrome);
+               else
+                       mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
+                                                  &fdev->tls->idr_spinlock,
+                                                  ctx->swid);
+       }
+       mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
+{
+       memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
+              MLX5_BYTE_OFF(tls_flow, ipv6));
+
+       MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
+       MLX5_SET(tls_cmd, cmd, direction_sx,
+                MLX5_GET(tls_flow, flow, direction_sx));
+}
+
+void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow,
+                                    u32 swid, gfp_t flags)
+{
+       struct mlx5_teardown_stream_context *ctx;
+       struct mlx5_fpga_dma_buf *buf;
+       void *cmd;
+
+       ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
+       if (!ctx)
+               return;
+
+       buf = &ctx->cmd.buf;
+       cmd = (ctx + 1);
+       MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
+       MLX5_SET(tls_cmd, cmd, swid, swid);
+
+       mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+       kfree(flow);
+
+       buf->sg[0].data = cmd;
+       buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+
+       ctx->swid = swid;
+       mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+                              mlx5_fpga_tls_teardown_completion);
+}
+
+void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
+                              gfp_t flags)
+{
+       struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+       void *flow;
+
+       rcu_read_lock();
+       flow = idr_find(&tls->tx_idr, swid);
+       rcu_read_unlock();
+
+       if (!flow) {
+               mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
+                             swid);
+               return;
+       }
+
+       mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
+}
+
+enum mlx5_fpga_setup_stream_status {
+       MLX5_FPGA_CMD_PENDING,
+       MLX5_FPGA_CMD_SEND_FAILED,
+       MLX5_FPGA_CMD_RESPONSE_RECEIVED,
+       MLX5_FPGA_CMD_ABANDONED,
+};
+
+struct mlx5_setup_stream_context {
+       struct mlx5_fpga_tls_command_context cmd;
+       atomic_t status;
+       u32 syndrome;
+       struct completion comp;
+};
+
+static void
+mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
+                              struct mlx5_fpga_device *fdev,
+                              struct mlx5_fpga_tls_command_context *cmd,
+                              struct mlx5_fpga_dma_buf *resp)
+{
+       struct mlx5_setup_stream_context *ctx =
+           container_of(cmd, struct mlx5_setup_stream_context, cmd);
+       int status = MLX5_FPGA_CMD_SEND_FAILED;
+       void *tls_cmd = ctx + 1;
+
+       /* If we failed to send the command, resp == NULL */
+       if (resp) {
+               ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+               status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
+       }
+
+       status = atomic_xchg_release(&ctx->status, status);
+       if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
+               complete(&ctx->comp);
+               return;
+       }
+
+       mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
+                     ctx->syndrome);
+
+       if (!ctx->syndrome) {
+               /* The process was killed while waiting for the context to be
+                * added, and the add completed successfully.
+                * We need to destroy the HW context, and we can't reuse
+                * the command context because we might not have received
+                * the tx completion yet.
+                */
+               mlx5_fpga_tls_del_tx_flow(fdev->mdev,
+                                         MLX5_GET(tls_cmd, tls_cmd, swid),
+                                         GFP_ATOMIC);
+       }
+
+       mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
+                                         struct mlx5_setup_stream_context *ctx)
+{
+       struct mlx5_fpga_dma_buf *buf;
+       void *cmd = ctx + 1;
+       int status, ret = 0;
+
+       buf = &ctx->cmd.buf;
+       buf->sg[0].data = cmd;
+       buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+       MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
+
+       init_completion(&ctx->comp);
+       atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
+       ctx->syndrome = -1;
+
+       mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+                              mlx5_fpga_tls_setup_completion);
+       wait_for_completion_killable(&ctx->comp);
+
+       status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
+       if (unlikely(status == MLX5_FPGA_CMD_PENDING))
+               /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
+               return -EINTR;
+
+       if (unlikely(ctx->syndrome))
+               ret = -ENOMEM;
+
+       mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
+       return ret;
+}
+
+static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
+                                       struct mlx5_fpga_dma_buf *buf)
+{
+       struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
+
+       mlx5_fpga_tls_cmd_complete(fdev, buf);
+}
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
+{
+       if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+               return false;
+
+       if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+           MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+               return false;
+
+       if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+           MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
+               return false;
+
+       if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
+               return false;
+
+       return true;
+}
+
+static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
+                                 u32 *p_caps)
+{
+       int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
+       u32 caps = 0;
+       void *buf;
+
+       buf = kzalloc(cap_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
+       if (err)
+               goto out;
+
+       if (MLX5_GET(tls_extended_cap, buf, tx))
+               caps |= MLX5_ACCEL_TLS_TX;
+       if (MLX5_GET(tls_extended_cap, buf, rx))
+               caps |= MLX5_ACCEL_TLS_RX;
+       if (MLX5_GET(tls_extended_cap, buf, tls_v12))
+               caps |= MLX5_ACCEL_TLS_V12;
+       if (MLX5_GET(tls_extended_cap, buf, tls_v13))
+               caps |= MLX5_ACCEL_TLS_V13;
+       if (MLX5_GET(tls_extended_cap, buf, lro))
+               caps |= MLX5_ACCEL_TLS_LRO;
+       if (MLX5_GET(tls_extended_cap, buf, ipv6))
+               caps |= MLX5_ACCEL_TLS_IPV6;
+
+       if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
+               caps |= MLX5_ACCEL_TLS_AES_GCM128;
+       if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
+               caps |= MLX5_ACCEL_TLS_AES_GCM256;
+
+       *p_caps = caps;
+       err = 0;
+out:
+       kfree(buf);
+       return err;
+}
+
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       struct mlx5_fpga_conn_attr init_attr = {0};
+       struct mlx5_fpga_conn *conn;
+       struct mlx5_fpga_tls *tls;
+       int err = 0;
+
+       if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
+               return 0;
+
+       tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+       if (!tls)
+               return -ENOMEM;
+
+       err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
+       if (err)
+               goto error;
+
+       if (!(tls->caps & (MLX5_ACCEL_TLS_TX | MLX5_ACCEL_TLS_V12 |
+                                MLX5_ACCEL_TLS_AES_GCM128))) {
+               err = -ENOTSUPP;
+               goto error;
+       }
+
+       init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+       init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+       init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
+       init_attr.cb_arg = fdev;
+       conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+       if (IS_ERR(conn)) {
+               err = PTR_ERR(conn);
+               mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
+                             err);
+               goto error;
+       }
+
+       tls->conn = conn;
+       spin_lock_init(&tls->pending_cmds_lock);
+       INIT_LIST_HEAD(&tls->pending_cmds);
+
+       idr_init(&tls->tx_idr);
+       spin_lock_init(&tls->idr_spinlock);
+       fdev->tls = tls;
+       return 0;
+
+error:
+       kfree(tls);
+       return err;
+}
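
Note the gate on tls->caps as written: the AND against the OR of the three flags fails only when none of TX, TLS 1.2 and AES-GCM-128 support is advertised, not when any one of them is missing. If the intent is "all three required", the test would look like this sketch (flag values are illustrative, not the driver's constants):

#include <stdio.h>

#define TLS_CAP_TX         (1u << 0)
#define TLS_CAP_RX         (1u << 1)
#define TLS_CAP_V12        (1u << 2)
#define TLS_CAP_AES_GCM128 (1u << 6)

/* Offload is usable only when every required capability is present. */
static int tls_offload_usable(unsigned int caps)
{
	const unsigned int required =
		TLS_CAP_TX | TLS_CAP_V12 | TLS_CAP_AES_GCM128;

	return (caps & required) == required;
}

int main(void)
{
	printf("%d\n", tls_offload_usable(TLS_CAP_TX | TLS_CAP_V12 |
					  TLS_CAP_AES_GCM128));	/* 1 */
	printf("%d\n", tls_offload_usable(TLS_CAP_TX | TLS_CAP_RX));	/* 0 */
	return 0;
}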
+
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+
+       if (!fdev || !fdev->tls)
+               return;
+
+       mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
+       kfree(fdev->tls);
+       fdev->tls = NULL;
+}
+
+static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
+                                            struct tls_crypto_info *info,
+                                            __be64 *rcd_sn)
+{
+       struct tls12_crypto_info_aes_gcm_128 *crypto_info =
+           (struct tls12_crypto_info_aes_gcm_128 *)info;
+
+       memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
+              TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+
+       memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
+              crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+       memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
+              crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+       /* in AES-GCM 128 we need to write the key twice */
+       memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
+                  TLS_CIPHER_AES_GCM_128_KEY_SIZE,
+              crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+       MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
+}
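
The salt and record sequence copied above are the two halves of the per-record AES-GCM nonce: in TLS 1.2 (RFC 5288) the 12-byte nonce is the 4-byte implicit salt from the key block followed by the 8-byte explicit part carried with each record. A standalone sketch of that composition:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SALT_LEN	4
#define EXPLICIT_LEN	8
#define NONCE_LEN	(SALT_LEN + EXPLICIT_LEN)

/* nonce = implicit salt || explicit per-record part */
static void build_gcm_nonce(uint8_t nonce[NONCE_LEN],
			    const uint8_t salt[SALT_LEN],
			    const uint8_t explicit_part[EXPLICIT_LEN])
{
	memcpy(nonce, salt, SALT_LEN);
	memcpy(nonce + SALT_LEN, explicit_part, EXPLICIT_LEN);
}

int main(void)
{
	uint8_t salt[SALT_LEN] = { 0xde, 0xad, 0xbe, 0xef };
	uint8_t seq[EXPLICIT_LEN] = { 0, 0, 0, 0, 0, 0, 0, 1 };
	uint8_t nonce[NONCE_LEN];

	build_gcm_nonce(nonce, salt, seq);
	for (int i = 0; i < NONCE_LEN; i++)
		printf("%02x", nonce[i]);
	printf("\n");
	return 0;
}

The duplicated key write is a device-side requirement visible only in the comment above; the sketch deliberately leaves it out.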
+
+static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
+                                         struct tls_crypto_info *crypto_info)
+{
+       __be64 rcd_sn;
+
+       switch (crypto_info->cipher_type) {
+       case TLS_CIPHER_AES_GCM_128:
+               if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
+                       return -EINVAL;
+               mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                                 struct tls_crypto_info *crypto_info, u32 swid,
+                                 u32 tcp_sn)
+{
+       u32 caps = mlx5_fpga_tls_device_caps(mdev);
+       struct mlx5_setup_stream_context *ctx;
+       int ret = -ENOMEM;
+       size_t cmd_size;
+       void *cmd;
+
+       cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
+       ctx = kzalloc(cmd_size, GFP_KERNEL);
+       if (!ctx)
+               goto out;
+
+       cmd = ctx + 1;
+       ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
+       if (ret)
+               goto free_ctx;
+
+       mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+       MLX5_SET(tls_cmd, cmd, swid, swid);
+       MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
+
+       return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
+
+free_ctx:
+       kfree(ctx);
+out:
+       return ret;
+}
+
+int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+                             struct tls_crypto_info *crypto_info,
+                             u32 start_offload_tcp_sn, u32 *p_swid)
+{
+       struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+       int ret = -ENOMEM;
+       u32 swid;
+
+       ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, &tls->idr_spinlock, flow);
+       if (ret < 0)
+               return ret;
+
+       swid = ret;
+       MLX5_SET(tls_flow, flow, direction_sx, 1);
+
+       ret = mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+                                    start_offload_tcp_sn);
+       if (ret && ret != -EINTR)
+               goto free_swid;
+
+       *p_swid = swid;
+       return 0;
+free_swid:
+       mlx5_fpga_tls_release_swid(&tls->tx_idr, &tls->idr_spinlock, swid);
+
+       return ret;
+}
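
Two details are worth calling out: the software ID (swid) is allocated from an IDR before the setup command is issued, so the completion path can already look the flow up; and on -EINTR the swid is deliberately not released, because the abandoned-command path in mlx5_fpga_tls_setup_completion() tears the flow down itself. A simplified userspace model of the swid bookkeeping, with a fixed table standing in for the kernel IDR and no RCU:

#include <stdio.h>

#define MAX_SWID 8

static void *tx_table[MAX_SWID];

/* Allocate the lowest free slot and bind it to the flow, like idr_alloc(). */
static int swid_alloc(void *flow)
{
	for (int i = 0; i < MAX_SWID; i++) {
		if (!tx_table[i]) {
			tx_table[i] = flow;
			return i;
		}
	}
	return -1;	/* table full */
}

static void *swid_find(int swid)
{
	return (swid >= 0 && swid < MAX_SWID) ? tx_table[swid] : NULL;
}

static void swid_release(int swid)
{
	tx_table[swid] = NULL;
}

int main(void)
{
	int flow_a = 1, flow_b = 2;
	int a = swid_alloc(&flow_a);
	int b = swid_alloc(&flow_b);

	printf("a=%d b=%d found=%s\n", a, b, swid_find(a) ? "yes" : "no");
	swid_release(a);
	printf("reused=%d\n", swid_alloc(&flow_b));	/* lowest id again */
	return 0;
}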
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
new file mode 100644 (file)
index 0000000..800a214
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_TLS_H__
+#define __MLX5_FPGA_TLS_H__
+
+#include <linux/mlx5/driver.h>
+
+#include <net/tls.h>
+#include "fpga/core.h"
+
+struct mlx5_fpga_tls {
+       struct list_head pending_cmds;
+       spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+       u32 caps;
+       struct mlx5_fpga_conn *conn;
+
+       struct idr tx_idr;
+       spinlock_t idr_spinlock; /* protects the IDR */
+};
+
+int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+                             struct tls_crypto_info *crypto_info,
+                             u32 start_offload_tcp_sn, u32 *p_swid);
+
+void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
+                              gfp_t flags);
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
+
+static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+       return mdev->fpga->tls->caps;
+}
+
+#endif /* __MLX5_FPGA_TLS_H__ */
index ef5afd7c93259f5beaaec8f7255d5f7a4c14a3ea..5a00deff54576429460ee10fd25ea3bc62f28485 100644 (file)
@@ -372,6 +372,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                        if (dst->dest_attr.type ==
                            MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
                                id = dst->dest_attr.ft->id;
+                       } else if (dst->dest_attr.type ==
+                                  MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+                               id = dst->dest_attr.vport.num;
+                               MLX5_SET(dest_format_struct, in_dests,
+                                        destination_eswitch_owner_vhca_id_valid,
+                                        dst->dest_attr.vport.vhca_id_valid);
+                               MLX5_SET(dest_format_struct, in_dests,
+                                        destination_eswitch_owner_vhca_id,
+                                        dst->dest_attr.vport.vhca_id);
                        } else {
                                id = dst->dest_attr.tir_num;
                        }
index de51e7c39bc8b8ae02ea236afc9286b6e768cce5..806e95523f9e5eb964150a276b60816e079b4023 100644 (file)
@@ -187,6 +187,7 @@ static void del_sw_ns(struct fs_node *node);
 static void del_sw_hw_rule(struct fs_node *node);
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
                                struct mlx5_flow_destination *d2);
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
 static struct mlx5_flow_rule *
 find_flow_rule(struct fs_fte *fte,
               struct mlx5_flow_destination *dest);
@@ -481,7 +482,8 @@ static void del_sw_hw_rule(struct fs_node *node)
 
        if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
            --fte->dests_size) {
-               modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+               modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+                             BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
                fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
                update_fte = true;
                goto out;
@@ -1372,6 +1374,8 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
        struct mlx5_core_dev *dev = get_dev(&ft->node);
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        void *match_criteria_addr;
+       u8 src_esw_owner_mask_on;
+       void *misc;
        int err;
        u32 *in;
 
@@ -1384,6 +1388,14 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
        MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
        MLX5_SET(create_flow_group_in, in, end_flow_index,   fg->start_index +
                 fg->max_ftes - 1);
+
+       misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
+                           misc_parameters);
+       src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
+                                        source_eswitch_owner_vhca_id);
+       MLX5_SET(create_flow_group_in, in,
+                source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
+
        match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
                                           in, match_criteria);
        memcpy(match_criteria_addr, fg->mask.match_criteria,
@@ -1404,7 +1416,7 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 {
        if (d1->type == d2->type) {
                if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
-                    d1->vport_num == d2->vport_num) ||
+                    d1->vport.num == d2->vport.num) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
                     d1->ft == d2->ft) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -2351,23 +2363,27 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
 
 static int init_root_ns(struct mlx5_flow_steering *steering)
 {
+       int err;
+
        steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
        if (!steering->root_ns)
-               goto cleanup;
+               return -ENOMEM;
 
-       if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
-               goto cleanup;
+       err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
+       if (err)
+               goto out_err;
 
        set_prio_attrs(steering->root_ns);
-
-       if (create_anchor_flow_table(steering))
-               goto cleanup;
+       err = create_anchor_flow_table(steering);
+       if (err)
+               goto out_err;
 
        return 0;
 
-cleanup:
-       mlx5_cleanup_fs(steering->dev);
-       return -ENOMEM;
+out_err:
+       cleanup_root_ns(steering->root_ns);
+       steering->root_ns = NULL;
+       return err;
 }
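
The error-path rework above trades a blanket mlx5_cleanup_fs() — which both had to cope with a half-built state and overwrote the real cause with -ENOMEM — for targeted unwinding that releases exactly what was set up and propagates the original error code. The general shape, reduced to a standalone sketch with illustrative names:

#include <stdlib.h>

struct ns { int dummy; };

static struct ns *create_ns(void)        { return calloc(1, sizeof(struct ns)); }
static int build_tree(struct ns *ns)     { (void)ns; return 0; }
static int create_anchor(struct ns *ns)  { (void)ns; return 0; }
static void destroy_ns(struct ns *ns)    { free(ns); }

static int init_ns(struct ns **out)
{
	struct ns *ns = create_ns();
	int err;

	if (!ns)
		return -1;	/* nothing to unwind yet */

	err = build_tree(ns);
	if (err)
		goto out_err;

	err = create_anchor(ns);
	if (err)
		goto out_err;

	*out = ns;
	return 0;

out_err:
	destroy_ns(ns);		/* undo only what was built */
	*out = NULL;
	return err;		/* keep the original error, not -ENOMEM */
}

int main(void)
{
	struct ns *ns;

	return init_ns(&ns) ? 1 : (destroy_ns(ns), 0);
}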
 
 static void clean_tree(struct fs_node *node)
index 63a8ea31601cee77fe024d3a84f6e2baab7a8c22..615005e6381958c6f8289897993fda27a8693e25 100644 (file)
@@ -60,6 +60,7 @@
 #include "fpga/core.h"
 #include "fpga/ipsec.h"
 #include "accel/ipsec.h"
+#include "accel/tls.h"
 #include "lib/clock.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -1190,6 +1191,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_ipsec_start;
        }
 
+       err = mlx5_accel_tls_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+               goto err_tls_start;
+       }
+
        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1231,6 +1238,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_cleanup_fs(dev);
 
 err_fs:
+       mlx5_accel_tls_cleanup(dev);
+
+err_tls_start:
        mlx5_accel_ipsec_cleanup(dev);
 
 err_ipsec_start:
@@ -1306,6 +1316,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
        mlx5_accel_ipsec_cleanup(dev);
+       mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
@@ -1587,6 +1598,14 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
 
        mlx5_enter_error_state(dev, true);
 
+       /* Some platforms require freeing the IRQs in the shutdown
+        * flow. If they aren't freed, they can't be allocated after
+        * kexec. There is no need to clean up the mlx5_core software
+        * contexts.
+        */
+       mlx5_irq_clear_affinity_hints(dev);
+       mlx5_core_eq_free_irqs(dev);
+
        return 0;
 }
 
index 7d001fe6e63187fce56e20e0f94bea2417812d12..023882d9a22e59a7b172aee6a3260606932f4a03 100644 (file)
@@ -128,6 +128,8 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                       u32 *out, int outlen);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
 void mlx5_stop_eqs(struct mlx5_core_dev *dev);
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
 void mlx5_cq_tasklet_cb(unsigned long data);
index b9736f505bdfc2198ad4baf696e4d4d79ee8f07d..f4f02f775c93869ba978ec6dc9be8dd62d7d99a9 100644 (file)
@@ -123,8 +123,8 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
        deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
        write_unlock_irqrestore(&table->lock, flags);
        if (!deleted_mkey) {
-               mlx5_core_warn(dev, "failed radix tree delete of mkey 0x%x\n",
-                              mlx5_base_mkey(mkey->key));
+               mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+                             mlx5_base_mkey(mkey->key));
                return -ENOENT;
        }
 
index 02d6c5b5d502adfa5e7c8f11ea13d774a9eb2409..4ca07bfb6b14f75760928e5df47ce911f8639f10 100644 (file)
@@ -407,21 +407,21 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
        case MLX5_CMD_OP_RST2INIT_QP:
                if (MBOX_ALLOC(mbox, rst2init_qp))
                        return -ENOMEM;
-                MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
-                                  opt_param_mask, qpc);
-                break;
+               MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                if (MBOX_ALLOC(mbox, init2rtr_qp))
                        return -ENOMEM;
-                MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
-                                  opt_param_mask, qpc);
-                break;
+               MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                if (MBOX_ALLOC(mbox, rtr2rts_qp))
                        return -ENOMEM;
-                MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
-                                  opt_param_mask, qpc);
-                break;
+               MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
        case MLX5_CMD_OP_RTS2RTS_QP:
                if (MBOX_ALLOC(mbox, rts2rts_qp))
                        return -ENOMEM;
index 177e076b8d17f88d4f8b3a86692cf965bbc7333e..719cecb182c6c4eb5579eb1b36601acb6c0d0c5c 100644 (file)
@@ -511,7 +511,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
        *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
                                        nic_vport_context.system_image_guid);
 
-       kfree(out);
+       kvfree(out);
 
        return 0;
 }
@@ -531,7 +531,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
        *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
                                nic_vport_context.node_guid);
 
-       kfree(out);
+       kvfree(out);
 
        return 0;
 }
@@ -587,7 +587,7 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
        *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
                                   nic_vport_context.qkey_violation_counter);
 
-       kfree(out);
+       kvfree(out);
 
        return 0;
 }
index fca90b94596ded77643b1090feb7d0fe46aabab4..f3dfa0ca3c5dec67dcc1df900a6800282420343e 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/mlx5/qp.h>
 
 struct mlx5_wq_param {
-       int             linear;
        int             buf_numa_node;
        int             db_numa_node;
 };
index 479511cf79bc1ca3f02ec3dd1b8cb578416f1cc8..8da91b023b136b5c75947ee26023c3508ab896b9 100644 (file)
@@ -424,10 +424,15 @@ MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_rdq_sz, 0x04, 24, 8);
 MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_rdqs, 0x04, 0, 8);
 
 /* cmd_mbox_query_aq_cap_log_max_cq_sz
- * Log (base 2) of max CQEs allowed on CQ.
+ * Log (base 2) of the maximum number of CQEs allowed in a CQ for CQEv0 and CQEv1.
  */
 MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cq_sz, 0x08, 24, 8);
 
+/* cmd_mbox_query_aq_cap_log_max_cqv2_sz
+ * Log (base 2) of the maximum number of CQEs allowed in a CQ for CQEv2.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cqv2_sz, 0x08, 16, 8);
+
 /* cmd_mbox_query_aq_cap_max_num_cqs
  * Maximum number of CQs.
  */
@@ -662,6 +667,12 @@ MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);
  */
 MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
 
+/* cmd_mbox_config_profile_set_cqe_version
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1);
+
 /* cmd_mbox_config_profile_max_vepa_channels
  * Maximum number of VEPA channels per port (0 through 16)
  * 0 - multi-channel VEPA is disabled
@@ -841,6 +852,14 @@ MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_type,
 MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_properties,
                     0x60, 0, 8, 0x08, 0x00, false);
 
+/* cmd_mbox_config_profile_cqe_version
+ * CQE version:
+ * 0: CQE version is 0
+ * 1: CQE version is either 1 or 2
+ * CQE ver 1 or 2 is configured by Completion Queue Context field cqe_ver.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, cqe_version, 0xB0, 0, 8);
+
 /* ACCESS_REG - Access EMAD Supported Register
  * ----------------------------------
  * OpMod == 0 (N/A), INMmod == 0 (N/A)
@@ -1032,11 +1051,15 @@ static inline int mlxsw_cmd_sw2hw_cq(struct mlxsw_core *mlxsw_core,
                                 0, cq_number, in_mbox, MLXSW_CMD_MBOX_SIZE);
 }
 
-/* cmd_mbox_sw2hw_cq_cv
+enum mlxsw_cmd_mbox_sw2hw_cq_cqe_ver {
+       MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1,
+       MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2,
+};
+
+/* cmd_mbox_sw2hw_cq_cqe_ver
  * CQE Version.
- * 0 - CQE Version 0, 1 - CQE Version 1
  */
-MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4);
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cqe_ver, 0x00, 28, 4);
 
 /* cmd_mbox_sw2hw_cq_c_eqn
  * Event Queue this CQ reports completion events to.
index 93ea56620a244ae8c129bfbc20e23d9ead374b64..a38faec45b30213dfa84e9b0b4642dcf28f3cabe 100644 (file)
@@ -1100,11 +1100,11 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 err_alloc_lag_mapping:
        mlxsw_ports_fini(mlxsw_core);
 err_ports_init:
-       mlxsw_bus->fini(bus_priv);
-err_bus_init:
        if (!reload)
                devlink_resources_unregister(devlink, NULL);
 err_register_resources:
+       mlxsw_bus->fini(bus_priv);
+err_bus_init:
        if (!reload)
                devlink_free(devlink);
 err_devlink_alloc:
@@ -1714,15 +1714,16 @@ EXPORT_SYMBOL(mlxsw_core_port_fini);
 
 void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
                             void *port_driver_priv, struct net_device *dev,
-                            bool split, u32 split_group)
+                            u32 port_number, bool split,
+                            u32 split_port_subnumber)
 {
        struct mlxsw_core_port *mlxsw_core_port =
                                        &mlxsw_core->ports[local_port];
        struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
 
        mlxsw_core_port->port_driver_priv = port_driver_priv;
-       if (split)
-               devlink_port_split_set(devlink_port, split_group);
+       devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+                              port_number, split, split_port_subnumber);
        devlink_port_type_eth_set(devlink_port, dev);
 }
 EXPORT_SYMBOL(mlxsw_core_port_eth_set);
@@ -1762,6 +1763,17 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_port_type_get);
 
+int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core,
+                                      u8 local_port, char *name, size_t len)
+{
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
+       struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+       return devlink_port_get_phys_port_name(devlink_port, name, len);
+}
+EXPORT_SYMBOL(mlxsw_core_port_get_phys_port_name);
+
 static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
                                    const char *buf, size_t size)
 {
index 092d39399f3ce46aed8a84dddc5bca8d218a5837..4eac7fbd07d5d394b28acc53f133c41edb895921 100644 (file)
@@ -201,13 +201,16 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port);
 void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port);
 void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
                             void *port_driver_priv, struct net_device *dev,
-                            bool split, u32 split_group);
+                            u32 port_number, bool split,
+                            u32 split_port_subnumber);
 void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
                            void *port_driver_priv);
 void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
                           void *port_driver_priv);
 enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
                                                u8 local_port);
+int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core,
+                                      u8 local_port, char *name, size_t len);
 
 int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
 bool mlxsw_core_schedule_work(struct work_struct *work);
index 3a9381977d6d397c2f7b55b60c91ba368624b2ec..db794a1a3a7e5046148dbc22c0ef51b441fedd2f 100644 (file)
@@ -117,6 +117,7 @@ struct mlxsw_pci_queue {
                struct {
                        u32 comp_sdq_count;
                        u32 comp_rdq_count;
+                       enum mlxsw_pci_cqe_v v;
                } cq;
                struct {
                        u32 ev_cmd_count;
@@ -155,6 +156,8 @@ struct mlxsw_pci {
        } cmd;
        struct mlxsw_bus_info bus_info;
        const struct pci_device_id *id;
+       enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximum supported CQE version */
+       u8 num_sdq_cqs; /* Number of CQs used for SDQs */
 };
 
 static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
@@ -202,24 +205,6 @@ static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit)
        return owner_bit != !!(q->consumer_counter & q->count);
 }
 
-static char *
-mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q,
-                           u32 (*get_elem_owner_func)(const char *))
-{
-       struct mlxsw_pci_queue_elem_info *elem_info;
-       char *elem;
-       bool owner_bit;
-
-       elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
-       elem = elem_info->elem;
-       owner_bit = get_elem_owner_func(elem);
-       if (mlxsw_pci_elem_hw_owned(q, owner_bit))
-               return NULL;
-       q->consumer_counter++;
-       rmb(); /* make sure we read owned bit before the rest of elem */
-       return elem;
-}
-
 static struct mlxsw_pci_queue_type_group *
 mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci,
                               enum mlxsw_pci_queue_type q_type)
@@ -494,6 +479,17 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
        }
 }
 
+static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci,
+                                 struct mlxsw_pci_queue *q)
+{
+       q->u.cq.v = mlxsw_pci->max_cqe_ver;
+
+       /* For SDQ it is pointless to use CQEv2, so use CQEv1 instead */
+       if (q->u.cq.v == MLXSW_PCI_CQE_V2 &&
+           q->num < mlxsw_pci->num_sdq_cqs)
+               q->u.cq.v = MLXSW_PCI_CQE_V1;
+}
+
 static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
                             struct mlxsw_pci_queue *q)
 {
@@ -505,10 +501,16 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
        for (i = 0; i < q->count; i++) {
                char *elem = mlxsw_pci_queue_elem_get(q, i);
 
-               mlxsw_pci_cqe_owner_set(elem, 1);
+               mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1);
        }
 
-       mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */
+       if (q->u.cq.v == MLXSW_PCI_CQE_V1)
+               mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+                               MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
+       else if (q->u.cq.v == MLXSW_PCI_CQE_V2)
+               mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+                               MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
+
        mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
        mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
        mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
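
The pre-init hook above decides each CQ's CQE version before the queue is sized: the device-wide maximum is taken, but CQs serving send queues are dropped from v2 back to v1 (per the comment above, CQEv2 buys nothing for SDQs, whose extra metadata concerns the receive path). A small sketch mirroring the two conditions:

#include <stdio.h>

enum cqe_v { CQE_V0, CQE_V1, CQE_V2 };

/* SDQ CQs come first; they never benefit from CQEv2. */
static enum cqe_v cq_version(enum cqe_v max_ver, int cq_num, int num_sdq_cqs)
{
	if (max_ver == CQE_V2 && cq_num < num_sdq_cqs)
		return CQE_V1;
	return max_ver;
}

int main(void)
{
	printf("sdq cq -> v%d\n", cq_version(CQE_V2, 0, 4));	/* v1 */
	printf("rdq cq -> v%d\n", cq_version(CQE_V2, 5, 4));	/* v2 */
	return 0;
}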
@@ -559,7 +561,7 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
                                     struct mlxsw_pci_queue *q,
                                     u16 consumer_counter_limit,
-                                    char *cqe)
+                                    enum mlxsw_pci_cqe_v cqe_v, char *cqe)
 {
        struct pci_dev *pdev = mlxsw_pci->pdev;
        struct mlxsw_pci_queue_elem_info *elem_info;
@@ -579,10 +581,11 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
        if (q->consumer_counter++ != consumer_counter_limit)
                dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
 
-       if (mlxsw_pci_cqe_lag_get(cqe)) {
+       if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
                rx_info.is_lag = true;
-               rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe);
-               rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe);
+               rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
+               rx_info.lag_port_index =
+                       mlxsw_pci_cqe_lag_subport_get(cqe_v, cqe);
        } else {
                rx_info.is_lag = false;
                rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
@@ -591,7 +594,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
        rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
 
        byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
-       if (mlxsw_pci_cqe_crc_get(cqe))
+       if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
                byte_count -= ETH_FCS_LEN;
        skb_put(skb, byte_count);
        mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
@@ -608,7 +611,18 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 
 static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
 {
-       return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_cqe_owner_get);
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       char *elem;
+       bool owner_bit;
+
+       elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+       elem = elem_info->elem;
+       owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem);
+       if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+               return NULL;
+       q->consumer_counter++;
+       rmb(); /* make sure we read owned bit before the rest of elem */
+       return elem;
 }
 
 static void mlxsw_pci_cq_tasklet(unsigned long data)
@@ -621,8 +635,8 @@ static void mlxsw_pci_cq_tasklet(unsigned long data)
 
        while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
                u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
-               u8 sendq = mlxsw_pci_cqe_sr_get(cqe);
-               u8 dqn = mlxsw_pci_cqe_dqn_get(cqe);
+               u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
+               u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
 
                if (sendq) {
                        struct mlxsw_pci_queue *sdq;
@@ -636,7 +650,7 @@ static void mlxsw_pci_cq_tasklet(unsigned long data)
 
                        rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn);
                        mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
-                                                wqe_counter, cqe);
+                                                wqe_counter, q->u.cq.v, cqe);
                        q->u.cq.comp_rdq_count++;
                }
                if (++items == credits)
@@ -648,6 +662,18 @@ static void mlxsw_pci_cq_tasklet(unsigned long data)
        }
 }
 
+static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
+{
+       return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT :
+                                              MLXSW_PCI_CQE01_COUNT;
+}
+
+static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q)
+{
+       return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE :
+                                              MLXSW_PCI_CQE01_SIZE;
+}
+
 static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
                             struct mlxsw_pci_queue *q)
 {
@@ -696,7 +722,18 @@ static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe)
 
 static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
 {
-       return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_eqe_owner_get);
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       char *elem;
+       bool owner_bit;
+
+       elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+       elem = elem_info->elem;
+       owner_bit = mlxsw_pci_eqe_owner_get(elem);
+       if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+               return NULL;
+       q->consumer_counter++;
+       rmb(); /* make sure we read owned bit before the rest of elem */
+       return elem;
 }
 
 static void mlxsw_pci_eq_tasklet(unsigned long data)
@@ -749,11 +786,15 @@ static void mlxsw_pci_eq_tasklet(unsigned long data)
 struct mlxsw_pci_queue_ops {
        const char *name;
        enum mlxsw_pci_queue_type type;
+       void (*pre_init)(struct mlxsw_pci *mlxsw_pci,
+                        struct mlxsw_pci_queue *q);
        int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox,
                    struct mlxsw_pci_queue *q);
        void (*fini)(struct mlxsw_pci *mlxsw_pci,
                     struct mlxsw_pci_queue *q);
        void (*tasklet)(unsigned long data);
+       u16 (*elem_count_f)(const struct mlxsw_pci_queue *q);
+       u8 (*elem_size_f)(const struct mlxsw_pci_queue *q);
        u16 elem_count;
        u8 elem_size;
 };
@@ -776,11 +817,12 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = {
 
 static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
        .type           = MLXSW_PCI_QUEUE_TYPE_CQ,
+       .pre_init       = mlxsw_pci_cq_pre_init,
        .init           = mlxsw_pci_cq_init,
        .fini           = mlxsw_pci_cq_fini,
        .tasklet        = mlxsw_pci_cq_tasklet,
-       .elem_count     = MLXSW_PCI_CQE_COUNT,
-       .elem_size      = MLXSW_PCI_CQE_SIZE
+       .elem_count_f   = mlxsw_pci_cq_elem_count,
+       .elem_size_f    = mlxsw_pci_cq_elem_size
 };
 
 static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
@@ -800,10 +842,15 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
        int i;
        int err;
 
-       spin_lock_init(&q->lock);
        q->num = q_num;
-       q->count = q_ops->elem_count;
-       q->elem_size = q_ops->elem_size;
+       if (q_ops->pre_init)
+               q_ops->pre_init(mlxsw_pci, q);
+
+       spin_lock_init(&q->lock);
+       q->count = q_ops->elem_count_f ? q_ops->elem_count_f(q) :
+                                        q_ops->elem_count;
+       q->elem_size = q_ops->elem_size_f ? q_ops->elem_size_f(q) :
+                                           q_ops->elem_size;
        q->type = q_ops->type;
        q->pci = mlxsw_pci;
 
@@ -832,7 +879,7 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 
                elem_info = mlxsw_pci_queue_elem_info_get(q, i);
                elem_info->elem =
-                       __mlxsw_pci_queue_elem_get(q, q_ops->elem_size, i);
+                       __mlxsw_pci_queue_elem_get(q, q->elem_size, i);
        }
 
        mlxsw_cmd_mbox_zero(mbox);
@@ -912,6 +959,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
        u8 rdq_log2sz;
        u8 num_cqs;
        u8 cq_log2sz;
+       u8 cqv2_log2sz;
        u8 num_eqs;
        u8 eq_log2sz;
        int err;
@@ -927,6 +975,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
        rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox);
        num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox);
        cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox);
+       cqv2_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cqv2_sz_get(mbox);
        num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox);
        eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
 
@@ -938,12 +987,16 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
 
        if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
            (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
-           (1 << cq_log2sz != MLXSW_PCI_CQE_COUNT) ||
+           (1 << cq_log2sz != MLXSW_PCI_CQE01_COUNT) ||
+           (mlxsw_pci->max_cqe_ver == MLXSW_PCI_CQE_V2 &&
+            (1 << cqv2_log2sz != MLXSW_PCI_CQE2_COUNT)) ||
            (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) {
                dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n");
                return -EINVAL;
        }
 
+       mlxsw_pci->num_sdq_cqs = num_sdqs;
+
        err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops,
                                         num_eqs);
        if (err) {
@@ -1184,6 +1237,11 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
                mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
                                                     &profile->swid_config[i]);
 
+       if (mlxsw_pci->max_cqe_ver > MLXSW_PCI_CQE_V0) {
+               mlxsw_cmd_mbox_config_profile_set_cqe_version_set(mbox, 1);
+               mlxsw_cmd_mbox_config_profile_cqe_version_set(mbox, 1);
+       }
+
        return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
 }
 
@@ -1378,6 +1436,21 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
        if (err)
                goto err_query_resources;
 
+       if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V2) &&
+           MLXSW_CORE_RES_GET(mlxsw_core, CQE_V2))
+               mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V2;
+       else if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V1) &&
+                MLXSW_CORE_RES_GET(mlxsw_core, CQE_V1))
+               mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V1;
+       else if ((MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0) &&
+                 MLXSW_CORE_RES_GET(mlxsw_core, CQE_V0)) ||
+                !MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0)) {
+               mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V0;
+       } else {
+               dev_err(&pdev->dev, "Invalid supported CQE version combination reported\n");
+               goto err_cqe_v_check;
+       }
+
        err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res);
        if (err)
                goto err_config_profile;
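
The cascade above derives the driver's CQE-version ceiling from firmware-reported resources, with a backward-compatibility twist: if the CQE_V0 resource is not reported at all, the firmware predates these resource IDs and v0 is assumed. A sketch of the same precedence, where the booleans stand in for the MLXSW_CORE_RES_VALID/RES_GET pairs:

#include <stdio.h>

enum cqe_v { CQE_V0, CQE_V1, CQE_V2, CQE_INVALID };

static enum cqe_v pick_max_cqe_ver(int v2_sup, int v1_sup,
				   int v0_reported, int v0_sup)
{
	if (v2_sup)
		return CQE_V2;
	if (v1_sup)
		return CQE_V1;
	if (!v0_reported || v0_sup)
		return CQE_V0;	/* unreported resource: legacy firmware */
	return CQE_INVALID;	/* reported, but nothing supported */
}

int main(void)
{
	printf("%d\n", pick_max_cqe_ver(1, 1, 1, 1));	/* 2 */
	printf("%d\n", pick_max_cqe_ver(0, 0, 0, 0));	/* 0: legacy fw */
	printf("%d\n", pick_max_cqe_ver(0, 0, 1, 0));	/* 3: invalid */
	return 0;
}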
@@ -1400,6 +1473,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
        mlxsw_pci_aqs_fini(mlxsw_pci);
 err_aqs_init:
 err_config_profile:
+err_cqe_v_check:
 err_query_resources:
 err_boardinfo:
        mlxsw_pci_fw_area_fini(mlxsw_pci);
index fb082ad21b00e43435003e186eae92ff3d197a66..963155f6a17a5ad41a448f9ed0a1bf20d3f81c88 100644 (file)
 #define MLXSW_PCI_AQ_PAGES     8
 #define MLXSW_PCI_AQ_SIZE      (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
 #define MLXSW_PCI_WQE_SIZE     32 /* 32 bytes per element */
-#define MLXSW_PCI_CQE_SIZE     16 /* 16 bytes per element */
+#define MLXSW_PCI_CQE01_SIZE   16 /* 16 bytes per element */
+#define MLXSW_PCI_CQE2_SIZE    32 /* 32 bytes per element */
 #define MLXSW_PCI_EQE_SIZE     16 /* 16 bytes per element */
 #define MLXSW_PCI_WQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
-#define MLXSW_PCI_CQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
+#define MLXSW_PCI_CQE01_COUNT  (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE)
+#define MLXSW_PCI_CQE2_COUNT   (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE2_SIZE)
 #define MLXSW_PCI_EQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
 #define MLXSW_PCI_EQE_UPDATE_COUNT     0x80
 
@@ -126,10 +128,48 @@ MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
  */
 MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
 
+enum mlxsw_pci_cqe_v {
+       MLXSW_PCI_CQE_V0,
+       MLXSW_PCI_CQE_V1,
+       MLXSW_PCI_CQE_V2,
+};
+
+#define mlxsw_pci_cqe_item_helpers(name, v0, v1, v2)                           \
+static inline u32 mlxsw_pci_cqe_##name##_get(enum mlxsw_pci_cqe_v v, char *cqe)        \
+{                                                                              \
+       switch (v) {                                                            \
+       default:                                                                \
+       case MLXSW_PCI_CQE_V0:                                                  \
+               return mlxsw_pci_cqe##v0##_##name##_get(cqe);                   \
+       case MLXSW_PCI_CQE_V1:                                                  \
+               return mlxsw_pci_cqe##v1##_##name##_get(cqe);                   \
+       case MLXSW_PCI_CQE_V2:                                                  \
+               return mlxsw_pci_cqe##v2##_##name##_get(cqe);                   \
+       }                                                                       \
+}                                                                              \
+static inline void mlxsw_pci_cqe_##name##_set(enum mlxsw_pci_cqe_v v,          \
+                                             char *cqe, u32 val)               \
+{                                                                              \
+       switch (v) {                                                            \
+       default:                                                                \
+       case MLXSW_PCI_CQE_V0:                                                  \
+               mlxsw_pci_cqe##v0##_##name##_set(cqe, val);                     \
+               break;                                                          \
+       case MLXSW_PCI_CQE_V1:                                                  \
+               mlxsw_pci_cqe##v1##_##name##_set(cqe, val);                     \
+               break;                                                          \
+       case MLXSW_PCI_CQE_V2:                                                  \
+               mlxsw_pci_cqe##v2##_##name##_set(cqe, val);                     \
+               break;                                                          \
+       }                                                                       \
+}
+
 /* pci_cqe_lag
  * Packet arrives from a port which is a LAG
  */
-MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
+MLXSW_ITEM32(pci, cqe0, lag, 0x00, 23, 1);
+MLXSW_ITEM32(pci, cqe12, lag, 0x00, 24, 1);
+mlxsw_pci_cqe_item_helpers(lag, 0, 12, 12);
 
 /* pci_cqe_system_port/lag_id
  * When lag=0: System port on which the packet was received
@@ -138,8 +178,12 @@ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
  * bits [3:0] sub_port on which the packet was received
  */
 MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
-MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
-MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
+MLXSW_ITEM32(pci, cqe0, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe12, lag_id, 0x00, 0, 16);
+mlxsw_pci_cqe_item_helpers(lag_id, 0, 12, 12);
+MLXSW_ITEM32(pci, cqe0, lag_subport, 0x00, 0, 4);
+MLXSW_ITEM32(pci, cqe12, lag_subport, 0x00, 16, 8);
+mlxsw_pci_cqe_item_helpers(lag_subport, 0, 12, 12);
 
 /* pci_cqe_wqe_counter
  * WQE count of the WQEs completed on the associated dqn
@@ -162,28 +206,38 @@ MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 9);
  * Length includes CRC. Indicates that the length field includes
  * the packet's CRC.
  */
-MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1);
+MLXSW_ITEM32(pci, cqe0, crc, 0x0C, 8, 1);
+MLXSW_ITEM32(pci, cqe12, crc, 0x0C, 9, 1);
+mlxsw_pci_cqe_item_helpers(crc, 0, 12, 12);
 
 /* pci_cqe_e
  * CQE with Error.
  */
-MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
+MLXSW_ITEM32(pci, cqe0, e, 0x0C, 7, 1);
+MLXSW_ITEM32(pci, cqe12, e, 0x00, 27, 1);
+mlxsw_pci_cqe_item_helpers(e, 0, 12, 12);
 
 /* pci_cqe_sr
  * 1 - Send Queue
  * 0 - Receive Queue
  */
-MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
+MLXSW_ITEM32(pci, cqe0, sr, 0x0C, 6, 1);
+MLXSW_ITEM32(pci, cqe12, sr, 0x00, 26, 1);
+mlxsw_pci_cqe_item_helpers(sr, 0, 12, 12);
 
 /* pci_cqe_dqn
  * Descriptor Queue (DQ) Number.
  */
-MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
+MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5);
+MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6);
+mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12);
 
 /* pci_cqe_owner
  * Ownership bit.
  */
-MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
+MLXSW_ITEM32(pci, cqe01, owner, 0x0C, 0, 1);
+MLXSW_ITEM32(pci, cqe2, owner, 0x1C, 0, 1);
+mlxsw_pci_cqe_item_helpers(owner, 01, 01, 2);
 
 /* pci_eqe_event_type
  * Event type.
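
The mlxsw_pci_cqe_item_helpers() macro above stamps out, per field, a getter/setter pair that switches on the runtime CQE version and forwards to the matching generated accessor, so hot-path code keeps a single call site per field. A compilable userspace reduction of the idea — field layouts here are fake, and v1/v2 are collapsed into one variant for brevity:

#include <stdint.h>
#include <stdio.h>

enum cqe_v { CQE_V0, CQE_V1, CQE_V2 };

/* Stand-ins for the MLXSW_ITEM32-generated per-version accessors. */
static uint32_t cqe0_dqn_get(const char *cqe)  { return (uint8_t)cqe[0] & 0x1f; }
static uint32_t cqe12_dqn_get(const char *cqe) { return (uint8_t)cqe[0] & 0x3f; }

#define cqe_item_helper(name, v0, v12)					\
static uint32_t cqe_##name##_get(enum cqe_v v, const char *cqe)		\
{									\
	switch (v) {							\
	default:							\
	case CQE_V0:							\
		return cqe##v0##_##name##_get(cqe);			\
	case CQE_V1:							\
	case CQE_V2:							\
		return cqe##v12##_##name##_get(cqe);			\
	}								\
}

cqe_item_helper(dqn, 0, 12)

int main(void)
{
	char cqe[32] = { 0x3f };

	printf("v0 dqn=%u, v2 dqn=%u\n",
	       cqe_dqn_get(CQE_V0, cqe), cqe_dqn_get(CQE_V2, cqe));
	return 0;
}

The default label falling through to the v0 case matches the kernel macro, guaranteeing a defined result even for an unexpected version value.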
index 6218231e379ee2dcf1ba79c55e5f79d107d9fd6f..3f4d7e22cece48ec4531018dd538807260dc8d6c 100644 (file)
@@ -6833,6 +6833,12 @@ enum mlxsw_reg_mpat_span_type {
         */
        MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
 
+       /* Remote SPAN Ethernet VLAN.
+        * The packet is forwarded to the monitoring port on the monitoring
+        * VLAN.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH = 0x1,
+
        /* Encapsulated Remote SPAN Ethernet L3 GRE.
         * The packet is encapsulated with GRE header.
         */
index 087aad52c19578779ad19c64c2596a9f2c1b236c..fd9299ccec7212d896846a747a996a35af388403 100644 (file)
@@ -43,6 +43,9 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
        MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
        MLXSW_RES_ID_MAX_TRAP_GROUPS,
+       MLXSW_RES_ID_CQE_V0,
+       MLXSW_RES_ID_CQE_V1,
+       MLXSW_RES_ID_CQE_V2,
        MLXSW_RES_ID_COUNTER_POOL_SIZE,
        MLXSW_RES_ID_MAX_SPAN,
        MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES,
@@ -81,6 +84,9 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
        [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
        [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
+       [MLXSW_RES_ID_CQE_V0] = 0x2210,
+       [MLXSW_RES_ID_CQE_V1] = 0x2211,
+       [MLXSW_RES_ID_CQE_V2] = 0x2212,
        [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410,
        [MLXSW_RES_ID_MAX_SPAN] = 0x2420,
        [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443,
index ca38a30fbe913c6126f1dab6afad79fae193887b..bb252b36994d6e615a0bb6b8337f920c9ee19f17 100644 (file)
@@ -441,29 +441,29 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
        mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
 }
 
-int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
-                             u8 state)
+enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state)
 {
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       enum mlxsw_reg_spms_state spms_state;
-       char *spms_pl;
-       int err;
-
        switch (state) {
        case BR_STATE_FORWARDING:
-               spms_state = MLXSW_REG_SPMS_STATE_FORWARDING;
-               break;
+               return MLXSW_REG_SPMS_STATE_FORWARDING;
        case BR_STATE_LEARNING:
-               spms_state = MLXSW_REG_SPMS_STATE_LEARNING;
-               break;
+               return MLXSW_REG_SPMS_STATE_LEARNING;
        case BR_STATE_LISTENING: /* fall-through */
        case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_BLOCKING:
-               spms_state = MLXSW_REG_SPMS_STATE_DISCARDING;
-               break;
+               return MLXSW_REG_SPMS_STATE_DISCARDING;
        default:
                BUG();
        }
+}
+
+int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+                             u8 state)
+{
+       enum mlxsw_reg_spms_state spms_state = mlxsw_sp_stp_spms_state(state);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char *spms_pl;
+       int err;
 
        spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
        if (!spms_pl)
@@ -1238,21 +1238,10 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name,
                                            size_t len)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-       u8 module = mlxsw_sp_port->mapping.module;
-       u8 width = mlxsw_sp_port->mapping.width;
-       u8 lane = mlxsw_sp_port->mapping.lane;
-       int err;
-
-       if (!mlxsw_sp_port->split)
-               err = snprintf(name, len, "p%d", module + 1);
-       else
-               err = snprintf(name, len, "p%ds%d", module + 1,
-                              lane / width);
-
-       if (err >= len)
-               return -EINVAL;
 
-       return 0;
+       return mlxsw_core_port_get_phys_port_name(mlxsw_sp_port->mlxsw_sp->core,
+                                                 mlxsw_sp_port->local_port,
+                                                 name, len);
 }
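
The naming scheme the deleted snprintf logic implemented — and which the devlink core now derives from the port attributes registered via mlxsw_core_port_eth_set() — is "p<module+1>" for a full port and "p<module+1>s<lane/width>" for a split subport. A sketch of the removed logic for reference (hypothetical helper, not a driver function):

#include <stdio.h>

static int phys_port_name(char *buf, size_t len, int module, int split,
			  int lane, int width)
{
	int n = split ?
		snprintf(buf, len, "p%ds%d", module + 1, lane / width) :
		snprintf(buf, len, "p%d", module + 1);

	return (n < 0 || (size_t)n >= len) ? -1 : 0;	/* truncated name */
}

int main(void)
{
	char name[16];

	if (!phys_port_name(name, sizeof(name), 2, 0, 0, 4))
		printf("%s\n", name);	/* p3 */
	if (!phys_port_name(name, sizeof(name), 2, 1, 2, 2))
		printf("%s\n", name);	/* p3s1 */
	return 0;
}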
 
 static struct mlxsw_sp_port_mall_tc_entry *
@@ -2927,8 +2916,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
        }
 
        mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port,
-                               mlxsw_sp_port, dev, mlxsw_sp_port->split,
-                               module);
+                               mlxsw_sp_port, dev, module + 1,
+                               mlxsw_sp_port->split, lane / width);
        mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0);
        return 0;
 
@@ -3666,6 +3655,15 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_lag_init;
        }
 
+       /* Initialize SPAN before router and switchdev, so that those components
+        * can call mlxsw_sp_span_respin().
+        */
+       err = mlxsw_sp_span_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+               goto err_span_init;
+       }
+
        err = mlxsw_sp_switchdev_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n");
@@ -3684,15 +3682,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_afa_init;
        }
 
-       err = mlxsw_sp_span_init(mlxsw_sp);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
-               goto err_span_init;
-       }
-
-       /* Initialize router after SPAN is initialized, so that the FIB and
-        * neighbor event handlers can issue SPAN respin.
-        */
        err = mlxsw_sp_router_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
@@ -3739,14 +3728,14 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 err_netdev_notifier:
        mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
-       mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
        mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
 err_counter_pool_init:
        mlxsw_sp_switchdev_fini(mlxsw_sp);
 err_switchdev_init:
+       mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
        mlxsw_sp_lag_fini(mlxsw_sp);
 err_lag_init:
        mlxsw_sp_buffers_fini(mlxsw_sp);
@@ -3768,10 +3757,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_acl_fini(mlxsw_sp);
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
        mlxsw_sp_router_fini(mlxsw_sp);
-       mlxsw_sp_span_fini(mlxsw_sp);
        mlxsw_sp_afa_fini(mlxsw_sp);
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
        mlxsw_sp_switchdev_fini(mlxsw_sp);
+       mlxsw_sp_span_fini(mlxsw_sp);
        mlxsw_sp_lag_fini(mlxsw_sp);
        mlxsw_sp_buffers_fini(mlxsw_sp);
        mlxsw_sp_traps_fini(mlxsw_sp);
index 804d4d2c80318310b09d8174c2ec82794e6e2865..4a519d8edec8fe0410e5548182a389848011b2d2 100644 (file)
@@ -364,6 +364,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
 int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                  enum mlxsw_reg_qeec_hr hr, u8 index,
                                  u8 next_index, u32 maxrate);
+enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state);
 int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
                              u8 state);
 int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable);
index 8e4edb634b1176b69da9baef152c35c4b13cbbac..8028d221aece87da388ce57270fe0ebdbf5096f5 100644 (file)
@@ -5882,24 +5882,24 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
        switch (info->family) {
        case AF_INET:
                if (!fib4_rule_default(rule) && !rule->l3mdev)
-                       err = -1;
+                       err = -EOPNOTSUPP;
                break;
        case AF_INET6:
                if (!fib6_rule_default(rule) && !rule->l3mdev)
-                       err = -1;
+                       err = -EOPNOTSUPP;
                break;
        case RTNL_FAMILY_IPMR:
                if (!ipmr_rule_default(rule) && !rule->l3mdev)
-                       err = -1;
+                       err = -EOPNOTSUPP;
                break;
        case RTNL_FAMILY_IP6MR:
                if (!ip6mr_rule_default(rule) && !rule->l3mdev)
-                       err = -1;
+                       err = -EOPNOTSUPP;
                break;
        }
 
        if (err < 0)
-               NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
+               NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
 
        return err;
 }
@@ -5926,8 +5926,15 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
        case FIB_EVENT_RULE_DEL:
                err = mlxsw_sp_router_fib_rule_event(event, info,
                                                     router->mlxsw_sp);
-               if (!err)
-                       return NOTIFY_DONE;
+               if (!err || info->extack)
+                       return notifier_from_errno(err);
+               break;
+       case FIB_EVENT_ENTRY_ADD:
+               if (router->aborted) {
+                       NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
+                       return notifier_from_errno(-EINVAL);
+               }
+               break;
        }
 
        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
index 65a77708ff617b8f4b0714cfffd9ff8fa87c4999..da3f7f527360147addf627e1a12ac87eced09e78 100644 (file)
@@ -32,6 +32,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/if_bridge.h>
 #include <linux/list.h>
 #include <net/arp.h>
 #include <net/gre.h>
@@ -39,8 +40,9 @@
 #include <net/ip6_tunnel.h>
 
 #include "spectrum.h"
-#include "spectrum_span.h"
 #include "spectrum_ipip.h"
+#include "spectrum_span.h"
+#include "spectrum_switchdev.h"
 
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
@@ -135,14 +137,14 @@ struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
 
 static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
                              const void *pkey,
-                             struct net_device *l3edev,
+                             struct net_device *dev,
                              unsigned char dmac[ETH_ALEN])
 {
-       struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+       struct neighbour *neigh = neigh_lookup(tbl, pkey, dev);
        int err = 0;
 
        if (!neigh) {
-               neigh = neigh_create(tbl, pkey, l3edev);
+               neigh = neigh_create(tbl, pkey, dev);
                if (IS_ERR(neigh))
                        return PTR_ERR(neigh);
        }
@@ -167,8 +169,97 @@ mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
        return 0;
 }
 
+static struct net_device *
+mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev,
+                                unsigned char *dmac,
+                                u16 *p_vid)
+{
+       struct bridge_vlan_info vinfo;
+       struct net_device *edev;
+       u16 vid = *p_vid;
+
+       if (!vid && WARN_ON(br_vlan_get_pvid(br_dev, &vid)))
+               return NULL;
+       if (!vid ||
+           br_vlan_get_info(br_dev, vid, &vinfo) ||
+           !(vinfo.flags & BRIDGE_VLAN_INFO_BRENTRY))
+               return NULL;
+
+       edev = br_fdb_find_port(br_dev, dmac, vid);
+       if (!edev)
+               return NULL;
+
+       if (br_vlan_get_info(edev, vid, &vinfo))
+               return NULL;
+       if (!(vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED))
+               *p_vid = vid;
+       return edev;
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge_8021d(const struct net_device *br_dev,
+                                unsigned char *dmac)
+{
+       return br_fdb_find_port(br_dev, dmac, 0);
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge(const struct net_device *br_dev,
+                          unsigned char dmac[ETH_ALEN],
+                          u16 *p_vid)
+{
+       struct mlxsw_sp_bridge_port *bridge_port;
+       enum mlxsw_reg_spms_state spms_state;
+       struct net_device *dev = NULL;
+       struct mlxsw_sp_port *port;
+       u8 stp_state;
+
+       if (br_vlan_enabled(br_dev))
+               dev = mlxsw_sp_span_entry_bridge_8021q(br_dev, dmac, p_vid);
+       else if (!*p_vid)
+               dev = mlxsw_sp_span_entry_bridge_8021d(br_dev, dmac);
+       if (!dev)
+               return NULL;
+
+       port = mlxsw_sp_port_dev_lower_find(dev);
+       if (!port)
+               return NULL;
+
+       bridge_port = mlxsw_sp_bridge_port_find(port->mlxsw_sp->bridge, dev);
+       if (!bridge_port)
+               return NULL;
+
+       stp_state = mlxsw_sp_bridge_port_stp_state(bridge_port);
+       spms_state = mlxsw_sp_stp_spms_state(stp_state);
+       if (spms_state != MLXSW_REG_SPMS_STATE_FORWARDING)
+               return NULL;
+
+       return dev;
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_vlan(const struct net_device *vlan_dev,
+                        u16 *p_vid)
+{
+       *p_vid = vlan_dev_vlan_id(vlan_dev);
+       return vlan_dev_real_dev(vlan_dev);
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_lag(struct net_device *lag_dev)
+{
+       struct net_device *dev;
+       struct list_head *iter;
+
+       netdev_for_each_lower_dev(lag_dev, dev, iter)
+               if ((dev->flags & IFF_UP) && mlxsw_sp_port_dev_check(dev))
+                       return dev;
+
+       return NULL;
+}
+
 static __maybe_unused int
-mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *edev,
                                        union mlxsw_sp_l3addr saddr,
                                        union mlxsw_sp_l3addr daddr,
                                        union mlxsw_sp_l3addr gw,
@@ -177,21 +268,51 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
                                        struct mlxsw_sp_span_parms *sparmsp)
 {
        unsigned char dmac[ETH_ALEN];
+       u16 vid = 0;
 
        if (mlxsw_sp_l3addr_is_zero(gw))
                gw = daddr;
 
-       if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
-           mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
-               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+       if (!edev || mlxsw_sp_span_dmac(tbl, &gw, edev, dmac))
+               goto unoffloadable;
+
+       if (is_vlan_dev(edev))
+               edev = mlxsw_sp_span_entry_vlan(edev, &vid);
 
-       sparmsp->dest_port = netdev_priv(l3edev);
+       if (netif_is_bridge_master(edev)) {
+               edev = mlxsw_sp_span_entry_bridge(edev, dmac, &vid);
+               if (!edev)
+                       goto unoffloadable;
+       }
+
+       if (is_vlan_dev(edev)) {
+               if (vid || !(edev->flags & IFF_UP))
+                       goto unoffloadable;
+               edev = mlxsw_sp_span_entry_vlan(edev, &vid);
+       }
+
+       if (netif_is_lag_master(edev)) {
+               if (!(edev->flags & IFF_UP))
+                       goto unoffloadable;
+               edev = mlxsw_sp_span_entry_lag(edev);
+               if (!edev)
+                       goto unoffloadable;
+       }
+
+       if (!mlxsw_sp_port_dev_check(edev))
+               goto unoffloadable;
+
+       sparmsp->dest_port = netdev_priv(edev);
        sparmsp->ttl = ttl;
        memcpy(sparmsp->dmac, dmac, ETH_ALEN);
-       memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+       memcpy(sparmsp->smac, edev->dev_addr, ETH_ALEN);
        sparmsp->saddr = saddr;
        sparmsp->daddr = daddr;
+       sparmsp->vid = vid;
        return 0;
+
+unoffloadable:
+       return mlxsw_sp_span_entry_unoffloadable(sparmsp);
 }
 
 #if IS_ENABLED(CONFIG_NET_IPGRE)
@@ -268,9 +389,10 @@ mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
        /* Create a new port analyzer entry for local_port. */
        mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
                            MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
        mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
                                    MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
-                                   sparms.dmac, false);
+                                   sparms.dmac, !!sparms.vid);
        mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
                                              sparms.ttl, sparms.smac,
                                              be32_to_cpu(sparms.saddr.addr4),
@@ -368,9 +490,10 @@ mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
        /* Create a new port analyzer entry for local_port. */
        mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
                            MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
        mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
                                    MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
-                                   sparms.dmac, false);
+                                   sparms.dmac, !!sparms.vid);
        mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
                                              sparms.saddr.addr6,
                                              sparms.daddr.addr6);
@@ -394,6 +517,61 @@ struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
 };
 #endif
 
+static bool
+mlxsw_sp_span_vlan_can_handle(const struct net_device *dev)
+{
+       return is_vlan_dev(dev) &&
+              mlxsw_sp_port_dev_check(vlan_dev_real_dev(dev));
+}
+
+static int
+mlxsw_sp_span_entry_vlan_parms(const struct net_device *to_dev,
+                              struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct net_device *real_dev;
+       u16 vid;
+
+       if (!(to_dev->flags & IFF_UP))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       real_dev = mlxsw_sp_span_entry_vlan(to_dev, &vid);
+       sparmsp->dest_port = netdev_priv(real_dev);
+       sparmsp->vid = vid;
+       return 0;
+}
+
+static int
+mlxsw_sp_span_entry_vlan_configure(struct mlxsw_sp_span_entry *span_entry,
+                                  struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH);
+       mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_vlan_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_vlan = {
+       .can_handle = mlxsw_sp_span_vlan_can_handle,
+       .parms = mlxsw_sp_span_entry_vlan_parms,
+       .configure = mlxsw_sp_span_entry_vlan_configure,
+       .deconfigure = mlxsw_sp_span_entry_vlan_deconfigure,
+};
+
 static const
 struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
        &mlxsw_sp_span_entry_ops_phys,
@@ -403,6 +581,7 @@ struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
 #if IS_ENABLED(CONFIG_IPV6_GRE)
        &mlxsw_sp_span_entry_ops_gretap6,
 #endif
+       &mlxsw_sp_span_entry_ops_vlan,
 };
 
 static int
@@ -766,7 +945,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 
        span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
        if (!span_entry)
-               return -ENOENT;
+               return -ENOBUFS;
 
        netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
                   span_entry->id);
index 4b87ec20e65810b82ebcec18d65e73f98f54bb0e..14a6de904db176c2b0ef1f652b160bdcbd059002 100644 (file)
@@ -63,6 +63,7 @@ struct mlxsw_sp_span_parms {
        unsigned char smac[ETH_ALEN];
        union mlxsw_sp_l3addr daddr;
        union mlxsw_sp_l3addr saddr;
+       u16 vid;
 };
 
 struct mlxsw_sp_span_entry_ops;
index c11c9a635866a4eb3feb0c6a6761d3c2afabadff..8c9cf8ee93988154e2edfab87f3a020f4d5abb5d 100644 (file)
@@ -49,7 +49,9 @@
 #include <linux/netlink.h>
 #include <net/switchdev.h>
 
+#include "spectrum_span.h"
 #include "spectrum_router.h"
+#include "spectrum_switchdev.h"
 #include "spectrum.h"
 #include "core.h"
 #include "reg.h"
@@ -239,7 +241,7 @@ __mlxsw_sp_bridge_port_find(const struct mlxsw_sp_bridge_device *bridge_device,
        return NULL;
 }
 
-static struct mlxsw_sp_bridge_port *
+struct mlxsw_sp_bridge_port *
 mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
                          struct net_device *brport_dev)
 {
@@ -922,6 +924,9 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
                break;
        }
 
+       if (switchdev_trans_ph_commit(trans))
+               mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
        return err;
 }
 
@@ -1646,18 +1651,57 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
        }
 }
 
+struct mlxsw_sp_span_respin_work {
+       struct work_struct work;
+       struct mlxsw_sp *mlxsw_sp;
+};
+
+static void mlxsw_sp_span_respin_work(struct work_struct *work)
+{
+       struct mlxsw_sp_span_respin_work *respin_work =
+               container_of(work, struct mlxsw_sp_span_respin_work, work);
+
+       rtnl_lock();
+       mlxsw_sp_span_respin(respin_work->mlxsw_sp);
+       rtnl_unlock();
+       kfree(respin_work);
+}
+
+static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_span_respin_work *respin_work;
+
+       respin_work = kzalloc(sizeof(*respin_work), GFP_ATOMIC);
+       if (!respin_work)
+               return;
+
+       INIT_WORK(&respin_work->work, mlxsw_sp_span_respin_work);
+       respin_work->mlxsw_sp = mlxsw_sp;
+
+       mlxsw_core_schedule_work(&respin_work->work);
+}
+
 static int mlxsw_sp_port_obj_add(struct net_device *dev,
                                 const struct switchdev_obj *obj,
                                 struct switchdev_trans *trans)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       const struct switchdev_obj_port_vlan *vlan;
        int err = 0;
 
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
-               err = mlxsw_sp_port_vlans_add(mlxsw_sp_port,
-                                             SWITCHDEV_OBJ_PORT_VLAN(obj),
-                                             trans);
+               vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+               err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans);
+
+               if (switchdev_trans_ph_commit(trans)) {
+                       /* The event is emitted before the changes are actually
+                        * applied to the bridge. Therefore schedule the respin
+                        * call for later, so that the respin logic sees the
+                        * updated bridge state.
+                        */
+                       mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp);
+               }
                break;
        case SWITCHDEV_OBJ_ID_PORT_MDB:
                err = mlxsw_sp_port_mdb_add(mlxsw_sp_port,
@@ -1718,13 +1762,11 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
        struct net_device *dev = mlxsw_sp_port->dev;
        int err;
 
-       if (bridge_port->bridge_device->multicast_enabled) {
-               if (bridge_port->bridge_device->multicast_enabled) {
-                       err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid,
-                                                    false);
-                       if (err)
-                               netdev_err(dev, "Unable to remove port from SMID\n");
-               }
+       if (bridge_port->bridge_device->multicast_enabled &&
+           !bridge_port->mrouter) {
+               err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false);
+               if (err)
+                       netdev_err(dev, "Unable to remove port from SMID\n");
        }
 
        err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
@@ -1808,6 +1850,8 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev,
                break;
        }
 
+       mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
        return err;
 }
 
@@ -2224,6 +2268,8 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
        switch (switchdev_work->event) {
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
+               if (!fdb_info->added_by_user)
+                       break;
                err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true);
                if (err)
                        break;
@@ -2233,10 +2279,20 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
                break;
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
+               if (!fdb_info->added_by_user)
+                       break;
                mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false);
                break;
+       case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
+       case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+               /* These events are only used to potentially update an existing
+                * SPAN mirror.
+                */
+               break;
        }
 
+       mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
 out:
        rtnl_unlock();
        kfree(switchdev_work->fdb_info.addr);
@@ -2265,7 +2321,9 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 
        switch (event) {
        case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
-       case SWITCHDEV_FDB_DEL_TO_DEVICE:
+       case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */
+       case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
+       case SWITCHDEV_FDB_DEL_TO_BRIDGE:
                memcpy(&switchdev_work->fdb_info, ptr,
                       sizeof(switchdev_work->fdb_info));
                switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
@@ -2297,6 +2355,12 @@ static struct notifier_block mlxsw_sp_switchdev_notifier = {
        .notifier_call = mlxsw_sp_switchdev_event,
 };
 
+u8
+mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port)
+{
+       return bridge_port->stp_state;
+}
+
 static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
new file mode 100644 (file)
index 0000000..bc44d5e
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/netdevice.h>
+
+struct mlxsw_sp_bridge;
+struct mlxsw_sp_bridge_port;
+
+struct mlxsw_sp_bridge_port *
+mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
+                         struct net_device *brport_dev);
+
+u8 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port);
index a655c5850aa6ac3d247072d5202aa36907e502b5..3922c1cfe5f5a78e91c11fcdd6cc04ab6a98bb25 100644 (file)
@@ -417,13 +417,10 @@ static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name,
                                            size_t len)
 {
        struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-       int err;
-
-       err = snprintf(name, len, "p%d", mlxsw_sx_port->mapping.module + 1);
-       if (err >= len)
-               return -EINVAL;
 
-       return 0;
+       return mlxsw_core_port_get_phys_port_name(mlxsw_sx_port->mlxsw_sx->core,
+                                                 mlxsw_sx_port->local_port,
+                                                 name, len);
 }
 
 static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
@@ -1149,7 +1146,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
        }
 
        mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
-                               mlxsw_sx_port, dev, false, 0);
+                               mlxsw_sx_port, dev, module + 1, false, 0);
        mlxsw_sx->ports[local_port] = mlxsw_sx_port;
        return 0;
 
diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
new file mode 100644 (file)
index 0000000..36c8462
--- /dev/null
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+config NET_VENDOR_MICROSEMI
+       bool "Microsemi devices"
+       default y
+       help
+         If you have a network (Ethernet) card belonging to this class, say Y.
+
+         Note that the answer to this question doesn't directly affect the
+         kernel: saying N will just cause the configurator to skip all
+         the questions about Microsemi devices.
+
+if NET_VENDOR_MICROSEMI
+
+config MSCC_OCELOT_SWITCH
+       tristate "Ocelot switch driver"
+       depends on NET_SWITCHDEV
+       depends on HAS_IOMEM
+       select PHYLIB
+       select REGMAP_MMIO
+       help
+         This driver supports the Ocelot network switch device.
+
+config MSCC_OCELOT_SWITCH_OCELOT
+       tristate "Ocelot switch driver on Ocelot"
+       depends on MSCC_OCELOT_SWITCH
+       help
+         This driver supports the Ocelot network switch device as present on
+         the Ocelot SoCs.
+
+endif # NET_VENDOR_MICROSEMI
diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile
new file mode 100644 (file)
index 0000000..cb52a3b
--- /dev/null
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
+mscc_ocelot_common-y := ocelot.o ocelot_io.o
+mscc_ocelot_common-y += ocelot_regs.o
+obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
new file mode 100644 (file)
index 0000000..c8c74aa
--- /dev/null
@@ -0,0 +1,1333 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_bridge.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/skbuff.h>
+#include <net/arp.h>
+#include <net/netevent.h>
+#include <net/rtnetlink.h>
+#include <net/switchdev.h>
+
+#include "ocelot.h"
+
+/* MAC table entry types.
+ * ENTRYTYPE_NORMAL is subject to aging.
+ * ENTRYTYPE_LOCKED is not subject to aging.
+ * ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast.
+ * ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast.
+ */
+enum macaccess_entry_type {
+       ENTRYTYPE_NORMAL = 0,
+       ENTRYTYPE_LOCKED,
+       ENTRYTYPE_MACv4,
+       ENTRYTYPE_MACv6,
+};
+
+struct ocelot_mact_entry {
+       u8 mac[ETH_ALEN];
+       u16 vid;
+       enum macaccess_entry_type type;
+};
+
+static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
+{
+       unsigned int val, timeout = 10;
+
+       /* Wait for the issued mac table command to be completed, or timeout.
+        * When the command read from ANA_TABLES_MACACCESS is
+        * MACACCESS_CMD_IDLE, the issued command completed successfully.
+        */
+       do {
+               val = ocelot_read(ocelot, ANA_TABLES_MACACCESS);
+               val &= ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M;
+       } while (val != MACACCESS_CMD_IDLE && timeout--);
+
+       if (!timeout)
+               return -ETIMEDOUT;
+
+       return 0;
+}
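
One caveat about the loop above: timeout is post-decremented in the loop
condition and then tested with !timeout, so a command that completes on the
very last iteration is still reported as -ETIMEDOUT. A sketch of a variant
without that boundary case (illustration only, not part of the patch):

    static inline int ocelot_mact_wait_sketch(struct ocelot *ocelot)
    {
            unsigned int val, timeout = 10;

            do {
                    val = ocelot_read(ocelot, ANA_TABLES_MACACCESS);
                    val &= ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M;
                    if (val == MACACCESS_CMD_IDLE)
                            return 0;       /* done, however late */
            } while (timeout--);

            return -ETIMEDOUT;
    }
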
+
+static void ocelot_mact_select(struct ocelot *ocelot,
+                              const unsigned char mac[ETH_ALEN],
+                              unsigned int vid)
+{
+       u32 macl = 0, mach = 0;
+
+       /* Set the MAC address and the VLAN ID associated with it, in the
+        * format understood by the hardware.
+        */
+       mach |= vid    << 16;
+       mach |= mac[0] << 8;
+       mach |= mac[1] << 0;
+       macl |= mac[2] << 24;
+       macl |= mac[3] << 16;
+       macl |= mac[4] << 8;
+       macl |= mac[5] << 0;
+
+       ocelot_write(ocelot, macl, ANA_TABLES_MACLDATA);
+       ocelot_write(ocelot, mach, ANA_TABLES_MACHDATA);
+}
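
For illustration (not part of the patch), here is the packing above as a
self-contained userspace program. The layout it assumes is exactly the one
implied by the shifts in ocelot_mact_select(): VID in bits 27:16 of MACHDATA,
the first two MAC octets below it, and the last four octets in MACLDATA.

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
            const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
            uint32_t vid = 1, mach = 0, macl = 0;

            mach |= vid    << 16;
            mach |= mac[0] << 8;
            mach |= mac[1] << 0;
            macl |= (uint32_t)mac[2] << 24;
            macl |= (uint32_t)mac[3] << 16;
            macl |= (uint32_t)mac[4] << 8;
            macl |= (uint32_t)mac[5] << 0;

            /* prints: mach=0x00010011 macl=0x22334455 */
            printf("mach=0x%08" PRIx32 " macl=0x%08" PRIx32 "\n", mach, macl);
            return 0;
    }
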
+
+static int ocelot_mact_learn(struct ocelot *ocelot, int port,
+                            const unsigned char mac[ETH_ALEN],
+                            unsigned int vid,
+                            enum macaccess_entry_type type)
+{
+       ocelot_mact_select(ocelot, mac, vid);
+
+       /* Issue a write command */
+       ocelot_write(ocelot, ANA_TABLES_MACACCESS_VALID |
+                            ANA_TABLES_MACACCESS_DEST_IDX(port) |
+                            ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
+                            ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN),
+                            ANA_TABLES_MACACCESS);
+
+       return ocelot_mact_wait_for_completion(ocelot);
+}
+
+static int ocelot_mact_forget(struct ocelot *ocelot,
+                             const unsigned char mac[ETH_ALEN],
+                             unsigned int vid)
+{
+       ocelot_mact_select(ocelot, mac, vid);
+
+       /* Issue a forget command */
+       ocelot_write(ocelot,
+                    ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_FORGET),
+                    ANA_TABLES_MACACCESS);
+
+       return ocelot_mact_wait_for_completion(ocelot);
+}
+
+static void ocelot_mact_init(struct ocelot *ocelot)
+{
+       /* Configure the learning mode entry attributes:
+        * - Do not copy the frame to the CPU extraction queues.
+        * - Use the VLAN and MAC for the DMAC lookup.
+        */
+       ocelot_rmw(ocelot, 0,
+                  ANA_AGENCTRL_LEARN_CPU_COPY | ANA_AGENCTRL_IGNORE_DMAC_FLAGS
+                  | ANA_AGENCTRL_LEARN_FWD_KILL
+                  | ANA_AGENCTRL_LEARN_IGNORE_VLAN,
+                  ANA_AGENCTRL);
+
+       /* Clear the MAC table */
+       ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
+}
+
+static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
+{
+       unsigned int val, timeout = 10;
+
+       /* Wait for the issued vlan table command to be completed, or timeout.
+        * When the command read from ANA_TABLES_VLANACCESS is
+        * ANA_TABLES_VLANACCESS_CMD_IDLE, the command completed successfully.
+        */
+       do {
+               val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS);
+               val &= ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M;
+       } while (val != ANA_TABLES_VLANACCESS_CMD_IDLE && timeout--);
+
+       if (!timeout)
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+static void ocelot_vlan_init(struct ocelot *ocelot)
+{
+       /* Clear the VLAN table; by default all ports are members of all VLANs */
+       ocelot_write(ocelot, ANA_TABLES_VLANACCESS_CMD_INIT,
+                    ANA_TABLES_VLANACCESS);
+       ocelot_vlant_wait_for_completion(ocelot);
+}
+
+/* Watermark encode
+ * Bit 8:   Unit; 0:1, 1:16
+ * Bit 7-0: Value to be multiplied with unit
+ */
+static u16 ocelot_wm_enc(u16 value)
+{
+       if (value >= BIT(8))
+               return BIT(8) | (value / 16);
+
+       return value;
+}
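
A quick worked example of the encoding above (illustration only): values
below 256 are stored as-is in unit 1, anything larger sets bit 8 and stores
value/16, so granularity drops to 16 cells once the watermark exceeds a byte.

    #include <assert.h>
    #include <stdint.h>

    static uint16_t wm_enc(uint16_t value)
    {
            if (value >= (1u << 8))
                    return (1u << 8) | (value / 16);
            return value;
    }

    int main(void)
    {
            assert(wm_enc(100) == 100);     /* unit 1: 100 cells */
            assert(wm_enc(512) == 0x120);   /* unit 16: 32 * 16 = 512 cells */
            return 0;
    }
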
+
+static void ocelot_port_adjust_link(struct net_device *dev)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       u8 p = port->chip_port;
+       int speed, atop_wm, mode = 0;
+
+       switch (dev->phydev->speed) {
+       case SPEED_10:
+               speed = OCELOT_SPEED_10;
+               break;
+       case SPEED_100:
+               speed = OCELOT_SPEED_100;
+               break;
+       case SPEED_1000:
+               speed = OCELOT_SPEED_1000;
+               mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+               break;
+       case SPEED_2500:
+               speed = OCELOT_SPEED_2500;
+               mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+               break;
+       default:
+               netdev_err(dev, "Unsupported PHY speed: %d\n",
+                          dev->phydev->speed);
+               return;
+       }
+
+       phy_print_status(dev->phydev);
+
+       if (!dev->phydev->link)
+               return;
+
+       /* Only full duplex supported for now */
+       ocelot_port_writel(port, DEV_MAC_MODE_CFG_FDX_ENA |
+                          mode, DEV_MAC_MODE_CFG);
+
+       /* Set MAC IFG Gaps
+        * FDX: TX_IFG = 5, RX_IFG1 = RX_IFG2 = 0
+        * !FDX: TX_IFG = 5, RX_IFG1 = RX_IFG2 = 5
+        */
+       ocelot_port_writel(port, DEV_MAC_IFG_CFG_TX_IFG(5), DEV_MAC_IFG_CFG);
+
+       /* Load seed (0) and set MAC HDX late collision  */
+       ocelot_port_writel(port, DEV_MAC_HDX_CFG_LATE_COL_POS(67) |
+                          DEV_MAC_HDX_CFG_SEED_LOAD,
+                          DEV_MAC_HDX_CFG);
+       mdelay(1);
+       ocelot_port_writel(port, DEV_MAC_HDX_CFG_LATE_COL_POS(67),
+                          DEV_MAC_HDX_CFG);
+
+       /* Disable HDX fast control */
+       ocelot_port_writel(port, DEV_PORT_MISC_HDX_FAST_DIS, DEV_PORT_MISC);
+
+       /* SGMII only for now */
+       ocelot_port_writel(port, PCS1G_MODE_CFG_SGMII_MODE_ENA, PCS1G_MODE_CFG);
+       ocelot_port_writel(port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG);
+
+       /* Enable PCS */
+       ocelot_port_writel(port, PCS1G_CFG_PCS_ENA, PCS1G_CFG);
+
+       /* No aneg on SGMII */
+       ocelot_port_writel(port, 0, PCS1G_ANEG_CFG);
+
+       /* No loopback */
+       ocelot_port_writel(port, 0, PCS1G_LB_CFG);
+
+       /* Set Max Length and maximum tags allowed */
+       ocelot_port_writel(port, VLAN_ETH_FRAME_LEN, DEV_MAC_MAXLEN_CFG);
+       ocelot_port_writel(port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) |
+                          DEV_MAC_TAGS_CFG_VLAN_AWR_ENA |
+                          DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA,
+                          DEV_MAC_TAGS_CFG);
+
+       /* Enable MAC module */
+       ocelot_port_writel(port, DEV_MAC_ENA_CFG_RX_ENA |
+                          DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
+
+       /* Take MAC, Port, PHY (internal) and PCS (SGMII/SerDes) clocks
+        * out of reset. */
+       ocelot_port_writel(port, DEV_CLOCK_CFG_LINK_SPEED(speed),
+                          DEV_CLOCK_CFG);
+
+       /* Set SMAC of Pause frame (00:00:00:00:00:00) */
+       ocelot_port_writel(port, 0, DEV_MAC_FC_MAC_HIGH_CFG);
+       ocelot_port_writel(port, 0, DEV_MAC_FC_MAC_LOW_CFG);
+
+       /* No PFC */
+       ocelot_write_gix(ocelot, ANA_PFC_PFC_CFG_FC_LINK_SPEED(speed),
+                        ANA_PFC_PFC_CFG, p);
+
+       /* Set Pause WM hysteresis
+        * 152 = 6 * VLAN_ETH_FRAME_LEN / OCELOT_BUFFER_CELL_SZ
+        * 101 = 4 * VLAN_ETH_FRAME_LEN / OCELOT_BUFFER_CELL_SZ
+        */
+       ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA |
+                        SYS_PAUSE_CFG_PAUSE_STOP(101) |
+                        SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, p);
+
+       /* Core: Enable port for frame transfer */
+       ocelot_write_rix(ocelot, QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE |
+                        QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(1) |
+                        QSYS_SWITCH_PORT_MODE_PORT_ENA,
+                        QSYS_SWITCH_PORT_MODE, p);
+
+       /* Flow control */
+       ocelot_write_rix(ocelot, SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
+                        SYS_MAC_FC_CFG_RX_FC_ENA | SYS_MAC_FC_CFG_TX_FC_ENA |
+                        SYS_MAC_FC_CFG_ZERO_PAUSE_ENA |
+                        SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
+                        SYS_MAC_FC_CFG_FC_LINK_SPEED(speed),
+                        SYS_MAC_FC_CFG, p);
+       ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, p);
+
+       /* Tail dropping watermark */
+       atop_wm = (ocelot->shared_queue_sz - 9 * VLAN_ETH_FRAME_LEN) / OCELOT_BUFFER_CELL_SZ;
+       ocelot_write_rix(ocelot, ocelot_wm_enc(9 * VLAN_ETH_FRAME_LEN),
+                        SYS_ATOP, p);
+       ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG);
+}
+
+static int ocelot_port_open(struct net_device *dev)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       int err;
+
+       /* Enable receiving frames on the port, and activate auto-learning of
+        * MAC addresses.
+        */
+       ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
+                        ANA_PORT_PORT_CFG_RECV_ENA |
+                        ANA_PORT_PORT_CFG_PORTID_VAL(port->chip_port),
+                        ANA_PORT_PORT_CFG, port->chip_port);
+
+       err = phy_connect_direct(dev, port->phy, &ocelot_port_adjust_link,
+                                PHY_INTERFACE_MODE_NA);
+       if (err) {
+               netdev_err(dev, "Could not attach to PHY\n");
+               return err;
+       }
+
+       dev->phydev = port->phy;
+
+       phy_attached_info(port->phy);
+       phy_start(port->phy);
+       return 0;
+}
+
+static int ocelot_port_stop(struct net_device *dev)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+
+       phy_disconnect(port->phy);
+
+       dev->phydev = NULL;
+
+       ocelot_port_writel(port, 0, DEV_MAC_ENA_CFG);
+       ocelot_rmw_rix(port->ocelot, 0, QSYS_SWITCH_PORT_MODE_PORT_ENA,
+                        QSYS_SWITCH_PORT_MODE, port->chip_port);
+       return 0;
+}
+
+/* Generate the IFH for frame injection
+ *
+ * The IFH is a 128-bit value
+ * bit 127: bypass the analyzer processing
+ * bit 56-67: destination mask
+ * bit 28-29: pop_cnt: 3 disables all rewriting of the frame
+ * bit 20-27: cpu extraction queue mask
+ * bit 16: tag type 0: C-tag, 1: S-tag
+ * bit 0-11: VID
+ */
+static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info)
+{
+       ifh[0] = IFH_INJ_BYPASS;
+       ifh[1] = (0xff00 & info->port) >> 8;
+       ifh[2] = (0xff & info->port) << 24;
+       ifh[3] = IFH_INJ_POP_CNT_DISABLE | (info->cpuq << 20) |
+                (info->tag_type << 16) | info->vid;
+
+       return 0;
+}
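
To make the layout above concrete, here is a standalone rendering of the same
packing for chip port 3, all CPU queues, C-tag, VID 0. The two constants are
restated locally from the bit layout in the comment (bit 127 is bit 31 of
word 0; pop_cnt occupies bits 28-29 of word 3), so treat them as assumptions
of this sketch rather than quotes of the driver header.

    #include <inttypes.h>
    #include <stdio.h>

    #define INJ_BYPASS          (UINT32_C(1) << 31)  /* bit 127 of the IFH */
    #define INJ_POP_CNT_DISABLE (UINT32_C(3) << 28)  /* pop_cnt = 3 */

    int main(void)
    {
            uint32_t port = UINT32_C(1) << 3;        /* chip port 3 */
            uint32_t cpuq = 0xff, tag_type = 0, vid = 0;
            uint32_t ifh[4];

            ifh[0] = INJ_BYPASS;
            ifh[1] = (0xff00 & port) >> 8;
            ifh[2] = (0xff & port) << 24;
            ifh[3] = INJ_POP_CNT_DISABLE | (cpuq << 20) |
                     (tag_type << 16) | vid;

            /* prints: 80000000 00000000 08000000 3ff00000 */
            printf("%08" PRIx32 " %08" PRIx32 " %08" PRIx32 " %08" PRIx32 "\n",
                   ifh[0], ifh[1], ifh[2], ifh[3]);
            return 0;
    }
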
+
+static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       u32 val, ifh[IFH_LEN];
+       struct frame_info info = {};
+       u8 grp = 0; /* Send everything on CPU group 0 */
+       unsigned int i, count, last;
+
+       val = ocelot_read(ocelot, QS_INJ_STATUS);
+       if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp))) ||
+           (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))))
+               return NETDEV_TX_BUSY;
+
+       ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) |
+                        QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp);
+
+       info.port = BIT(port->chip_port);
+       info.cpuq = 0xff;
+       ocelot_gen_ifh(ifh, &info);
+
+       for (i = 0; i < IFH_LEN; i++)
+               ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp);
+
+       count = (skb->len + 3) / 4;
+       last = skb->len % 4;
+       for (i = 0; i < count; i++)
+               ocelot_write_rix(ocelot, ((u32 *)skb->data)[i], QS_INJ_WR, grp);
+
+       /* Add padding */
+       while (i < (OCELOT_BUFFER_CELL_SZ / 4)) {
+               ocelot_write_rix(ocelot, 0, QS_INJ_WR, grp);
+               i++;
+       }
+
+       /* Indicate EOF and valid bytes in last word */
+       ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) |
+                        QS_INJ_CTRL_VLD_BYTES(skb->len < OCELOT_BUFFER_CELL_SZ ? 0 : last) |
+                        QS_INJ_CTRL_EOF,
+                        QS_INJ_CTRL, grp);
+
+       /* Add dummy CRC */
+       ocelot_write_rix(ocelot, 0, QS_INJ_WR, grp);
+       skb_tx_timestamp(skb);
+
+       dev->stats.tx_packets++;
+       dev->stats.tx_bytes += skb->len;
+       dev_kfree_skb_any(skb);
+
+       return NETDEV_TX_OK;
+}
+
+static void ocelot_mact_mc_reset(struct ocelot_port *port)
+{
+       struct ocelot *ocelot = port->ocelot;
+       struct netdev_hw_addr *ha, *n;
+
+       /* Free and forget all the MAC addresses stored in the port private mc
+        * list. These are mc addresses that were previously added by calling
+        * ocelot_mact_mc_add().
+        */
+       list_for_each_entry_safe(ha, n, &port->mc, list) {
+               ocelot_mact_forget(ocelot, ha->addr, port->pvid);
+               list_del(&ha->list);
+               kfree(ha);
+       }
+}
+
+static int ocelot_mact_mc_add(struct ocelot_port *port,
+                             struct netdev_hw_addr *hw_addr)
+{
+       struct ocelot *ocelot = port->ocelot;
+       struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL);
+
+       if (!ha)
+               return -ENOMEM;
+
+       memcpy(ha, hw_addr, sizeof(*ha));
+       list_add_tail(&ha->list, &port->mc);
+
+       ocelot_mact_learn(ocelot, PGID_CPU, ha->addr, port->pvid,
+                         ENTRYTYPE_LOCKED);
+
+       return 0;
+}
+
+static void ocelot_set_rx_mode(struct net_device *dev)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       struct netdev_hw_addr *ha;
+       int i;
+       u32 val;
+
+       /* This doesn't handle promiscuous mode because the bridge core sets
+        * IFF_PROMISC on all slave interfaces, so all frames would be
+        * forwarded to the CPU port.
+        */
+       val = GENMASK(ocelot->num_phys_ports - 1, 0);
+       for (i = ocelot->num_phys_ports + 1; i < PGID_CPU; i++)
+               ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
+
+       /* Handle the device multicast addresses. First remove all the
+        * previously installed addresses and then add the latest ones to the
+        * mac table.
+        */
+       ocelot_mact_mc_reset(port);
+       netdev_for_each_mc_addr(ha, dev)
+               ocelot_mact_mc_add(port, ha);
+}
+
+static int ocelot_port_get_phys_port_name(struct net_device *dev,
+                                         char *buf, size_t len)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       int ret;
+
+       ret = snprintf(buf, len, "p%d", port->chip_port);
+       if (ret >= len)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int ocelot_port_set_mac_address(struct net_device *dev, void *p)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       const struct sockaddr *addr = p;
+
+       /* Learn the new net device MAC address in the mac table. */
+       ocelot_mact_learn(ocelot, PGID_CPU, addr->sa_data, port->pvid,
+                         ENTRYTYPE_LOCKED);
+       /* Then forget the previous one. */
+       ocelot_mact_forget(ocelot, dev->dev_addr, port->pvid);
+
+       ether_addr_copy(dev->dev_addr, addr->sa_data);
+       return 0;
+}
+
+static void ocelot_get_stats64(struct net_device *dev,
+                              struct rtnl_link_stats64 *stats)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+
+       /* Configure the port to read the stats from */
+       ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port->chip_port),
+                    SYS_STAT_CFG);
+
+       /* Get Rx stats */
+       stats->rx_bytes = ocelot_read(ocelot, SYS_COUNT_RX_OCTETS);
+       stats->rx_packets = ocelot_read(ocelot, SYS_COUNT_RX_SHORTS) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_FRAGMENTS) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_JABBERS) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_LONGS) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_64) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_65_127) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_128_255) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_256_1023) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_1024_1526) +
+                           ocelot_read(ocelot, SYS_COUNT_RX_1527_MAX);
+       stats->multicast = ocelot_read(ocelot, SYS_COUNT_RX_MULTICAST);
+       stats->rx_dropped = dev->stats.rx_dropped;
+
+       /* Get Tx stats */
+       stats->tx_bytes = ocelot_read(ocelot, SYS_COUNT_TX_OCTETS);
+       stats->tx_packets = ocelot_read(ocelot, SYS_COUNT_TX_64) +
+                           ocelot_read(ocelot, SYS_COUNT_TX_65_127) +
+                           ocelot_read(ocelot, SYS_COUNT_TX_128_511) +
+                           ocelot_read(ocelot, SYS_COUNT_TX_512_1023) +
+                           ocelot_read(ocelot, SYS_COUNT_TX_1024_1526) +
+                           ocelot_read(ocelot, SYS_COUNT_TX_1527_MAX);
+       stats->tx_dropped = ocelot_read(ocelot, SYS_COUNT_TX_DROPS) +
+                           ocelot_read(ocelot, SYS_COUNT_TX_AGING);
+       stats->collisions = ocelot_read(ocelot, SYS_COUNT_TX_COLLISION);
+}
+
+static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+                         struct net_device *dev, const unsigned char *addr,
+                         u16 vid, u16 flags)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+
+       return ocelot_mact_learn(ocelot, port->chip_port, addr, vid,
+                                ENTRYTYPE_NORMAL);
+}
+
+static int ocelot_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+                         struct net_device *dev,
+                         const unsigned char *addr, u16 vid)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+
+       return ocelot_mact_forget(ocelot, addr, vid);
+}
+
+struct ocelot_dump_ctx {
+       struct net_device *dev;
+       struct sk_buff *skb;
+       struct netlink_callback *cb;
+       int idx;
+};
+
+static int ocelot_fdb_do_dump(struct ocelot_mact_entry *entry,
+                             struct ocelot_dump_ctx *dump)
+{
+       u32 portid = NETLINK_CB(dump->cb->skb).portid;
+       u32 seq = dump->cb->nlh->nlmsg_seq;
+       struct nlmsghdr *nlh;
+       struct ndmsg *ndm;
+
+       if (dump->idx < dump->cb->args[2])
+               goto skip;
+
+       nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+                       sizeof(*ndm), NLM_F_MULTI);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       ndm = nlmsg_data(nlh);
+       ndm->ndm_family  = AF_BRIDGE;
+       ndm->ndm_pad1    = 0;
+       ndm->ndm_pad2    = 0;
+       ndm->ndm_flags   = NTF_SELF;
+       ndm->ndm_type    = 0;
+       ndm->ndm_ifindex = dump->dev->ifindex;
+       ndm->ndm_state   = NUD_REACHABLE;
+
+       if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, entry->mac))
+               goto nla_put_failure;
+
+       if (entry->vid && nla_put_u16(dump->skb, NDA_VLAN, entry->vid))
+               goto nla_put_failure;
+
+       nlmsg_end(dump->skb, nlh);
+
+skip:
+       dump->idx++;
+       return 0;
+
+nla_put_failure:
+       nlmsg_cancel(dump->skb, nlh);
+       return -EMSGSIZE;
+}
+
+static inline int ocelot_mact_read(struct ocelot_port *port, int row, int col,
+                                  struct ocelot_mact_entry *entry)
+{
+       struct ocelot *ocelot = port->ocelot;
+       char mac[ETH_ALEN];
+       u32 val, dst, macl, mach;
+
+       /* Set row and column to read from */
+       ocelot_field_write(ocelot, ANA_TABLES_MACTINDX_M_INDEX, row);
+       ocelot_field_write(ocelot, ANA_TABLES_MACTINDX_BUCKET, col);
+
+       /* Issue a read command */
+       ocelot_write(ocelot,
+                    ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_READ),
+                    ANA_TABLES_MACACCESS);
+
+       if (ocelot_mact_wait_for_completion(ocelot))
+               return -ETIMEDOUT;
+
+       /* Read the entry flags */
+       val = ocelot_read(ocelot, ANA_TABLES_MACACCESS);
+       if (!(val & ANA_TABLES_MACACCESS_VALID))
+               return -EINVAL;
+
+       /* If the entry read has another port configured as its destination,
+        * do not report it.
+        */
+       dst = (val & ANA_TABLES_MACACCESS_DEST_IDX_M) >> 3;
+       if (dst != port->chip_port)
+               return -EINVAL;
+
+       /* Get the entry's MAC address and VLAN id */
+       macl = ocelot_read(ocelot, ANA_TABLES_MACLDATA);
+       mach = ocelot_read(ocelot, ANA_TABLES_MACHDATA);
+
+       mac[0] = (mach >> 8)  & 0xff;
+       mac[1] = (mach >> 0)  & 0xff;
+       mac[2] = (macl >> 24) & 0xff;
+       mac[3] = (macl >> 16) & 0xff;
+       mac[4] = (macl >> 8)  & 0xff;
+       mac[5] = (macl >> 0)  & 0xff;
+
+       entry->vid = (mach >> 16) & 0xfff;
+       ether_addr_copy(entry->mac, mac);
+
+       return 0;
+}
+
+static int ocelot_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+                          struct net_device *dev,
+                          struct net_device *filter_dev, int *idx)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       int i, j, ret = 0;
+       struct ocelot_dump_ctx dump = {
+               .dev = dev,
+               .skb = skb,
+               .cb = cb,
+               .idx = *idx,
+       };
+
+       struct ocelot_mact_entry entry;
+
+       /* Loop through all the mac table entries. There are 1024 rows of 4
+        * entries.
+        */
+       for (i = 0; i < 1024; i++) {
+               for (j = 0; j < 4; j++) {
+                       ret = ocelot_mact_read(port, i, j, &entry);
+                       /* If the entry is invalid (e.g. wrong port or stale),
+                        * skip it.
+                        */
+                       if (ret == -EINVAL)
+                               continue;
+                       else if (ret)
+                               goto end;
+
+                       ret = ocelot_fdb_do_dump(&entry, &dump);
+                       if (ret)
+                               goto end;
+               }
+       }
+
+end:
+       *idx = dump.idx;
+       return ret;
+}
+
+static const struct net_device_ops ocelot_port_netdev_ops = {
+       .ndo_open                       = ocelot_port_open,
+       .ndo_stop                       = ocelot_port_stop,
+       .ndo_start_xmit                 = ocelot_port_xmit,
+       .ndo_set_rx_mode                = ocelot_set_rx_mode,
+       .ndo_get_phys_port_name         = ocelot_port_get_phys_port_name,
+       .ndo_set_mac_address            = ocelot_port_set_mac_address,
+       .ndo_get_stats64                = ocelot_get_stats64,
+       .ndo_fdb_add                    = ocelot_fdb_add,
+       .ndo_fdb_del                    = ocelot_fdb_del,
+       .ndo_fdb_dump                   = ocelot_fdb_dump,
+};
+
+static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+       struct ocelot_port *port = netdev_priv(netdev);
+       struct ocelot *ocelot = port->ocelot;
+       int i;
+
+       if (sset != ETH_SS_STATS)
+               return;
+
+       for (i = 0; i < ocelot->num_stats; i++)
+               memcpy(data + i * ETH_GSTRING_LEN, ocelot->stats_layout[i].name,
+                      ETH_GSTRING_LEN);
+}
+
+static void ocelot_check_stats(struct work_struct *work)
+{
+       struct delayed_work *del_work = to_delayed_work(work);
+       struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work);
+       int i, j;
+
+       mutex_lock(&ocelot->stats_lock);
+
+       for (i = 0; i < ocelot->num_phys_ports; i++) {
+               /* Configure the port to read the stats from */
+               ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
+
+               for (j = 0; j < ocelot->num_stats; j++) {
+                       u32 val;
+                       unsigned int idx = i * ocelot->num_stats + j;
+
+                       val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
+                                             ocelot->stats_layout[j].offset);
+
+                       if (val < (ocelot->stats[idx] & U32_MAX))
+                               ocelot->stats[idx] += (u64)1 << 32;
+
+                       ocelot->stats[idx] = (ocelot->stats[idx] &
+                                             ~(u64)U32_MAX) + val;
+               }
+       }
+
+       cancel_delayed_work(&ocelot->stats_work);
+       queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
+                          OCELOT_STATS_CHECK_DELAY);
+
+       mutex_unlock(&ocelot->stats_lock);
+}
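
The update step above extends the hardware's 32-bit counters to 64 bits in
software: when the freshly read low word is smaller than the stored low word,
the counter must have wrapped since the last poll, so a carry of 2^32 is
added before the low word is replaced. A self-contained sketch of that logic
(illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* Fold a fresh 32-bit hardware reading into a 64-bit software
     * counter, mirroring the update step in ocelot_check_stats().
     */
    static uint64_t extend_counter(uint64_t sw, uint32_t hw)
    {
            if (hw < (uint32_t)sw)  /* low word went backwards: a wrap */
                    sw += UINT64_C(1) << 32;
            return (sw & ~(uint64_t)UINT32_MAX) + hw;
    }

    int main(void)
    {
            uint64_t c = 0xfffffff0;                /* close to a wrap */

            c = extend_counter(c, 0x00000010);      /* hardware wrapped */
            assert(c == UINT64_C(0x100000010));
            return 0;
    }
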
+
+static void ocelot_get_ethtool_stats(struct net_device *dev,
+                                    struct ethtool_stats *stats, u64 *data)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       int i;
+
+       /* check and update now */
+       ocelot_check_stats(&ocelot->stats_work.work);
+
+       /* Copy all counters */
+       for (i = 0; i < ocelot->num_stats; i++)
+               *data++ = ocelot->stats[port->chip_port * ocelot->num_stats + i];
+}
+
+static int ocelot_get_sset_count(struct net_device *dev, int sset)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+
+       if (sset != ETH_SS_STATS)
+               return -EOPNOTSUPP;
+       return ocelot->num_stats;
+}
+
+static const struct ethtool_ops ocelot_ethtool_ops = {
+       .get_strings            = ocelot_get_strings,
+       .get_ethtool_stats      = ocelot_get_ethtool_stats,
+       .get_sset_count         = ocelot_get_sset_count,
+};
+
+static int ocelot_port_attr_get(struct net_device *dev,
+                               struct switchdev_attr *attr)
+{
+       struct ocelot_port *ocelot_port = netdev_priv(dev);
+       struct ocelot *ocelot = ocelot_port->ocelot;
+
+       switch (attr->id) {
+       case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+               attr->u.ppid.id_len = sizeof(ocelot->base_mac);
+               memcpy(&attr->u.ppid.id, &ocelot->base_mac,
+                      attr->u.ppid.id_len);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int ocelot_port_attr_stp_state_set(struct ocelot_port *ocelot_port,
+                                         struct switchdev_trans *trans,
+                                         u8 state)
+{
+       struct ocelot *ocelot = ocelot_port->ocelot;
+       u32 port_cfg;
+       int port, i;
+
+       if (switchdev_trans_ph_prepare(trans))
+               return 0;
+
+       if (!(BIT(ocelot_port->chip_port) & ocelot->bridge_mask))
+               return 0;
+
+       port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG,
+                                  ocelot_port->chip_port);
+
+       switch (state) {
+       case BR_STATE_FORWARDING:
+               ocelot->bridge_fwd_mask |= BIT(ocelot_port->chip_port);
+               /* Fallthrough */
+       case BR_STATE_LEARNING:
+               port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA;
+               break;
+
+       default:
+               port_cfg &= ~ANA_PORT_PORT_CFG_LEARN_ENA;
+               ocelot->bridge_fwd_mask &= ~BIT(ocelot_port->chip_port);
+               break;
+       }
+
+       ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG,
+                        ocelot_port->chip_port);
+
+       /* Apply FWD mask. The loop is needed to add/remove the current port as
+        * a source for the other ports.
+        */
+       for (port = 0; port < ocelot->num_phys_ports; port++) {
+               if (ocelot->bridge_fwd_mask & BIT(port)) {
+                       unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(port);
+
+                       for (i = 0; i < ocelot->num_phys_ports; i++) {
+                               unsigned long bond_mask = ocelot->lags[i];
+
+                               if (!bond_mask)
+                                       continue;
+
+                               if (bond_mask & BIT(port)) {
+                                       mask &= ~bond_mask;
+                                       break;
+                               }
+                       }
+
+                       ocelot_write_rix(ocelot,
+                                        BIT(ocelot->num_phys_ports) | mask,
+                                        ANA_PGID_PGID, PGID_SRC + port);
+               } else {
+                       /* Only the CPU port; this is compatible with link
+                        * aggregation.
+                        */
+                       ocelot_write_rix(ocelot,
+                                        BIT(ocelot->num_phys_ports),
+                                        ANA_PGID_PGID, PGID_SRC + port);
+               }
+       }
+
+       return 0;
+}
+
+static void ocelot_port_attr_ageing_set(struct ocelot_port *ocelot_port,
+                                       unsigned long ageing_clock_t)
+{
+       struct ocelot *ocelot = ocelot_port->ocelot;
+       unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t);
+       u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000;
+
+       ocelot_write(ocelot, ANA_AUTOAGE_AGE_PERIOD(ageing_time / 2),
+                    ANA_AUTOAGE);
+}
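
A worked example of the conversion above, under two assumptions: the bridge
hands the ageing time over as clock_t in USER_HZ (100) units, as it does for
its netlink/sysfs interface, and AGE_PERIOD is programmed as half the ageing
time, presumably because the hardware takes two ageing periods to expire an
entry (the datasheet, not this patch, is authoritative on that).

    #include <assert.h>

    int main(void)
    {
            unsigned long ageing_clock_t = 300 * 100;  /* 300 s in USER_HZ */
            unsigned long ageing_secs = ageing_clock_t / 100;

            assert(ageing_secs / 2 == 150);            /* AGE_PERIOD value */
            return 0;
    }
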
+
+static void ocelot_port_attr_mc_set(struct ocelot_port *port, bool mc)
+{
+       struct ocelot *ocelot = port->ocelot;
+       u32 val = ocelot_read_gix(ocelot, ANA_PORT_CPU_FWD_CFG,
+                                 port->chip_port);
+
+       if (mc)
+               val |= ANA_PORT_CPU_FWD_CFG_CPU_IGMP_REDIR_ENA |
+                      ANA_PORT_CPU_FWD_CFG_CPU_MLD_REDIR_ENA |
+                      ANA_PORT_CPU_FWD_CFG_CPU_IPMC_CTRL_COPY_ENA;
+       else
+               val &= ~(ANA_PORT_CPU_FWD_CFG_CPU_IGMP_REDIR_ENA |
+                        ANA_PORT_CPU_FWD_CFG_CPU_MLD_REDIR_ENA |
+                        ANA_PORT_CPU_FWD_CFG_CPU_IPMC_CTRL_COPY_ENA);
+
+       ocelot_write_gix(ocelot, val, ANA_PORT_CPU_FWD_CFG, port->chip_port);
+}
+
+static int ocelot_port_attr_set(struct net_device *dev,
+                               const struct switchdev_attr *attr,
+                               struct switchdev_trans *trans)
+{
+       struct ocelot_port *ocelot_port = netdev_priv(dev);
+       int err = 0;
+
+       switch (attr->id) {
+       case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+               ocelot_port_attr_stp_state_set(ocelot_port, trans,
+                                              attr->u.stp_state);
+               break;
+       case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+               ocelot_port_attr_ageing_set(ocelot_port, attr->u.ageing_time);
+               break;
+       case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
+               ocelot_port_attr_mc_set(ocelot_port, !attr->u.mc_disabled);
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               break;
+       }
+
+       return err;
+}
+
+static struct ocelot_multicast *ocelot_multicast_get(struct ocelot *ocelot,
+                                                    const unsigned char *addr,
+                                                    u16 vid)
+{
+       struct ocelot_multicast *mc;
+
+       list_for_each_entry(mc, &ocelot->multicast, list) {
+               if (ether_addr_equal(mc->addr, addr) && mc->vid == vid)
+                       return mc;
+       }
+
+       return NULL;
+}
+
+static int ocelot_port_obj_add_mdb(struct net_device *dev,
+                                  const struct switchdev_obj_port_mdb *mdb,
+                                  struct switchdev_trans *trans)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       struct ocelot_multicast *mc;
+       unsigned char addr[ETH_ALEN];
+       u16 vid = mdb->vid;
+       bool new = false;
+
+       if (!vid)
+               vid = 1;
+
+       mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
+       if (!mc) {
+               mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);
+               if (!mc)
+                       return -ENOMEM;
+
+               memcpy(mc->addr, mdb->addr, ETH_ALEN);
+               mc->vid = vid;
+
+               list_add_tail(&mc->list, &ocelot->multicast);
+               new = true;
+       }
+
+       memcpy(addr, mc->addr, ETH_ALEN);
+       addr[0] = 0;
+
+       if (!new) {
+               addr[2] = mc->ports << 0;
+               addr[1] = mc->ports << 8;
+               ocelot_mact_forget(ocelot, addr, vid);
+       }
+
+       mc->ports |= BIT(port->chip_port);
+       addr[2] = mc->ports & 0xff;
+       addr[1] = mc->ports >> 8;
+
+       return ocelot_mact_learn(ocelot, 0, addr, vid, ENTRYTYPE_MACv4);
+}
+
+static int ocelot_port_obj_del_mdb(struct net_device *dev,
+                                  const struct switchdev_obj_port_mdb *mdb)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       struct ocelot_multicast *mc;
+       unsigned char addr[ETH_ALEN];
+       u16 vid = mdb->vid;
+
+       if (!vid)
+               vid = 1;
+
+       mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
+       if (!mc)
+               return -ENOENT;
+
+       memcpy(addr, mc->addr, ETH_ALEN);
+       addr[2] = mc->ports & 0xff;
+       addr[1] = mc->ports >> 8;
+       addr[0] = 0;
+       ocelot_mact_forget(ocelot, addr, vid);
+
+       mc->ports &= ~BIT(port->chip_port);
+       if (!mc->ports) {
+               list_del(&mc->list);
+               devm_kfree(ocelot->dev, mc);
+               return 0;
+       }
+
+       addr[2] = mc->ports & 0xff;
+       addr[1] = mc->ports >> 8;
+
+       return ocelot_mact_learn(ocelot, 0, addr, vid, ENTRYTYPE_MACv4);
+}
+
+static int ocelot_port_obj_add(struct net_device *dev,
+                              const struct switchdev_obj *obj,
+                              struct switchdev_trans *trans)
+{
+       int ret = 0;
+
+       switch (obj->id) {
+       case SWITCHDEV_OBJ_ID_PORT_MDB:
+               ret = ocelot_port_obj_add_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+                                             trans);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return ret;
+}
+
+static int ocelot_port_obj_del(struct net_device *dev,
+                              const struct switchdev_obj *obj)
+{
+       int ret = 0;
+
+       switch (obj->id) {
+       case SWITCHDEV_OBJ_ID_PORT_MDB:
+               ret = ocelot_port_obj_del_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return ret;
+}
+
+static const struct switchdev_ops ocelot_port_switchdev_ops = {
+       .switchdev_port_attr_get        = ocelot_port_attr_get,
+       .switchdev_port_attr_set        = ocelot_port_attr_set,
+       .switchdev_port_obj_add         = ocelot_port_obj_add,
+       .switchdev_port_obj_del         = ocelot_port_obj_del,
+};
+
+static int ocelot_port_bridge_join(struct ocelot_port *ocelot_port,
+                                  struct net_device *bridge)
+{
+       struct ocelot *ocelot = ocelot_port->ocelot;
+
+       if (!ocelot->bridge_mask) {
+               ocelot->hw_bridge_dev = bridge;
+       } else {
+               if (ocelot->hw_bridge_dev != bridge)
+                       /* This is adding the port to a second bridge, which
+                        * is unsupported.
+                        */
+                       return -ENODEV;
+       }
+
+       ocelot->bridge_mask |= BIT(ocelot_port->chip_port);
+
+       return 0;
+}
+
+static void ocelot_port_bridge_leave(struct ocelot_port *ocelot_port,
+                                    struct net_device *bridge)
+{
+       struct ocelot *ocelot = ocelot_port->ocelot;
+
+       ocelot->bridge_mask &= ~BIT(ocelot_port->chip_port);
+
+       if (!ocelot->bridge_mask)
+               ocelot->hw_bridge_dev = NULL;
+}
+
+/* Checks if the net_device instance given to us originates from our driver. */
+static bool ocelot_netdevice_dev_check(const struct net_device *dev)
+{
+       return dev->netdev_ops == &ocelot_port_netdev_ops;
+}
+
+static int ocelot_netdevice_port_event(struct net_device *dev,
+                                      unsigned long event,
+                                      struct netdev_notifier_changeupper_info *info)
+{
+       struct ocelot_port *ocelot_port = netdev_priv(dev);
+       int err = 0;
+
+       if (!ocelot_netdevice_dev_check(dev))
+               return 0;
+
+       switch (event) {
+       case NETDEV_CHANGEUPPER:
+               if (netif_is_bridge_master(info->upper_dev)) {
+                       if (info->linking)
+                               err = ocelot_port_bridge_join(ocelot_port,
+                                                             info->upper_dev);
+                       else
+                               ocelot_port_bridge_leave(ocelot_port,
+                                                        info->upper_dev);
+               }
+               break;
+       default:
+               break;
+       }
+
+       return err;
+}
+
+static int ocelot_netdevice_event(struct notifier_block *unused,
+                                 unsigned long event, void *ptr)
+{
+       struct netdev_notifier_changeupper_info *info = ptr;
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       int ret = 0;
+
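+       /* For a LAG master, replay the event on each of its lower netdevs so
+        * that every member switch port joins or leaves the bridge; ret stays
+        * 0 if the LAG has no lower devices.
+        */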
+       if (netif_is_lag_master(dev)) {
+               struct net_device *slave;
+               struct list_head *iter;
+
+               netdev_for_each_lower_dev(dev, slave, iter) {
+                       ret = ocelot_netdevice_port_event(slave, event, info);
+                       if (ret)
+                               goto notify;
+               }
+       } else {
+               ret = ocelot_netdevice_port_event(dev, event, info);
+       }
+
+notify:
+       return notifier_from_errno(ret);
+}
+
+struct notifier_block ocelot_netdevice_nb __read_mostly = {
+       .notifier_call = ocelot_netdevice_event,
+};
+EXPORT_SYMBOL(ocelot_netdevice_nb);
+
+int ocelot_probe_port(struct ocelot *ocelot, u8 port,
+                     void __iomem *regs,
+                     struct phy_device *phy)
+{
+       struct ocelot_port *ocelot_port;
+       struct net_device *dev;
+       int err;
+
+       dev = alloc_etherdev(sizeof(struct ocelot_port));
+       if (!dev)
+               return -ENOMEM;
+       SET_NETDEV_DEV(dev, ocelot->dev);
+       ocelot_port = netdev_priv(dev);
+       ocelot_port->dev = dev;
+       ocelot_port->ocelot = ocelot;
+       ocelot_port->regs = regs;
+       ocelot_port->chip_port = port;
+       ocelot_port->phy = phy;
+       INIT_LIST_HEAD(&ocelot_port->mc);
+       ocelot->ports[port] = ocelot_port;
+
+       dev->netdev_ops = &ocelot_port_netdev_ops;
+       dev->ethtool_ops = &ocelot_ethtool_ops;
+       dev->switchdev_ops = &ocelot_port_switchdev_ops;
+
+       memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN);
+       dev->dev_addr[ETH_ALEN - 1] += port;
+       ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid,
+                         ENTRYTYPE_LOCKED);
+
+       err = register_netdev(dev);
+       if (err) {
+               dev_err(ocelot->dev, "register_netdev failed\n");
+               goto err_register_netdev;
+       }
+
+       return 0;
+
+err_register_netdev:
+       free_netdev(dev);
+       return err;
+}
+EXPORT_SYMBOL(ocelot_probe_port);
+
+int ocelot_init(struct ocelot *ocelot)
+{
+       u32 port;
+       int i, cpu = ocelot->num_phys_ports;
+       char queue_name[32];
+
+       ocelot->stats = devm_kcalloc(ocelot->dev,
+                                    ocelot->num_phys_ports * ocelot->num_stats,
+                                    sizeof(u64), GFP_KERNEL);
+       if (!ocelot->stats)
+               return -ENOMEM;
+
+       mutex_init(&ocelot->stats_lock);
+       snprintf(queue_name, sizeof(queue_name), "%s-stats",
+                dev_name(ocelot->dev));
+       ocelot->stats_queue = create_singlethread_workqueue(queue_name);
+       if (!ocelot->stats_queue)
+               return -ENOMEM;
+
+       ocelot_mact_init(ocelot);
+       ocelot_vlan_init(ocelot);
+
+       for (port = 0; port < ocelot->num_phys_ports; port++) {
+               /* Clear all counters (5 groups) */
+               ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port) |
+                                    SYS_STAT_CFG_STAT_CLEAR_SHOT(0x7f),
+                            SYS_STAT_CFG);
+       }
+
+       /* Only use S-Tag */
+       ocelot_write(ocelot, ETH_P_8021AD, SYS_VLAN_ETYPE_CFG);
+
+       /* Aggregation mode */
+       ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |
+                            ANA_AGGR_CFG_AC_DMAC_ENA |
+                            ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |
+                            ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG);
+
+       /* Set MAC age time to default value. The entry is aged after
+        * 2*AGE_PERIOD
+        */
+       ocelot_write(ocelot,
+                    ANA_AUTOAGE_AGE_PERIOD(BR_DEFAULT_AGEING_TIME / 2 / HZ),
+                    ANA_AUTOAGE);
+
+       /* Disable learning for frames discarded by VLAN ingress filtering */
+       regmap_field_write(ocelot->regfields[ANA_ADVLEARN_VLAN_CHK], 1);
+
+       /* Setup frame ageing - fixed value "2 sec" - in 6.5 us units */
+       ocelot_write(ocelot, SYS_FRM_AGING_AGE_TX_ENA |
+                    SYS_FRM_AGING_MAX_AGE(307692), SYS_FRM_AGING);
+
+       /* Setup flooding PGIDs */
+       ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
+                        ANA_FLOODING_FLD_BROADCAST(PGID_MC) |
+                        ANA_FLOODING_FLD_UNICAST(PGID_UC),
+                        ANA_FLOODING, 0);
+       ocelot_write(ocelot, ANA_FLOODING_IPMC_FLD_MC6_DATA(PGID_MCIPV6) |
+                    ANA_FLOODING_IPMC_FLD_MC6_CTRL(PGID_MC) |
+                    ANA_FLOODING_IPMC_FLD_MC4_DATA(PGID_MCIPV4) |
+                    ANA_FLOODING_IPMC_FLD_MC4_CTRL(PGID_MC),
+                    ANA_FLOODING_IPMC);
+
+       for (port = 0; port < ocelot->num_phys_ports; port++) {
+               /* Transmit the frame to the local port. */
+               ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
+               /* Do not forward BPDU frames to the front ports. */
+               ocelot_write_gix(ocelot,
+                                ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
+                                ANA_PORT_CPU_FWD_BPDU_CFG,
+                                port);
+               /* Ensure bridging is disabled */
+               ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_SRC + port);
+       }
+
+       /* Configure and enable the CPU port. */
+       ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, cpu);
+       ocelot_write_rix(ocelot, BIT(cpu), ANA_PGID_PGID, PGID_CPU);
+       ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_RECV_ENA |
+                        ANA_PORT_PORT_CFG_PORTID_VAL(cpu),
+                        ANA_PORT_PORT_CFG, cpu);
+
+       /* Allow broadcast MAC frames. */
+       for (i = ocelot->num_phys_ports + 1; i < PGID_CPU; i++) {
+               u32 val = ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports - 1, 0));
+
+               ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
+       }
+       ocelot_write_rix(ocelot,
+                        ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
+                        ANA_PGID_PGID, PGID_MC);
+       ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4);
+       ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6);
+
+       /* CPU port Injection/Extraction configuration */
+       ocelot_write_rix(ocelot, QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE |
+                        QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(1) |
+                        QSYS_SWITCH_PORT_MODE_PORT_ENA,
+                        QSYS_SWITCH_PORT_MODE, cpu);
+       ocelot_write_rix(ocelot, SYS_PORT_MODE_INCL_XTR_HDR(1) |
+                        SYS_PORT_MODE_INCL_INJ_HDR(1), SYS_PORT_MODE, cpu);
+       /* Allow manual injection via the DEVCPU_QS registers, and byte-swap
+        * the data in these registers for host endianness.
+        */
+       ocelot_write_rix(ocelot, QS_INJ_GRP_CFG_BYTE_SWAP |
+                        QS_INJ_GRP_CFG_MODE(1), QS_INJ_GRP_CFG, 0);
+       ocelot_write_rix(ocelot, QS_XTR_GRP_CFG_BYTE_SWAP |
+                        QS_XTR_GRP_CFG_MODE(1), QS_XTR_GRP_CFG, 0);
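+       /* Map CPU-redirected traffic classes to extraction queues: learn,
+        * mirror, copy and locked-port-move events go to queue 2, while
+        * bridge, IGMP and MLD control frames go to queue 6.
+        */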
+       ocelot_write(ocelot, ANA_CPUQ_CFG_CPUQ_MIRROR(2) |
+                    ANA_CPUQ_CFG_CPUQ_LRN(2) |
+                    ANA_CPUQ_CFG_CPUQ_MAC_COPY(2) |
+                    ANA_CPUQ_CFG_CPUQ_SRC_COPY(2) |
+                    ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE(2) |
+                    ANA_CPUQ_CFG_CPUQ_ALLBRIDGE(6) |
+                    ANA_CPUQ_CFG_CPUQ_IPMC_CTRL(6) |
+                    ANA_CPUQ_CFG_CPUQ_IGMP(6) |
+                    ANA_CPUQ_CFG_CPUQ_MLD(6), ANA_CPUQ_CFG);
+       for (i = 0; i < 16; i++)
+               ocelot_write_rix(ocelot, ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL(6) |
+                                ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
+                                ANA_CPUQ_8021_CFG, i);
+
+       INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats);
+       queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
+                          OCELOT_STATS_CHECK_DELAY);
+       return 0;
+}
+EXPORT_SYMBOL(ocelot_init);
+
+void ocelot_deinit(struct ocelot *ocelot)
+{
+       destroy_workqueue(ocelot->stats_queue);
+       mutex_destroy(&ocelot->stats_lock);
+}
+EXPORT_SYMBOL(ocelot_deinit);
+
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
new file mode 100644 (file)
index 0000000..097bd12
--- /dev/null
@@ -0,0 +1,572 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_H_
+#define _MSCC_OCELOT_H_
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "ocelot_ana.h"
+#include "ocelot_dev.h"
+#include "ocelot_hsio.h"
+#include "ocelot_qsys.h"
+#include "ocelot_rew.h"
+#include "ocelot_sys.h"
+#include "ocelot_qs.h"
+
+#define PGID_AGGR    64
+#define PGID_SRC     80
+
+/* Reserved PGIDs */
+#define PGID_CPU     (PGID_AGGR - 5)
+#define PGID_UC      (PGID_AGGR - 4)
+#define PGID_MC      (PGID_AGGR - 3)
+#define PGID_MCIPV4  (PGID_AGGR - 2)
+#define PGID_MCIPV6  (PGID_AGGR - 1)
+
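+/* The reserved PGIDs above are flood/redirect masks: ocelot_init() points
+ * broadcast and unknown (IP) multicast at PGID_MC/PGID_MCIPV4/PGID_MCIPV6 and
+ * CPU-bound traffic at PGID_CPU, while PGIDs 0..num_phys_ports - 1 hold the
+ * per-port destination masks.
+ */
+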
+#define OCELOT_BUFFER_CELL_SZ 60
+
+#define OCELOT_STATS_CHECK_DELAY (2 * HZ)
+
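+/* Injection/extraction frame header length, in 32-bit words */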
+#define IFH_LEN 4
+
+struct frame_info {
+       u32 len;
+       u16 port;
+       u16 vid;
+       u8 cpuq;
+       u8 tag_type;
+};
+
+#define IFH_INJ_BYPASS BIT(31)
+#define IFH_INJ_POP_CNT_DISABLE (3 << 28)
+
+#define IFH_TAG_TYPE_C 0
+#define IFH_TAG_TYPE_S 1
+
+#define OCELOT_SPEED_2500 0
+#define OCELOT_SPEED_1000 1
+#define OCELOT_SPEED_100  2
+#define OCELOT_SPEED_10   3
+
+#define TARGET_OFFSET 24
+#define REG_MASK GENMASK(TARGET_OFFSET - 1, 0)
+#define REG(reg, offset) [reg & REG_MASK] = offset
+
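+/* A register enum value encodes its target block in the bits above
+ * TARGET_OFFSET and an index into that target's offset table in the bits
+ * below; the REG() initializer populates those per-target offset tables.
+ */
+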
+enum ocelot_target {
+       ANA = 1,
+       QS,
+       QSYS,
+       REW,
+       SYS,
+       HSIO,
+       TARGET_MAX,
+};
+
+enum ocelot_reg {
+       ANA_ADVLEARN = ANA << TARGET_OFFSET,
+       ANA_VLANMASK,
+       ANA_PORT_B_DOMAIN,
+       ANA_ANAGEFIL,
+       ANA_ANEVENTS,
+       ANA_STORMLIMIT_BURST,
+       ANA_STORMLIMIT_CFG,
+       ANA_ISOLATED_PORTS,
+       ANA_COMMUNITY_PORTS,
+       ANA_AUTOAGE,
+       ANA_MACTOPTIONS,
+       ANA_LEARNDISC,
+       ANA_AGENCTRL,
+       ANA_MIRRORPORTS,
+       ANA_EMIRRORPORTS,
+       ANA_FLOODING,
+       ANA_FLOODING_IPMC,
+       ANA_SFLOW_CFG,
+       ANA_PORT_MODE,
+       ANA_CUT_THRU_CFG,
+       ANA_PGID_PGID,
+       ANA_TABLES_ANMOVED,
+       ANA_TABLES_MACHDATA,
+       ANA_TABLES_MACLDATA,
+       ANA_TABLES_STREAMDATA,
+       ANA_TABLES_MACACCESS,
+       ANA_TABLES_MACTINDX,
+       ANA_TABLES_VLANACCESS,
+       ANA_TABLES_VLANTIDX,
+       ANA_TABLES_ISDXACCESS,
+       ANA_TABLES_ISDXTIDX,
+       ANA_TABLES_ENTRYLIM,
+       ANA_TABLES_PTP_ID_HIGH,
+       ANA_TABLES_PTP_ID_LOW,
+       ANA_TABLES_STREAMACCESS,
+       ANA_TABLES_STREAMTIDX,
+       ANA_TABLES_SEQ_HISTORY,
+       ANA_TABLES_SEQ_MASK,
+       ANA_TABLES_SFID_MASK,
+       ANA_TABLES_SFIDACCESS,
+       ANA_TABLES_SFIDTIDX,
+       ANA_MSTI_STATE,
+       ANA_OAM_UPM_LM_CNT,
+       ANA_SG_ACCESS_CTRL,
+       ANA_SG_CONFIG_REG_1,
+       ANA_SG_CONFIG_REG_2,
+       ANA_SG_CONFIG_REG_3,
+       ANA_SG_CONFIG_REG_4,
+       ANA_SG_CONFIG_REG_5,
+       ANA_SG_GCL_GS_CONFIG,
+       ANA_SG_GCL_TI_CONFIG,
+       ANA_SG_STATUS_REG_1,
+       ANA_SG_STATUS_REG_2,
+       ANA_SG_STATUS_REG_3,
+       ANA_PORT_VLAN_CFG,
+       ANA_PORT_DROP_CFG,
+       ANA_PORT_QOS_CFG,
+       ANA_PORT_VCAP_CFG,
+       ANA_PORT_VCAP_S1_KEY_CFG,
+       ANA_PORT_VCAP_S2_CFG,
+       ANA_PORT_PCP_DEI_MAP,
+       ANA_PORT_CPU_FWD_CFG,
+       ANA_PORT_CPU_FWD_BPDU_CFG,
+       ANA_PORT_CPU_FWD_GARP_CFG,
+       ANA_PORT_CPU_FWD_CCM_CFG,
+       ANA_PORT_PORT_CFG,
+       ANA_PORT_POL_CFG,
+       ANA_PORT_PTP_CFG,
+       ANA_PORT_PTP_DLY1_CFG,
+       ANA_PORT_PTP_DLY2_CFG,
+       ANA_PORT_SFID_CFG,
+       ANA_PFC_PFC_CFG,
+       ANA_PFC_PFC_TIMER,
+       ANA_IPT_OAM_MEP_CFG,
+       ANA_IPT_IPT,
+       ANA_PPT_PPT,
+       ANA_FID_MAP_FID_MAP,
+       ANA_AGGR_CFG,
+       ANA_CPUQ_CFG,
+       ANA_CPUQ_CFG2,
+       ANA_CPUQ_8021_CFG,
+       ANA_DSCP_CFG,
+       ANA_DSCP_REWR_CFG,
+       ANA_VCAP_RNG_TYPE_CFG,
+       ANA_VCAP_RNG_VAL_CFG,
+       ANA_VRAP_CFG,
+       ANA_VRAP_HDR_DATA,
+       ANA_VRAP_HDR_MASK,
+       ANA_DISCARD_CFG,
+       ANA_FID_CFG,
+       ANA_POL_PIR_CFG,
+       ANA_POL_CIR_CFG,
+       ANA_POL_MODE_CFG,
+       ANA_POL_PIR_STATE,
+       ANA_POL_CIR_STATE,
+       ANA_POL_STATE,
+       ANA_POL_FLOWC,
+       ANA_POL_HYST,
+       ANA_POL_MISC_CFG,
+       QS_XTR_GRP_CFG = QS << TARGET_OFFSET,
+       QS_XTR_RD,
+       QS_XTR_FRM_PRUNING,
+       QS_XTR_FLUSH,
+       QS_XTR_DATA_PRESENT,
+       QS_XTR_CFG,
+       QS_INJ_GRP_CFG,
+       QS_INJ_WR,
+       QS_INJ_CTRL,
+       QS_INJ_STATUS,
+       QS_INJ_ERR,
+       QS_INH_DBG,
+       QSYS_PORT_MODE = QSYS << TARGET_OFFSET,
+       QSYS_SWITCH_PORT_MODE,
+       QSYS_STAT_CNT_CFG,
+       QSYS_EEE_CFG,
+       QSYS_EEE_THRES,
+       QSYS_IGR_NO_SHARING,
+       QSYS_EGR_NO_SHARING,
+       QSYS_SW_STATUS,
+       QSYS_EXT_CPU_CFG,
+       QSYS_PAD_CFG,
+       QSYS_CPU_GROUP_MAP,
+       QSYS_QMAP,
+       QSYS_ISDX_SGRP,
+       QSYS_TIMED_FRAME_ENTRY,
+       QSYS_TFRM_MISC,
+       QSYS_TFRM_PORT_DLY,
+       QSYS_TFRM_TIMER_CFG_1,
+       QSYS_TFRM_TIMER_CFG_2,
+       QSYS_TFRM_TIMER_CFG_3,
+       QSYS_TFRM_TIMER_CFG_4,
+       QSYS_TFRM_TIMER_CFG_5,
+       QSYS_TFRM_TIMER_CFG_6,
+       QSYS_TFRM_TIMER_CFG_7,
+       QSYS_TFRM_TIMER_CFG_8,
+       QSYS_RED_PROFILE,
+       QSYS_RES_QOS_MODE,
+       QSYS_RES_CFG,
+       QSYS_RES_STAT,
+       QSYS_EGR_DROP_MODE,
+       QSYS_EQ_CTRL,
+       QSYS_EVENTS_CORE,
+       QSYS_QMAXSDU_CFG_0,
+       QSYS_QMAXSDU_CFG_1,
+       QSYS_QMAXSDU_CFG_2,
+       QSYS_QMAXSDU_CFG_3,
+       QSYS_QMAXSDU_CFG_4,
+       QSYS_QMAXSDU_CFG_5,
+       QSYS_QMAXSDU_CFG_6,
+       QSYS_QMAXSDU_CFG_7,
+       QSYS_PREEMPTION_CFG,
+       QSYS_CIR_CFG,
+       QSYS_EIR_CFG,
+       QSYS_SE_CFG,
+       QSYS_SE_DWRR_CFG,
+       QSYS_SE_CONNECT,
+       QSYS_SE_DLB_SENSE,
+       QSYS_CIR_STATE,
+       QSYS_EIR_STATE,
+       QSYS_SE_STATE,
+       QSYS_HSCH_MISC_CFG,
+       QSYS_TAG_CONFIG,
+       QSYS_TAS_PARAM_CFG_CTRL,
+       QSYS_PORT_MAX_SDU,
+       QSYS_PARAM_CFG_REG_1,
+       QSYS_PARAM_CFG_REG_2,
+       QSYS_PARAM_CFG_REG_3,
+       QSYS_PARAM_CFG_REG_4,
+       QSYS_PARAM_CFG_REG_5,
+       QSYS_GCL_CFG_REG_1,
+       QSYS_GCL_CFG_REG_2,
+       QSYS_PARAM_STATUS_REG_1,
+       QSYS_PARAM_STATUS_REG_2,
+       QSYS_PARAM_STATUS_REG_3,
+       QSYS_PARAM_STATUS_REG_4,
+       QSYS_PARAM_STATUS_REG_5,
+       QSYS_PARAM_STATUS_REG_6,
+       QSYS_PARAM_STATUS_REG_7,
+       QSYS_PARAM_STATUS_REG_8,
+       QSYS_PARAM_STATUS_REG_9,
+       QSYS_GCL_STATUS_REG_1,
+       QSYS_GCL_STATUS_REG_2,
+       REW_PORT_VLAN_CFG = REW << TARGET_OFFSET,
+       REW_TAG_CFG,
+       REW_PORT_CFG,
+       REW_DSCP_CFG,
+       REW_PCP_DEI_QOS_MAP_CFG,
+       REW_PTP_CFG,
+       REW_PTP_DLY1_CFG,
+       REW_RED_TAG_CFG,
+       REW_DSCP_REMAP_DP1_CFG,
+       REW_DSCP_REMAP_CFG,
+       REW_STAT_CFG,
+       REW_REW_STICKY,
+       REW_PPT,
+       SYS_COUNT_RX_OCTETS = SYS << TARGET_OFFSET,
+       SYS_COUNT_RX_UNICAST,
+       SYS_COUNT_RX_MULTICAST,
+       SYS_COUNT_RX_BROADCAST,
+       SYS_COUNT_RX_SHORTS,
+       SYS_COUNT_RX_FRAGMENTS,
+       SYS_COUNT_RX_JABBERS,
+       SYS_COUNT_RX_CRC_ALIGN_ERRS,
+       SYS_COUNT_RX_SYM_ERRS,
+       SYS_COUNT_RX_64,
+       SYS_COUNT_RX_65_127,
+       SYS_COUNT_RX_128_255,
+       SYS_COUNT_RX_256_1023,
+       SYS_COUNT_RX_1024_1526,
+       SYS_COUNT_RX_1527_MAX,
+       SYS_COUNT_RX_PAUSE,
+       SYS_COUNT_RX_CONTROL,
+       SYS_COUNT_RX_LONGS,
+       SYS_COUNT_RX_CLASSIFIED_DROPS,
+       SYS_COUNT_TX_OCTETS,
+       SYS_COUNT_TX_UNICAST,
+       SYS_COUNT_TX_MULTICAST,
+       SYS_COUNT_TX_BROADCAST,
+       SYS_COUNT_TX_COLLISION,
+       SYS_COUNT_TX_DROPS,
+       SYS_COUNT_TX_PAUSE,
+       SYS_COUNT_TX_64,
+       SYS_COUNT_TX_65_127,
+       SYS_COUNT_TX_128_511,
+       SYS_COUNT_TX_512_1023,
+       SYS_COUNT_TX_1024_1526,
+       SYS_COUNT_TX_1527_MAX,
+       SYS_COUNT_TX_AGING,
+       SYS_RESET_CFG,
+       SYS_SR_ETYPE_CFG,
+       SYS_VLAN_ETYPE_CFG,
+       SYS_PORT_MODE,
+       SYS_FRONT_PORT_MODE,
+       SYS_FRM_AGING,
+       SYS_STAT_CFG,
+       SYS_SW_STATUS,
+       SYS_MISC_CFG,
+       SYS_REW_MAC_HIGH_CFG,
+       SYS_REW_MAC_LOW_CFG,
+       SYS_TIMESTAMP_OFFSET,
+       SYS_CMID,
+       SYS_PAUSE_CFG,
+       SYS_PAUSE_TOT_CFG,
+       SYS_ATOP,
+       SYS_ATOP_TOT_CFG,
+       SYS_MAC_FC_CFG,
+       SYS_MMGT,
+       SYS_MMGT_FAST,
+       SYS_EVENTS_DIF,
+       SYS_EVENTS_CORE,
+       SYS_CNT,
+       SYS_PTP_STATUS,
+       SYS_PTP_TXSTAMP,
+       SYS_PTP_NXT,
+       SYS_PTP_CFG,
+       SYS_RAM_INIT,
+       SYS_CM_ADDR,
+       SYS_CM_DATA_WR,
+       SYS_CM_DATA_RD,
+       SYS_CM_OP,
+       SYS_CM_DATA,
+       HSIO_PLL5G_CFG0 = HSIO << TARGET_OFFSET,
+       HSIO_PLL5G_CFG1,
+       HSIO_PLL5G_CFG2,
+       HSIO_PLL5G_CFG3,
+       HSIO_PLL5G_CFG4,
+       HSIO_PLL5G_CFG5,
+       HSIO_PLL5G_CFG6,
+       HSIO_PLL5G_STATUS0,
+       HSIO_PLL5G_STATUS1,
+       HSIO_PLL5G_BIST_CFG0,
+       HSIO_PLL5G_BIST_CFG1,
+       HSIO_PLL5G_BIST_CFG2,
+       HSIO_PLL5G_BIST_STAT0,
+       HSIO_PLL5G_BIST_STAT1,
+       HSIO_RCOMP_CFG0,
+       HSIO_RCOMP_STATUS,
+       HSIO_SYNC_ETH_CFG,
+       HSIO_SYNC_ETH_PLL_CFG,
+       HSIO_S1G_DES_CFG,
+       HSIO_S1G_IB_CFG,
+       HSIO_S1G_OB_CFG,
+       HSIO_S1G_SER_CFG,
+       HSIO_S1G_COMMON_CFG,
+       HSIO_S1G_PLL_CFG,
+       HSIO_S1G_PLL_STATUS,
+       HSIO_S1G_DFT_CFG0,
+       HSIO_S1G_DFT_CFG1,
+       HSIO_S1G_DFT_CFG2,
+       HSIO_S1G_TP_CFG,
+       HSIO_S1G_RC_PLL_BIST_CFG,
+       HSIO_S1G_MISC_CFG,
+       HSIO_S1G_DFT_STATUS,
+       HSIO_S1G_MISC_STATUS,
+       HSIO_MCB_S1G_ADDR_CFG,
+       HSIO_S6G_DIG_CFG,
+       HSIO_S6G_DFT_CFG0,
+       HSIO_S6G_DFT_CFG1,
+       HSIO_S6G_DFT_CFG2,
+       HSIO_S6G_TP_CFG0,
+       HSIO_S6G_TP_CFG1,
+       HSIO_S6G_RC_PLL_BIST_CFG,
+       HSIO_S6G_MISC_CFG,
+       HSIO_S6G_OB_ANEG_CFG,
+       HSIO_S6G_DFT_STATUS,
+       HSIO_S6G_ERR_CNT,
+       HSIO_S6G_MISC_STATUS,
+       HSIO_S6G_DES_CFG,
+       HSIO_S6G_IB_CFG,
+       HSIO_S6G_IB_CFG1,
+       HSIO_S6G_IB_CFG2,
+       HSIO_S6G_IB_CFG3,
+       HSIO_S6G_IB_CFG4,
+       HSIO_S6G_IB_CFG5,
+       HSIO_S6G_OB_CFG,
+       HSIO_S6G_OB_CFG1,
+       HSIO_S6G_SER_CFG,
+       HSIO_S6G_COMMON_CFG,
+       HSIO_S6G_PLL_CFG,
+       HSIO_S6G_ACJTAG_CFG,
+       HSIO_S6G_GP_CFG,
+       HSIO_S6G_IB_STATUS0,
+       HSIO_S6G_IB_STATUS1,
+       HSIO_S6G_ACJTAG_STATUS,
+       HSIO_S6G_PLL_STATUS,
+       HSIO_S6G_REVID,
+       HSIO_MCB_S6G_ADDR_CFG,
+       HSIO_HW_CFG,
+       HSIO_HW_QSGMII_CFG,
+       HSIO_HW_QSGMII_STAT,
+       HSIO_CLK_CFG,
+       HSIO_TEMP_SENSOR_CTRL,
+       HSIO_TEMP_SENSOR_CFG,
+       HSIO_TEMP_SENSOR_STAT,
+};
+
+enum ocelot_regfield {
+       ANA_ADVLEARN_VLAN_CHK,
+       ANA_ADVLEARN_LEARN_MIRROR,
+       ANA_ANEVENTS_FLOOD_DISCARD,
+       ANA_ANEVENTS_MSTI_DROP,
+       ANA_ANEVENTS_ACLKILL,
+       ANA_ANEVENTS_ACLUSED,
+       ANA_ANEVENTS_AUTOAGE,
+       ANA_ANEVENTS_VS2TTL1,
+       ANA_ANEVENTS_STORM_DROP,
+       ANA_ANEVENTS_LEARN_DROP,
+       ANA_ANEVENTS_AGED_ENTRY,
+       ANA_ANEVENTS_CPU_LEARN_FAILED,
+       ANA_ANEVENTS_AUTO_LEARN_FAILED,
+       ANA_ANEVENTS_LEARN_REMOVE,
+       ANA_ANEVENTS_AUTO_LEARNED,
+       ANA_ANEVENTS_AUTO_MOVED,
+       ANA_ANEVENTS_DROPPED,
+       ANA_ANEVENTS_CLASSIFIED_DROP,
+       ANA_ANEVENTS_CLASSIFIED_COPY,
+       ANA_ANEVENTS_VLAN_DISCARD,
+       ANA_ANEVENTS_FWD_DISCARD,
+       ANA_ANEVENTS_MULTICAST_FLOOD,
+       ANA_ANEVENTS_UNICAST_FLOOD,
+       ANA_ANEVENTS_DEST_KNOWN,
+       ANA_ANEVENTS_BUCKET3_MATCH,
+       ANA_ANEVENTS_BUCKET2_MATCH,
+       ANA_ANEVENTS_BUCKET1_MATCH,
+       ANA_ANEVENTS_BUCKET0_MATCH,
+       ANA_ANEVENTS_CPU_OPERATION,
+       ANA_ANEVENTS_DMAC_LOOKUP,
+       ANA_ANEVENTS_SMAC_LOOKUP,
+       ANA_ANEVENTS_SEQ_GEN_ERR_0,
+       ANA_ANEVENTS_SEQ_GEN_ERR_1,
+       ANA_TABLES_MACACCESS_B_DOM,
+       ANA_TABLES_MACTINDX_BUCKET,
+       ANA_TABLES_MACTINDX_M_INDEX,
+       QSYS_TIMED_FRAME_ENTRY_TFRM_VLD,
+       QSYS_TIMED_FRAME_ENTRY_TFRM_FP,
+       QSYS_TIMED_FRAME_ENTRY_TFRM_PORTNO,
+       QSYS_TIMED_FRAME_ENTRY_TFRM_TM_SEL,
+       QSYS_TIMED_FRAME_ENTRY_TFRM_TM_T,
+       SYS_RESET_CFG_CORE_ENA,
+       SYS_RESET_CFG_MEM_ENA,
+       SYS_RESET_CFG_MEM_INIT,
+       REGFIELD_MAX
+};
+
+struct ocelot_multicast {
+       struct list_head list;
+       unsigned char addr[ETH_ALEN];
+       u16 vid;
+       u16 ports;
+};
+
+struct ocelot_port;
+
+struct ocelot_stat_layout {
+       u32 offset;
+       char name[ETH_GSTRING_LEN];
+};
+
+struct ocelot {
+       struct device *dev;
+
+       struct regmap *targets[TARGET_MAX];
+       struct regmap_field *regfields[REGFIELD_MAX];
+       const u32 *const *map;
+       const struct ocelot_stat_layout *stats_layout;
+       unsigned int num_stats;
+
+       u8 base_mac[ETH_ALEN];
+
+       struct net_device *hw_bridge_dev;
+       u16 bridge_mask;
+       u16 bridge_fwd_mask;
+
+       struct workqueue_struct *ocelot_owq;
+
+       int shared_queue_sz;
+
+       u8 num_phys_ports;
+       u8 num_cpu_ports;
+       struct ocelot_port **ports;
+
+       u16 lags[16];
+
+       /* Keep track of the vlan port masks */
+       u32 vlan_mask[VLAN_N_VID];
+
+       struct list_head multicast;
+
+       /* Workqueue to check statistics for overflow, and the lock protecting the counters */
+       struct mutex stats_lock;
+       u64 *stats;
+       struct delayed_work stats_work;
+       struct workqueue_struct *stats_queue;
+};
+
+struct ocelot_port {
+       struct net_device *dev;
+       struct ocelot *ocelot;
+       struct phy_device *phy;
+       void __iomem *regs;
+       u8 chip_port;
+       /* Keep track of the mc addresses added to the MAC table, so that they
+        * can be removed when needed.
+        */
+       struct list_head mc;
+
+       /* Ingress default VLAN (pvid) */
+       u16 pvid;
+
+       /* Egress default VLAN (vid) */
+       u16 vid;
+
+       u8 vlan_aware;
+
+       u64 *stats;
+};
+
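+/* Register accessors: the _ix/_gix/_rix variants index a register group (gi)
+ * and/or replication (ri) using the per-register _GSZ and _RSZ strides
+ * defined in the ocelot_*.h headers, e.g.
+ * ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port) writes
+ * replication "port" of ANA_PGID_PGID.
+ */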
+u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
+#define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
+#define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
+#define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
+#define ocelot_read(ocelot, reg) __ocelot_read_ix(ocelot, reg, 0)
+
+void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
+#define ocelot_write_ix(ocelot, val, reg, gi, ri) __ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
+#define ocelot_write_gix(ocelot, val, reg, gi) __ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi))
+#define ocelot_write_rix(ocelot, val, reg, ri) __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri))
+#define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0)
+
+void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 mask,
+                    u32 offset);
+#define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
+#define ocelot_rmw_gix(ocelot, val, m, reg, gi) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi))
+#define ocelot_rmw_rix(ocelot, val, m, reg, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_RSZ * (ri))
+#define ocelot_rmw(ocelot, val, m, reg) __ocelot_rmw_ix(ocelot, val, m, reg, 0)
+
+u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
+void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
+
+int ocelot_regfields_init(struct ocelot *ocelot,
+                         const struct reg_field *const regfields);
+struct regmap *ocelot_io_platform_init(struct ocelot *ocelot,
+                                      struct platform_device *pdev,
+                                      const char *name);
+
+#define ocelot_field_write(ocelot, reg, val) regmap_field_write((ocelot)->regfields[(reg)], (val))
+#define ocelot_field_read(ocelot, reg, val) regmap_field_read((ocelot)->regfields[(reg)], (val))
+
+int ocelot_init(struct ocelot *ocelot);
+void ocelot_deinit(struct ocelot *ocelot);
+int ocelot_chip_init(struct ocelot *ocelot);
+int ocelot_probe_port(struct ocelot *ocelot, u8 port,
+                     void __iomem *regs,
+                     struct phy_device *phy);
+
+extern struct notifier_block ocelot_netdevice_nb;
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_ana.h b/drivers/net/ethernet/mscc/ocelot_ana.h
new file mode 100644 (file)
index 0000000..841c6ec
--- /dev/null
@@ -0,0 +1,625 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_ANA_H_
+#define _MSCC_OCELOT_ANA_H_
+
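+/* Field helper convention used throughout this header: FIELD(x) shifts a
+ * value into position, FIELD_M is the field's mask, and FIELD_X(x) extracts
+ * the field from a register value.
+ */
+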
+#define ANA_ANAGEFIL_B_DOM_EN                             BIT(22)
+#define ANA_ANAGEFIL_B_DOM_VAL                            BIT(21)
+#define ANA_ANAGEFIL_AGE_LOCKED                           BIT(20)
+#define ANA_ANAGEFIL_PID_EN                               BIT(19)
+#define ANA_ANAGEFIL_PID_VAL(x)                           (((x) << 14) & GENMASK(18, 14))
+#define ANA_ANAGEFIL_PID_VAL_M                            GENMASK(18, 14)
+#define ANA_ANAGEFIL_PID_VAL_X(x)                         (((x) & GENMASK(18, 14)) >> 14)
+#define ANA_ANAGEFIL_VID_EN                               BIT(13)
+#define ANA_ANAGEFIL_VID_VAL(x)                           ((x) & GENMASK(12, 0))
+#define ANA_ANAGEFIL_VID_VAL_M                            GENMASK(12, 0)
+
+#define ANA_STORMLIMIT_CFG_RSZ                            0x4
+
+#define ANA_STORMLIMIT_CFG_STORM_RATE(x)                  (((x) << 3) & GENMASK(6, 3))
+#define ANA_STORMLIMIT_CFG_STORM_RATE_M                   GENMASK(6, 3)
+#define ANA_STORMLIMIT_CFG_STORM_RATE_X(x)                (((x) & GENMASK(6, 3)) >> 3)
+#define ANA_STORMLIMIT_CFG_STORM_UNIT                     BIT(2)
+#define ANA_STORMLIMIT_CFG_STORM_MODE(x)                  ((x) & GENMASK(1, 0))
+#define ANA_STORMLIMIT_CFG_STORM_MODE_M                   GENMASK(1, 0)
+
+#define ANA_AUTOAGE_AGE_FAST                              BIT(21)
+#define ANA_AUTOAGE_AGE_PERIOD(x)                         (((x) << 1) & GENMASK(20, 1))
+#define ANA_AUTOAGE_AGE_PERIOD_M                          GENMASK(20, 1)
+#define ANA_AUTOAGE_AGE_PERIOD_X(x)                       (((x) & GENMASK(20, 1)) >> 1)
+#define ANA_AUTOAGE_AUTOAGE_LOCKED                        BIT(0)
+
+#define ANA_MACTOPTIONS_REDUCED_TABLE                     BIT(1)
+#define ANA_MACTOPTIONS_SHADOW                            BIT(0)
+
+#define ANA_AGENCTRL_FID_MASK(x)                          (((x) << 12) & GENMASK(23, 12))
+#define ANA_AGENCTRL_FID_MASK_M                           GENMASK(23, 12)
+#define ANA_AGENCTRL_FID_MASK_X(x)                        (((x) & GENMASK(23, 12)) >> 12)
+#define ANA_AGENCTRL_IGNORE_DMAC_FLAGS                    BIT(11)
+#define ANA_AGENCTRL_IGNORE_SMAC_FLAGS                    BIT(10)
+#define ANA_AGENCTRL_FLOOD_SPECIAL                        BIT(9)
+#define ANA_AGENCTRL_FLOOD_IGNORE_VLAN                    BIT(8)
+#define ANA_AGENCTRL_MIRROR_CPU                           BIT(7)
+#define ANA_AGENCTRL_LEARN_CPU_COPY                       BIT(6)
+#define ANA_AGENCTRL_LEARN_FWD_KILL                       BIT(5)
+#define ANA_AGENCTRL_LEARN_IGNORE_VLAN                    BIT(4)
+#define ANA_AGENCTRL_CPU_CPU_KILL_ENA                     BIT(3)
+#define ANA_AGENCTRL_GREEN_COUNT_MODE                     BIT(2)
+#define ANA_AGENCTRL_YELLOW_COUNT_MODE                    BIT(1)
+#define ANA_AGENCTRL_RED_COUNT_MODE                       BIT(0)
+
+#define ANA_FLOODING_RSZ                                  0x4
+
+#define ANA_FLOODING_FLD_UNICAST(x)                       (((x) << 12) & GENMASK(17, 12))
+#define ANA_FLOODING_FLD_UNICAST_M                        GENMASK(17, 12)
+#define ANA_FLOODING_FLD_UNICAST_X(x)                     (((x) & GENMASK(17, 12)) >> 12)
+#define ANA_FLOODING_FLD_BROADCAST(x)                     (((x) << 6) & GENMASK(11, 6))
+#define ANA_FLOODING_FLD_BROADCAST_M                      GENMASK(11, 6)
+#define ANA_FLOODING_FLD_BROADCAST_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
+#define ANA_FLOODING_FLD_MULTICAST(x)                     ((x) & GENMASK(5, 0))
+#define ANA_FLOODING_FLD_MULTICAST_M                      GENMASK(5, 0)
+
+#define ANA_FLOODING_IPMC_FLD_MC4_CTRL(x)                 (((x) << 18) & GENMASK(23, 18))
+#define ANA_FLOODING_IPMC_FLD_MC4_CTRL_M                  GENMASK(23, 18)
+#define ANA_FLOODING_IPMC_FLD_MC4_CTRL_X(x)               (((x) & GENMASK(23, 18)) >> 18)
+#define ANA_FLOODING_IPMC_FLD_MC4_DATA(x)                 (((x) << 12) & GENMASK(17, 12))
+#define ANA_FLOODING_IPMC_FLD_MC4_DATA_M                  GENMASK(17, 12)
+#define ANA_FLOODING_IPMC_FLD_MC4_DATA_X(x)               (((x) & GENMASK(17, 12)) >> 12)
+#define ANA_FLOODING_IPMC_FLD_MC6_CTRL(x)                 (((x) << 6) & GENMASK(11, 6))
+#define ANA_FLOODING_IPMC_FLD_MC6_CTRL_M                  GENMASK(11, 6)
+#define ANA_FLOODING_IPMC_FLD_MC6_CTRL_X(x)               (((x) & GENMASK(11, 6)) >> 6)
+#define ANA_FLOODING_IPMC_FLD_MC6_DATA(x)                 ((x) & GENMASK(5, 0))
+#define ANA_FLOODING_IPMC_FLD_MC6_DATA_M                  GENMASK(5, 0)
+
+#define ANA_SFLOW_CFG_RSZ                                 0x4
+
+#define ANA_SFLOW_CFG_SF_RATE(x)                          (((x) << 2) & GENMASK(13, 2))
+#define ANA_SFLOW_CFG_SF_RATE_M                           GENMASK(13, 2)
+#define ANA_SFLOW_CFG_SF_RATE_X(x)                        (((x) & GENMASK(13, 2)) >> 2)
+#define ANA_SFLOW_CFG_SF_SAMPLE_RX                        BIT(1)
+#define ANA_SFLOW_CFG_SF_SAMPLE_TX                        BIT(0)
+
+#define ANA_PORT_MODE_RSZ                                 0x4
+
+#define ANA_PORT_MODE_REDTAG_PARSE_CFG                    BIT(3)
+#define ANA_PORT_MODE_VLAN_PARSE_CFG(x)                   (((x) << 1) & GENMASK(2, 1))
+#define ANA_PORT_MODE_VLAN_PARSE_CFG_M                    GENMASK(2, 1)
+#define ANA_PORT_MODE_VLAN_PARSE_CFG_X(x)                 (((x) & GENMASK(2, 1)) >> 1)
+#define ANA_PORT_MODE_L3_PARSE_CFG                        BIT(0)
+
+#define ANA_CUT_THRU_CFG_RSZ                              0x4
+
+#define ANA_PGID_PGID_RSZ                                 0x4
+
+#define ANA_PGID_PGID_PGID(x)                             ((x) & GENMASK(11, 0))
+#define ANA_PGID_PGID_PGID_M                              GENMASK(11, 0)
+#define ANA_PGID_PGID_CPUQ_DST_PGID(x)                    (((x) << 27) & GENMASK(29, 27))
+#define ANA_PGID_PGID_CPUQ_DST_PGID_M                     GENMASK(29, 27)
+#define ANA_PGID_PGID_CPUQ_DST_PGID_X(x)                  (((x) & GENMASK(29, 27)) >> 27)
+
+#define ANA_TABLES_MACHDATA_VID(x)                        (((x) << 16) & GENMASK(28, 16))
+#define ANA_TABLES_MACHDATA_VID_M                         GENMASK(28, 16)
+#define ANA_TABLES_MACHDATA_VID_X(x)                      (((x) & GENMASK(28, 16)) >> 16)
+#define ANA_TABLES_MACHDATA_MACHDATA(x)                   ((x) & GENMASK(15, 0))
+#define ANA_TABLES_MACHDATA_MACHDATA_M                    GENMASK(15, 0)
+
+#define ANA_TABLES_STREAMDATA_SSID_VALID                  BIT(16)
+#define ANA_TABLES_STREAMDATA_SSID(x)                     (((x) << 9) & GENMASK(15, 9))
+#define ANA_TABLES_STREAMDATA_SSID_M                      GENMASK(15, 9)
+#define ANA_TABLES_STREAMDATA_SSID_X(x)                   (((x) & GENMASK(15, 9)) >> 9)
+#define ANA_TABLES_STREAMDATA_SFID_VALID                  BIT(8)
+#define ANA_TABLES_STREAMDATA_SFID(x)                     ((x) & GENMASK(7, 0))
+#define ANA_TABLES_STREAMDATA_SFID_M                      GENMASK(7, 0)
+
+#define ANA_TABLES_MACACCESS_MAC_CPU_COPY                 BIT(15)
+#define ANA_TABLES_MACACCESS_SRC_KILL                     BIT(14)
+#define ANA_TABLES_MACACCESS_IGNORE_VLAN                  BIT(13)
+#define ANA_TABLES_MACACCESS_AGED_FLAG                    BIT(12)
+#define ANA_TABLES_MACACCESS_VALID                        BIT(11)
+#define ANA_TABLES_MACACCESS_ENTRYTYPE(x)                 (((x) << 9) & GENMASK(10, 9))
+#define ANA_TABLES_MACACCESS_ENTRYTYPE_M                  GENMASK(10, 9)
+#define ANA_TABLES_MACACCESS_ENTRYTYPE_X(x)               (((x) & GENMASK(10, 9)) >> 9)
+#define ANA_TABLES_MACACCESS_DEST_IDX(x)                  (((x) << 3) & GENMASK(8, 3))
+#define ANA_TABLES_MACACCESS_DEST_IDX_M                   GENMASK(8, 3)
+#define ANA_TABLES_MACACCESS_DEST_IDX_X(x)                (((x) & GENMASK(8, 3)) >> 3)
+#define ANA_TABLES_MACACCESS_MAC_TABLE_CMD(x)             ((x) & GENMASK(2, 0))
+#define ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M              GENMASK(2, 0)
+#define MACACCESS_CMD_IDLE                     0
+#define MACACCESS_CMD_LEARN                    1
+#define MACACCESS_CMD_FORGET                   2
+#define MACACCESS_CMD_AGE                      3
+#define MACACCESS_CMD_GET_NEXT                 4
+#define MACACCESS_CMD_INIT                     5
+#define MACACCESS_CMD_READ                     6
+#define MACACCESS_CMD_WRITE                    7
+
+#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK(x)           (((x) << 2) & GENMASK(13, 2))
+#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK_M            GENMASK(13, 2)
+#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK_X(x)         (((x) & GENMASK(13, 2)) >> 2)
+#define ANA_TABLES_VLANACCESS_VLAN_TBL_CMD(x)             ((x) & GENMASK(1, 0))
+#define ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M              GENMASK(1, 0)
+#define ANA_TABLES_VLANACCESS_CMD_IDLE                    0x0
+#define ANA_TABLES_VLANACCESS_CMD_WRITE                   0x2
+#define ANA_TABLES_VLANACCESS_CMD_INIT                    0x3
+
+#define ANA_TABLES_VLANTIDX_VLAN_SEC_FWD_ENA              BIT(17)
+#define ANA_TABLES_VLANTIDX_VLAN_FLOOD_DIS                BIT(16)
+#define ANA_TABLES_VLANTIDX_VLAN_PRIV_VLAN                BIT(15)
+#define ANA_TABLES_VLANTIDX_VLAN_LEARN_DISABLED           BIT(14)
+#define ANA_TABLES_VLANTIDX_VLAN_MIRROR                   BIT(13)
+#define ANA_TABLES_VLANTIDX_VLAN_SRC_CHK                  BIT(12)
+#define ANA_TABLES_VLANTIDX_V_INDEX(x)                    ((x) & GENMASK(11, 0))
+#define ANA_TABLES_VLANTIDX_V_INDEX_M                     GENMASK(11, 0)
+
+#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK(x)           (((x) << 2) & GENMASK(8, 2))
+#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK_M            GENMASK(8, 2)
+#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK_X(x)         (((x) & GENMASK(8, 2)) >> 2)
+#define ANA_TABLES_ISDXACCESS_ISDX_TBL_CMD(x)             ((x) & GENMASK(1, 0))
+#define ANA_TABLES_ISDXACCESS_ISDX_TBL_CMD_M              GENMASK(1, 0)
+
+#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI(x)                 (((x) << 21) & GENMASK(28, 21))
+#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI_M                  GENMASK(28, 21)
+#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI_X(x)               (((x) & GENMASK(28, 21)) >> 21)
+#define ANA_TABLES_ISDXTIDX_ISDX_MSTI(x)                  (((x) << 15) & GENMASK(20, 15))
+#define ANA_TABLES_ISDXTIDX_ISDX_MSTI_M                   GENMASK(20, 15)
+#define ANA_TABLES_ISDXTIDX_ISDX_MSTI_X(x)                (((x) & GENMASK(20, 15)) >> 15)
+#define ANA_TABLES_ISDXTIDX_ISDX_ES0_KEY_ENA              BIT(14)
+#define ANA_TABLES_ISDXTIDX_ISDX_FORCE_ENA                BIT(10)
+#define ANA_TABLES_ISDXTIDX_ISDX_INDEX(x)                 ((x) & GENMASK(7, 0))
+#define ANA_TABLES_ISDXTIDX_ISDX_INDEX_M                  GENMASK(7, 0)
+
+#define ANA_TABLES_ENTRYLIM_RSZ                           0x4
+
+#define ANA_TABLES_ENTRYLIM_ENTRYLIM(x)                   (((x) << 14) & GENMASK(17, 14))
+#define ANA_TABLES_ENTRYLIM_ENTRYLIM_M                    GENMASK(17, 14)
+#define ANA_TABLES_ENTRYLIM_ENTRYLIM_X(x)                 (((x) & GENMASK(17, 14)) >> 14)
+#define ANA_TABLES_ENTRYLIM_ENTRYSTAT(x)                  ((x) & GENMASK(13, 0))
+#define ANA_TABLES_ENTRYLIM_ENTRYSTAT_M                   GENMASK(13, 0)
+
+#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM(x)        (((x) << 4) & GENMASK(31, 4))
+#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM_M         GENMASK(31, 4)
+#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM_X(x)      (((x) & GENMASK(31, 4)) >> 4)
+#define ANA_TABLES_STREAMACCESS_SEQ_GEN_REC_ENA           BIT(3)
+#define ANA_TABLES_STREAMACCESS_GEN_REC_TYPE              BIT(2)
+#define ANA_TABLES_STREAMACCESS_STREAM_TBL_CMD(x)         ((x) & GENMASK(1, 0))
+#define ANA_TABLES_STREAMACCESS_STREAM_TBL_CMD_M          GENMASK(1, 0)
+
+#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS(x)       (((x) << 30) & GENMASK(31, 30))
+#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS_M        GENMASK(31, 30)
+#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS_X(x)     (((x) & GENMASK(31, 30)) >> 30)
+#define ANA_TABLES_STREAMTIDX_S_INDEX(x)                  (((x) << 16) & GENMASK(22, 16))
+#define ANA_TABLES_STREAMTIDX_S_INDEX_M                   GENMASK(22, 16)
+#define ANA_TABLES_STREAMTIDX_S_INDEX_X(x)                (((x) & GENMASK(22, 16)) >> 16)
+#define ANA_TABLES_STREAMTIDX_FORCE_SF_BEHAVIOUR          BIT(14)
+#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN(x)          (((x) << 8) & GENMASK(13, 8))
+#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN_M           GENMASK(13, 8)
+#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN_X(x)        (((x) & GENMASK(13, 8)) >> 8)
+#define ANA_TABLES_STREAMTIDX_RESET_ON_ROGUE              BIT(7)
+#define ANA_TABLES_STREAMTIDX_REDTAG_POP                  BIT(6)
+#define ANA_TABLES_STREAMTIDX_STREAM_SPLIT                BIT(5)
+#define ANA_TABLES_STREAMTIDX_SEQ_SPACE_LOG2(x)           ((x) & GENMASK(4, 0))
+#define ANA_TABLES_STREAMTIDX_SEQ_SPACE_LOG2_M            GENMASK(4, 0)
+
+#define ANA_TABLES_SEQ_MASK_SPLIT_MASK(x)                 (((x) << 16) & GENMASK(22, 16))
+#define ANA_TABLES_SEQ_MASK_SPLIT_MASK_M                  GENMASK(22, 16)
+#define ANA_TABLES_SEQ_MASK_SPLIT_MASK_X(x)               (((x) & GENMASK(22, 16)) >> 16)
+#define ANA_TABLES_SEQ_MASK_INPUT_PORT_MASK(x)            ((x) & GENMASK(6, 0))
+#define ANA_TABLES_SEQ_MASK_INPUT_PORT_MASK_M             GENMASK(6, 0)
+
+#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK(x)             (((x) << 1) & GENMASK(7, 1))
+#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK_M              GENMASK(7, 1)
+#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK_X(x)           (((x) & GENMASK(7, 1)) >> 1)
+#define ANA_TABLES_SFID_MASK_IGR_SRCPORT_MATCH_ENA        BIT(0)
+
+#define ANA_TABLES_SFIDACCESS_IGR_PRIO_MATCH_ENA          BIT(22)
+#define ANA_TABLES_SFIDACCESS_IGR_PRIO(x)                 (((x) << 19) & GENMASK(21, 19))
+#define ANA_TABLES_SFIDACCESS_IGR_PRIO_M                  GENMASK(21, 19)
+#define ANA_TABLES_SFIDACCESS_IGR_PRIO_X(x)               (((x) & GENMASK(21, 19)) >> 19)
+#define ANA_TABLES_SFIDACCESS_FORCE_BLOCK                 BIT(18)
+#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN(x)              (((x) << 2) & GENMASK(17, 2))
+#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN_M               GENMASK(17, 2)
+#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN_X(x)            (((x) & GENMASK(17, 2)) >> 2)
+#define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD(x)             ((x) & GENMASK(1, 0))
+#define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD_M              GENMASK(1, 0)
+
+#define ANA_TABLES_SFIDTIDX_SGID_VALID                    BIT(26)
+#define ANA_TABLES_SFIDTIDX_SGID(x)                       (((x) << 18) & GENMASK(25, 18))
+#define ANA_TABLES_SFIDTIDX_SGID_M                        GENMASK(25, 18)
+#define ANA_TABLES_SFIDTIDX_SGID_X(x)                     (((x) & GENMASK(25, 18)) >> 18)
+#define ANA_TABLES_SFIDTIDX_POL_ENA                       BIT(17)
+#define ANA_TABLES_SFIDTIDX_POL_IDX(x)                    (((x) << 8) & GENMASK(16, 8))
+#define ANA_TABLES_SFIDTIDX_POL_IDX_M                     GENMASK(16, 8)
+#define ANA_TABLES_SFIDTIDX_POL_IDX_X(x)                  (((x) & GENMASK(16, 8)) >> 8)
+#define ANA_TABLES_SFIDTIDX_SFID_INDEX(x)                 ((x) & GENMASK(7, 0))
+#define ANA_TABLES_SFIDTIDX_SFID_INDEX_M                  GENMASK(7, 0)
+
+#define ANA_MSTI_STATE_RSZ                                0x4
+
+#define ANA_OAM_UPM_LM_CNT_RSZ                            0x4
+
+#define ANA_SG_ACCESS_CTRL_SGID(x)                        ((x) & GENMASK(7, 0))
+#define ANA_SG_ACCESS_CTRL_SGID_M                         GENMASK(7, 0)
+#define ANA_SG_ACCESS_CTRL_CONFIG_CHANGE                  BIT(28)
+
+#define ANA_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB(x)          ((x) & GENMASK(15, 0))
+#define ANA_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB_M           GENMASK(15, 0)
+#define ANA_SG_CONFIG_REG_3_LIST_LENGTH(x)                (((x) << 16) & GENMASK(18, 16))
+#define ANA_SG_CONFIG_REG_3_LIST_LENGTH_M                 GENMASK(18, 16)
+#define ANA_SG_CONFIG_REG_3_LIST_LENGTH_X(x)              (((x) & GENMASK(18, 16)) >> 16)
+#define ANA_SG_CONFIG_REG_3_GATE_ENABLE                   BIT(20)
+#define ANA_SG_CONFIG_REG_3_INIT_IPS(x)                   (((x) << 24) & GENMASK(27, 24))
+#define ANA_SG_CONFIG_REG_3_INIT_IPS_M                    GENMASK(27, 24)
+#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x)                 (((x) & GENMASK(27, 24)) >> 24)
+#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE               BIT(28)
+
+#define ANA_SG_GCL_GS_CONFIG_RSZ                          0x4
+
+#define ANA_SG_GCL_GS_CONFIG_IPS(x)                       ((x) & GENMASK(3, 0))
+#define ANA_SG_GCL_GS_CONFIG_IPS_M                        GENMASK(3, 0)
+#define ANA_SG_GCL_GS_CONFIG_GATE_STATE                   BIT(4)
+
+#define ANA_SG_GCL_TI_CONFIG_RSZ                          0x4
+
+#define ANA_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB(x)       ((x) & GENMASK(15, 0))
+#define ANA_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB_M        GENMASK(15, 0)
+#define ANA_SG_STATUS_REG_3_GATE_STATE                    BIT(16)
+#define ANA_SG_STATUS_REG_3_IPS(x)                        (((x) << 20) & GENMASK(23, 20))
+#define ANA_SG_STATUS_REG_3_IPS_M                         GENMASK(23, 20)
+#define ANA_SG_STATUS_REG_3_IPS_X(x)                      (((x) & GENMASK(23, 20)) >> 20)
+#define ANA_SG_STATUS_REG_3_CONFIG_PENDING                BIT(24)
+
+#define ANA_PORT_VLAN_CFG_GSZ                             0x100
+
+#define ANA_PORT_VLAN_CFG_VLAN_VID_AS_ISDX                BIT(21)
+#define ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA                  BIT(20)
+#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT(x)                 (((x) << 18) & GENMASK(19, 18))
+#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M                  GENMASK(19, 18)
+#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT_X(x)               (((x) & GENMASK(19, 18)) >> 18)
+#define ANA_PORT_VLAN_CFG_VLAN_INNER_TAG_ENA              BIT(17)
+#define ANA_PORT_VLAN_CFG_VLAN_TAG_TYPE                   BIT(16)
+#define ANA_PORT_VLAN_CFG_VLAN_DEI                        BIT(15)
+#define ANA_PORT_VLAN_CFG_VLAN_PCP(x)                     (((x) << 12) & GENMASK(14, 12))
+#define ANA_PORT_VLAN_CFG_VLAN_PCP_M                      GENMASK(14, 12)
+#define ANA_PORT_VLAN_CFG_VLAN_PCP_X(x)                   (((x) & GENMASK(14, 12)) >> 12)
+#define ANA_PORT_VLAN_CFG_VLAN_VID(x)                     ((x) & GENMASK(11, 0))
+#define ANA_PORT_VLAN_CFG_VLAN_VID_M                      GENMASK(11, 0)
+
+#define ANA_PORT_DROP_CFG_GSZ                             0x100
+
+#define ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA               BIT(6)
+#define ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA               BIT(5)
+#define ANA_PORT_DROP_CFG_DROP_C_TAGGED_ENA               BIT(4)
+#define ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA          BIT(3)
+#define ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA          BIT(2)
+#define ANA_PORT_DROP_CFG_DROP_NULL_MAC_ENA               BIT(1)
+#define ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA                BIT(0)
+
+#define ANA_PORT_QOS_CFG_GSZ                              0x100
+
+#define ANA_PORT_QOS_CFG_DP_DEFAULT_VAL                   BIT(8)
+#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL(x)               (((x) << 5) & GENMASK(7, 5))
+#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_M                GENMASK(7, 5)
+#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(x)             (((x) & GENMASK(7, 5)) >> 5)
+#define ANA_PORT_QOS_CFG_QOS_DSCP_ENA                     BIT(4)
+#define ANA_PORT_QOS_CFG_QOS_PCP_ENA                      BIT(3)
+#define ANA_PORT_QOS_CFG_DSCP_TRANSLATE_ENA               BIT(2)
+#define ANA_PORT_QOS_CFG_DSCP_REWR_CFG(x)                 ((x) & GENMASK(1, 0))
+#define ANA_PORT_QOS_CFG_DSCP_REWR_CFG_M                  GENMASK(1, 0)
+
+#define ANA_PORT_VCAP_CFG_GSZ                             0x100
+
+#define ANA_PORT_VCAP_CFG_S1_ENA                          BIT(14)
+#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA(x)              (((x) << 11) & GENMASK(13, 11))
+#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA_M               GENMASK(13, 11)
+#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA_X(x)            (((x) & GENMASK(13, 11)) >> 11)
+#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA(x)        (((x) << 8) & GENMASK(10, 8))
+#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA_M         GENMASK(10, 8)
+#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA_X(x)      (((x) & GENMASK(10, 8)) >> 8)
+#define ANA_PORT_VCAP_CFG_PAG_VAL(x)                      ((x) & GENMASK(7, 0))
+#define ANA_PORT_VCAP_CFG_PAG_VAL_M                       GENMASK(7, 0)
+
+#define ANA_PORT_VCAP_S1_KEY_CFG_GSZ                      0x100
+#define ANA_PORT_VCAP_S1_KEY_CFG_RSZ                      0x4
+
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG(x)        (((x) << 4) & GENMASK(6, 4))
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG_M         GENMASK(6, 4)
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG_X(x)      (((x) & GENMASK(6, 4)) >> 4)
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG(x)        (((x) << 2) & GENMASK(3, 2))
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG_M         GENMASK(3, 2)
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG_X(x)      (((x) & GENMASK(3, 2)) >> 2)
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_OTHER_CFG(x)      ((x) & GENMASK(1, 0))
+#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_OTHER_CFG_M       GENMASK(1, 0)
+
+#define ANA_PORT_VCAP_S2_CFG_GSZ                          0x100
+
+#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA(x)        (((x) << 17) & GENMASK(18, 17))
+#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA_M         GENMASK(18, 17)
+#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA_X(x)      (((x) & GENMASK(18, 17)) >> 17)
+#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA(x)      (((x) << 15) & GENMASK(16, 15))
+#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA_M       GENMASK(16, 15)
+#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA_X(x)    (((x) & GENMASK(16, 15)) >> 15)
+#define ANA_PORT_VCAP_S2_CFG_S2_ENA                       BIT(14)
+#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(x)               (((x) << 12) & GENMASK(13, 12))
+#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_M                GENMASK(13, 12)
+#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_X(x)             (((x) & GENMASK(13, 12)) >> 12)
+#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(x)                (((x) << 10) & GENMASK(11, 10))
+#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_M                 GENMASK(11, 10)
+#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_X(x)              (((x) & GENMASK(11, 10)) >> 10)
+#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(x)          (((x) << 8) & GENMASK(9, 8))
+#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_M           GENMASK(9, 8)
+#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_X(x)        (((x) & GENMASK(9, 8)) >> 8)
+#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(x)           (((x) << 6) & GENMASK(7, 6))
+#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_M            GENMASK(7, 6)
+#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_X(x)         (((x) & GENMASK(7, 6)) >> 6)
+#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(x)                (((x) << 2) & GENMASK(5, 2))
+#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG_M                 GENMASK(5, 2)
+#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG_X(x)              (((x) & GENMASK(5, 2)) >> 2)
+#define ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(x)                ((x) & GENMASK(1, 0))
+#define ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS_M                 GENMASK(1, 0)
+
+#define ANA_PORT_PCP_DEI_MAP_GSZ                          0x100
+#define ANA_PORT_PCP_DEI_MAP_RSZ                          0x4
+
+#define ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL               BIT(3)
+#define ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(x)           ((x) & GENMASK(2, 0))
+#define ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL_M            GENMASK(2, 0)
+
+#define ANA_PORT_CPU_FWD_CFG_GSZ                          0x100
+
+#define ANA_PORT_CPU_FWD_CFG_CPU_VRAP_REDIR_ENA           BIT(7)
+#define ANA_PORT_CPU_FWD_CFG_CPU_MLD_REDIR_ENA            BIT(6)
+#define ANA_PORT_CPU_FWD_CFG_CPU_IGMP_REDIR_ENA           BIT(5)
+#define ANA_PORT_CPU_FWD_CFG_CPU_IPMC_CTRL_COPY_ENA       BIT(4)
+#define ANA_PORT_CPU_FWD_CFG_CPU_SRC_COPY_ENA             BIT(3)
+#define ANA_PORT_CPU_FWD_CFG_CPU_ALLBRIDGE_DROP_ENA       BIT(2)
+#define ANA_PORT_CPU_FWD_CFG_CPU_ALLBRIDGE_REDIR_ENA      BIT(1)
+#define ANA_PORT_CPU_FWD_CFG_CPU_OAM_ENA                  BIT(0)
+
+#define ANA_PORT_CPU_FWD_BPDU_CFG_GSZ                     0x100
+
+#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA(x)        (((x) << 16) & GENMASK(31, 16))
+#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA_M         GENMASK(31, 16)
+#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA_X(x)      (((x) & GENMASK(31, 16)) >> 16)
+#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(x)       ((x) & GENMASK(15, 0))
+#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA_M        GENMASK(15, 0)
+
+#define ANA_PORT_CPU_FWD_GARP_CFG_GSZ                     0x100
+
+#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA(x)        (((x) << 16) & GENMASK(31, 16))
+#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA_M         GENMASK(31, 16)
+#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA_X(x)      (((x) & GENMASK(31, 16)) >> 16)
+#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_REDIR_ENA(x)       ((x) & GENMASK(15, 0))
+#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_REDIR_ENA_M        GENMASK(15, 0)
+
+#define ANA_PORT_CPU_FWD_CCM_CFG_GSZ                      0x100
+
+#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA(x)          (((x) << 16) & GENMASK(31, 16))
+#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA_M           GENMASK(31, 16)
+#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA_X(x)        (((x) & GENMASK(31, 16)) >> 16)
+#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_REDIR_ENA(x)         ((x) & GENMASK(15, 0))
+#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_REDIR_ENA_M          GENMASK(15, 0)
+
+#define ANA_PORT_PORT_CFG_GSZ                             0x100
+
+#define ANA_PORT_PORT_CFG_SRC_MIRROR_ENA                  BIT(15)
+#define ANA_PORT_PORT_CFG_LIMIT_DROP                      BIT(14)
+#define ANA_PORT_PORT_CFG_LIMIT_CPU                       BIT(13)
+#define ANA_PORT_PORT_CFG_LOCKED_PORTMOVE_DROP            BIT(12)
+#define ANA_PORT_PORT_CFG_LOCKED_PORTMOVE_CPU             BIT(11)
+#define ANA_PORT_PORT_CFG_LEARNDROP                       BIT(10)
+#define ANA_PORT_PORT_CFG_LEARNCPU                        BIT(9)
+#define ANA_PORT_PORT_CFG_LEARNAUTO                       BIT(8)
+#define ANA_PORT_PORT_CFG_LEARN_ENA                       BIT(7)
+#define ANA_PORT_PORT_CFG_RECV_ENA                        BIT(6)
+#define ANA_PORT_PORT_CFG_PORTID_VAL(x)                   (((x) << 2) & GENMASK(5, 2))
+#define ANA_PORT_PORT_CFG_PORTID_VAL_M                    GENMASK(5, 2)
+#define ANA_PORT_PORT_CFG_PORTID_VAL_X(x)                 (((x) & GENMASK(5, 2)) >> 2)
+#define ANA_PORT_PORT_CFG_USE_B_DOM_TBL                   BIT(1)
+#define ANA_PORT_PORT_CFG_LSR_MODE                        BIT(0)
+
+#define ANA_PORT_POL_CFG_GSZ                              0x100
+
+#define ANA_PORT_POL_CFG_POL_CPU_REDIR_8021               BIT(19)
+#define ANA_PORT_POL_CFG_POL_CPU_REDIR_IP                 BIT(18)
+#define ANA_PORT_POL_CFG_PORT_POL_ENA                     BIT(17)
+#define ANA_PORT_POL_CFG_QUEUE_POL_ENA(x)                 (((x) << 9) & GENMASK(16, 9))
+#define ANA_PORT_POL_CFG_QUEUE_POL_ENA_M                  GENMASK(16, 9)
+#define ANA_PORT_POL_CFG_QUEUE_POL_ENA_X(x)               (((x) & GENMASK(16, 9)) >> 9)
+#define ANA_PORT_POL_CFG_POL_ORDER(x)                     ((x) & GENMASK(8, 0))
+#define ANA_PORT_POL_CFG_POL_ORDER_M                      GENMASK(8, 0)
+
+#define ANA_PORT_PTP_CFG_GSZ                              0x100
+
+#define ANA_PORT_PTP_CFG_PTP_BACKPLANE_MODE               BIT(0)
+
+#define ANA_PORT_PTP_DLY1_CFG_GSZ                         0x100
+
+#define ANA_PORT_PTP_DLY2_CFG_GSZ                         0x100
+
+#define ANA_PORT_SFID_CFG_GSZ                             0x100
+#define ANA_PORT_SFID_CFG_RSZ                             0x4
+
+#define ANA_PORT_SFID_CFG_SFID_VALID                      BIT(8)
+#define ANA_PORT_SFID_CFG_SFID(x)                         ((x) & GENMASK(7, 0))
+#define ANA_PORT_SFID_CFG_SFID_M                          GENMASK(7, 0)
+
+#define ANA_PFC_PFC_CFG_GSZ                               0x40
+
+#define ANA_PFC_PFC_CFG_RX_PFC_ENA(x)                     (((x) << 2) & GENMASK(9, 2))
+#define ANA_PFC_PFC_CFG_RX_PFC_ENA_M                      GENMASK(9, 2)
+#define ANA_PFC_PFC_CFG_RX_PFC_ENA_X(x)                   (((x) & GENMASK(9, 2)) >> 2)
+#define ANA_PFC_PFC_CFG_FC_LINK_SPEED(x)                  ((x) & GENMASK(1, 0))
+#define ANA_PFC_PFC_CFG_FC_LINK_SPEED_M                   GENMASK(1, 0)
+
+#define ANA_PFC_PFC_TIMER_GSZ                             0x40
+#define ANA_PFC_PFC_TIMER_RSZ                             0x4
+
+#define ANA_IPT_OAM_MEP_CFG_GSZ                           0x8
+
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P(x)                  (((x) << 6) & GENMASK(10, 6))
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P_M                   GENMASK(10, 6)
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P_X(x)                (((x) & GENMASK(10, 6)) >> 6)
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX(x)                    (((x) << 1) & GENMASK(5, 1))
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_M                     GENMASK(5, 1)
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_X(x)                  (((x) & GENMASK(5, 1)) >> 1)
+#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_ENA                   BIT(0)
+
+#define ANA_IPT_IPT_GSZ                                   0x8
+
+#define ANA_IPT_IPT_IPT_CFG(x)                            (((x) << 15) & GENMASK(16, 15))
+#define ANA_IPT_IPT_IPT_CFG_M                             GENMASK(16, 15)
+#define ANA_IPT_IPT_IPT_CFG_X(x)                          (((x) & GENMASK(16, 15)) >> 15)
+#define ANA_IPT_IPT_ISDX_P(x)                             (((x) << 7) & GENMASK(14, 7))
+#define ANA_IPT_IPT_ISDX_P_M                              GENMASK(14, 7)
+#define ANA_IPT_IPT_ISDX_P_X(x)                           (((x) & GENMASK(14, 7)) >> 7)
+#define ANA_IPT_IPT_PPT_IDX(x)                            ((x) & GENMASK(6, 0))
+#define ANA_IPT_IPT_PPT_IDX_M                             GENMASK(6, 0)
+
+#define ANA_PPT_PPT_RSZ                                   0x4
+
+#define ANA_FID_MAP_FID_MAP_RSZ                           0x4
+
+#define ANA_FID_MAP_FID_MAP_FID_C_VAL(x)                  (((x) << 6) & GENMASK(11, 6))
+#define ANA_FID_MAP_FID_MAP_FID_C_VAL_M                   GENMASK(11, 6)
+#define ANA_FID_MAP_FID_MAP_FID_C_VAL_X(x)                (((x) & GENMASK(11, 6)) >> 6)
+#define ANA_FID_MAP_FID_MAP_FID_B_VAL(x)                  ((x) & GENMASK(5, 0))
+#define ANA_FID_MAP_FID_MAP_FID_B_VAL_M                   GENMASK(5, 0)
+
+#define ANA_AGGR_CFG_AC_RND_ENA                           BIT(7)
+#define ANA_AGGR_CFG_AC_DMAC_ENA                          BIT(6)
+#define ANA_AGGR_CFG_AC_SMAC_ENA                          BIT(5)
+#define ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA                  BIT(4)
+#define ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA                    BIT(3)
+#define ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA                    BIT(2)
+#define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA                    BIT(1)
+#define ANA_AGGR_CFG_AC_ISDX_ENA                          BIT(0)
+
+#define ANA_CPUQ_CFG_CPUQ_MLD(x)                          (((x) << 27) & GENMASK(29, 27))
+#define ANA_CPUQ_CFG_CPUQ_MLD_M                           GENMASK(29, 27)
+#define ANA_CPUQ_CFG_CPUQ_MLD_X(x)                        (((x) & GENMASK(29, 27)) >> 27)
+#define ANA_CPUQ_CFG_CPUQ_IGMP(x)                         (((x) << 24) & GENMASK(26, 24))
+#define ANA_CPUQ_CFG_CPUQ_IGMP_M                          GENMASK(26, 24)
+#define ANA_CPUQ_CFG_CPUQ_IGMP_X(x)                       (((x) & GENMASK(26, 24)) >> 24)
+#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL(x)                    (((x) << 21) & GENMASK(23, 21))
+#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL_M                     GENMASK(23, 21)
+#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL_X(x)                  (((x) & GENMASK(23, 21)) >> 21)
+#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE(x)                    (((x) << 18) & GENMASK(20, 18))
+#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE_M                     GENMASK(20, 18)
+#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE_X(x)                  (((x) & GENMASK(20, 18)) >> 18)
+#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE(x)              (((x) << 15) & GENMASK(17, 15))
+#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE_M               GENMASK(17, 15)
+#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE_X(x)            (((x) & GENMASK(17, 15)) >> 15)
+#define ANA_CPUQ_CFG_CPUQ_SRC_COPY(x)                     (((x) << 12) & GENMASK(14, 12))
+#define ANA_CPUQ_CFG_CPUQ_SRC_COPY_M                      GENMASK(14, 12)
+#define ANA_CPUQ_CFG_CPUQ_SRC_COPY_X(x)                   (((x) & GENMASK(14, 12)) >> 12)
+#define ANA_CPUQ_CFG_CPUQ_MAC_COPY(x)                     (((x) << 9) & GENMASK(11, 9))
+#define ANA_CPUQ_CFG_CPUQ_MAC_COPY_M                      GENMASK(11, 9)
+#define ANA_CPUQ_CFG_CPUQ_MAC_COPY_X(x)                   (((x) & GENMASK(11, 9)) >> 9)
+#define ANA_CPUQ_CFG_CPUQ_LRN(x)                          (((x) << 6) & GENMASK(8, 6))
+#define ANA_CPUQ_CFG_CPUQ_LRN_M                           GENMASK(8, 6)
+#define ANA_CPUQ_CFG_CPUQ_LRN_X(x)                        (((x) & GENMASK(8, 6)) >> 6)
+#define ANA_CPUQ_CFG_CPUQ_MIRROR(x)                       (((x) << 3) & GENMASK(5, 3))
+#define ANA_CPUQ_CFG_CPUQ_MIRROR_M                        GENMASK(5, 3)
+#define ANA_CPUQ_CFG_CPUQ_MIRROR_X(x)                     (((x) & GENMASK(5, 3)) >> 3)
+#define ANA_CPUQ_CFG_CPUQ_SFLOW(x)                        ((x) & GENMASK(2, 0))
+#define ANA_CPUQ_CFG_CPUQ_SFLOW_M                         GENMASK(2, 0)
+
+#define ANA_CPUQ_8021_CFG_RSZ                             0x4
+
+#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(x)                (((x) << 6) & GENMASK(8, 6))
+#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL_M                 GENMASK(8, 6)
+#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL_X(x)              (((x) & GENMASK(8, 6)) >> 6)
+#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL(x)                (((x) << 3) & GENMASK(5, 3))
+#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL_M                 GENMASK(5, 3)
+#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL_X(x)              (((x) & GENMASK(5, 3)) >> 3)
+#define ANA_CPUQ_8021_CFG_CPUQ_CCM_VAL(x)                 ((x) & GENMASK(2, 0))
+#define ANA_CPUQ_8021_CFG_CPUQ_CCM_VAL_M                  GENMASK(2, 0)
+
+#define ANA_DSCP_CFG_RSZ                                  0x4
+
+#define ANA_DSCP_CFG_DP_DSCP_VAL                          BIT(11)
+#define ANA_DSCP_CFG_QOS_DSCP_VAL(x)                      (((x) << 8) & GENMASK(10, 8))
+#define ANA_DSCP_CFG_QOS_DSCP_VAL_M                       GENMASK(10, 8)
+#define ANA_DSCP_CFG_QOS_DSCP_VAL_X(x)                    (((x) & GENMASK(10, 8)) >> 8)
+#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL(x)                (((x) << 2) & GENMASK(7, 2))
+#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_M                 GENMASK(7, 2)
+#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_X(x)              (((x) & GENMASK(7, 2)) >> 2)
+#define ANA_DSCP_CFG_DSCP_TRUST_ENA                       BIT(1)
+#define ANA_DSCP_CFG_DSCP_REWR_ENA                        BIT(0)
+
+#define ANA_DSCP_REWR_CFG_RSZ                             0x4
+
+#define ANA_VCAP_RNG_TYPE_CFG_RSZ                         0x4
+
+#define ANA_VCAP_RNG_VAL_CFG_RSZ                          0x4
+
+#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL(x)          (((x) << 16) & GENMASK(31, 16))
+#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL_M           GENMASK(31, 16)
+#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL_X(x)        (((x) & GENMASK(31, 16)) >> 16)
+#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MAX_VAL(x)          ((x) & GENMASK(15, 0))
+#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MAX_VAL_M           GENMASK(15, 0)
+
+#define ANA_VRAP_CFG_VRAP_VLAN_AWARE_ENA                  BIT(12)
+#define ANA_VRAP_CFG_VRAP_VID(x)                          ((x) & GENMASK(11, 0))
+#define ANA_VRAP_CFG_VRAP_VID_M                           GENMASK(11, 0)
+
+#define ANA_DISCARD_CFG_DROP_TAGGING_ISDX0                BIT(3)
+#define ANA_DISCARD_CFG_DROP_CTRLPROT_ISDX0               BIT(2)
+#define ANA_DISCARD_CFG_DROP_TAGGING_S2_ENA               BIT(1)
+#define ANA_DISCARD_CFG_DROP_CTRLPROT_S2_ENA              BIT(0)
+
+#define ANA_FID_CFG_VID_MC_ENA                            BIT(0)
+
+#define ANA_POL_PIR_CFG_GSZ                               0x20
+
+#define ANA_POL_PIR_CFG_PIR_RATE(x)                       (((x) << 6) & GENMASK(20, 6))
+#define ANA_POL_PIR_CFG_PIR_RATE_M                        GENMASK(20, 6)
+#define ANA_POL_PIR_CFG_PIR_RATE_X(x)                     (((x) & GENMASK(20, 6)) >> 6)
+#define ANA_POL_PIR_CFG_PIR_BURST(x)                      ((x) & GENMASK(5, 0))
+#define ANA_POL_PIR_CFG_PIR_BURST_M                       GENMASK(5, 0)
+
+#define ANA_POL_CIR_CFG_GSZ                               0x20
+
+#define ANA_POL_CIR_CFG_CIR_RATE(x)                       (((x) << 6) & GENMASK(20, 6))
+#define ANA_POL_CIR_CFG_CIR_RATE_M                        GENMASK(20, 6)
+#define ANA_POL_CIR_CFG_CIR_RATE_X(x)                     (((x) & GENMASK(20, 6)) >> 6)
+#define ANA_POL_CIR_CFG_CIR_BURST(x)                      ((x) & GENMASK(5, 0))
+#define ANA_POL_CIR_CFG_CIR_BURST_M                       GENMASK(5, 0)
+
+#define ANA_POL_MODE_CFG_GSZ                              0x20
+
+#define ANA_POL_MODE_CFG_IPG_SIZE(x)                      (((x) << 5) & GENMASK(9, 5))
+#define ANA_POL_MODE_CFG_IPG_SIZE_M                       GENMASK(9, 5)
+#define ANA_POL_MODE_CFG_IPG_SIZE_X(x)                    (((x) & GENMASK(9, 5)) >> 5)
+#define ANA_POL_MODE_CFG_FRM_MODE(x)                      (((x) << 3) & GENMASK(4, 3))
+#define ANA_POL_MODE_CFG_FRM_MODE_M                       GENMASK(4, 3)
+#define ANA_POL_MODE_CFG_FRM_MODE_X(x)                    (((x) & GENMASK(4, 3)) >> 3)
+#define ANA_POL_MODE_CFG_DLB_COUPLED                      BIT(2)
+#define ANA_POL_MODE_CFG_CIR_ENA                          BIT(1)
+#define ANA_POL_MODE_CFG_OVERSHOOT_ENA                    BIT(0)
+
+#define ANA_POL_PIR_STATE_GSZ                             0x20
+
+#define ANA_POL_CIR_STATE_GSZ                             0x20
+
+#define ANA_POL_STATE_GSZ                                 0x20
+
+#define ANA_POL_FLOWC_RSZ                                 0x4
+
+#define ANA_POL_FLOWC_POL_FLOWC                           BIT(0)
+
+#define ANA_POL_HYST_POL_FC_HYST(x)                       (((x) << 4) & GENMASK(9, 4))
+#define ANA_POL_HYST_POL_FC_HYST_M                        GENMASK(9, 4)
+#define ANA_POL_HYST_POL_FC_HYST_X(x)                     (((x) & GENMASK(9, 4)) >> 4)
+#define ANA_POL_HYST_POL_STOP_HYST(x)                     ((x) & GENMASK(3, 0))
+#define ANA_POL_HYST_POL_STOP_HYST_M                      GENMASK(3, 0)
+
+#define ANA_POL_MISC_CFG_POL_CLOSE_ALL                    BIT(1)
+#define ANA_POL_MISC_CFG_POL_LEAK_DIS                     BIT(0)
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
new file mode 100644
index 0000000..18df7d9
--- /dev/null
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of_mdio.h>
+#include <linux/of_platform.h>
+#include <linux/skbuff.h>
+
+#include "ocelot.h"
+
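+/* Frames extracted towards the CPU are prefixed with an IFH of
+ * IFH_LEN 32-bit words carrying, at least, the frame length, source
+ * port, CPU extraction queue, tag type and classified VLAN.  The
+ * bitfield positions below are a literal reading of this parser, not
+ * taken from a datasheet.
+ */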
+static int ocelot_parse_ifh(u32 *ifh, struct frame_info *info)
+{
+       int i;
+       u8 llen, wlen;
+
+       /* The IFH is in network order, switch to CPU order */
+       for (i = 0; i < IFH_LEN; i++)
+               ifh[i] = ntohl((__force __be32)ifh[i]);
+
+       wlen = (ifh[1] >> 7) & 0xff;
+       llen = (ifh[1] >> 15) & 0x3f;
+       info->len = OCELOT_BUFFER_CELL_SZ * wlen + llen - 80;
+
+       info->port = (ifh[2] & GENMASK(14, 11)) >> 11;
+
+       info->cpuq = (ifh[3] & GENMASK(27, 20)) >> 20;
+       info->tag_type = (ifh[3] & GENMASK(16, 16)) >> 16;
+       info->vid = ifh[3] & GENMASK(11, 0);
+
+       return 0;
+}
+
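+/* Read one 32-bit word from extraction group @grp.  A hedged summary
+ * of the protocol as implemented here: XTR_NOT_READY means no data is
+ * available yet (fatal while reading the IFH, otherwise polled until
+ * data arrives); the XTR_EOF_* and XTR_PRUNED codes mark that the
+ * following word is the last, possibly partial, data word and encode
+ * how many of its bytes are valid; XTR_ESCAPE means the next word is
+ * literal frame data.  The return value is the number of valid bytes
+ * placed in @rval, or a negative error code.
+ */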
+static int ocelot_rx_frame_word(struct ocelot *ocelot, u8 grp, bool ifh,
+                               u32 *rval)
+{
+       u32 val;
+       u32 bytes_valid;
+
+       val = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+       if (val == XTR_NOT_READY) {
+               if (ifh)
+                       return -EIO;
+
+               do {
+                       val = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+               } while (val == XTR_NOT_READY);
+       }
+
+       switch (val) {
+       case XTR_ABORT:
+               return -EIO;
+       case XTR_EOF_0:
+       case XTR_EOF_1:
+       case XTR_EOF_2:
+       case XTR_EOF_3:
+       case XTR_PRUNED:
+               bytes_valid = XTR_VALID_BYTES(val);
+               val = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+               if (val == XTR_ESCAPE)
+                       *rval = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+               else
+                       *rval = val;
+
+               return bytes_valid;
+       case XTR_ESCAPE:
+               *rval = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+
+               return 4;
+       default:
+               *rval = val;
+
+               return 4;
+       }
+}
+
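+/* Threaded IRQ handler: drain extraction group 0 for as long as its
+ * DATA_PRESENT bit is set, turning each IFH-prefixed frame into an
+ * skb for the netdev of its source port.  On a mid-frame error the
+ * remaining words of the group are read back and discarded, flushing
+ * the extraction queue.
+ */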
+static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
+{
+       struct ocelot *ocelot = arg;
+       int i = 0, grp = 0;
+       int err = 0;
+
+       if (!(ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)))
+               return IRQ_NONE;
+
+       do {
+               struct sk_buff *skb;
+               struct net_device *dev;
+               u32 *buf;
+               int sz, len;
+               u32 ifh[4];
+               u32 val;
+               struct frame_info info;
+
+               for (i = 0; i < IFH_LEN; i++) {
+                       err = ocelot_rx_frame_word(ocelot, grp, true, &ifh[i]);
+                       if (err != 4)
+                               break;
+               }
+
+               if (err != 4)
+                       break;
+
+               ocelot_parse_ifh(ifh, &info);
+
+               dev = ocelot->ports[info.port]->dev;
+
+               skb = netdev_alloc_skb(dev, info.len);
+
+               if (unlikely(!skb)) {
+                       netdev_err(dev, "Unable to allocate sk_buff\n");
+                       err = -ENOMEM;
+                       break;
+               }
+               buf = (u32 *)skb_put(skb, info.len);
+
+               len = 0;
+               do {
+                       sz = ocelot_rx_frame_word(ocelot, grp, false, &val);
+                       *buf++ = val;
+                       len += sz;
+               } while ((sz == 4) && (len < info.len));
+
+               if (sz < 0) {
+                       err = sz;
+                       break;
+               }
+
+               /* Everything we see on an interface that is in the HW bridge
+                * has already been forwarded.
+                */
+               if (ocelot->bridge_mask & BIT(info.port))
+                       skb->offload_fwd_mark = 1;
+
+               skb->protocol = eth_type_trans(skb, dev);
+               netif_rx(skb);
+               dev->stats.rx_bytes += len;
+               dev->stats.rx_packets++;
+       } while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp));
+
+       if (err)
+               while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp))
+                       ocelot_read_rix(ocelot, QS_XTR_RD, grp);
+
+       return IRQ_HANDLED;
+}
+
+static const struct of_device_id mscc_ocelot_match[] = {
+       { .compatible = "mscc,vsc7514-switch" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, mscc_ocelot_match);
+
+static int mscc_ocelot_probe(struct platform_device *pdev)
+{
+       int err, irq;
+       unsigned int i;
+       struct device_node *np = pdev->dev.of_node;
+       struct device_node *ports, *portnp;
+       struct ocelot *ocelot;
+       u32 val;
+
+       struct {
+               enum ocelot_target id;
+               char *name;
+       } res[] = {
+               { SYS, "sys" },
+               { REW, "rew" },
+               { QSYS, "qsys" },
+               { ANA, "ana" },
+               { QS, "qs" },
+               { HSIO, "hsio" },
+       };
+
+       if (!np && !pdev->dev.platform_data)
+               return -ENODEV;
+
+       ocelot = devm_kzalloc(&pdev->dev, sizeof(*ocelot), GFP_KERNEL);
+       if (!ocelot)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, ocelot);
+       ocelot->dev = &pdev->dev;
+
+       for (i = 0; i < ARRAY_SIZE(res); i++) {
+               struct regmap *target;
+
+               target = ocelot_io_platform_init(ocelot, pdev, res[i].name);
+               if (IS_ERR(target))
+                       return PTR_ERR(target);
+
+               ocelot->targets[res[i].id] = target;
+       }
+
+       err = ocelot_chip_init(ocelot);
+       if (err)
+               return err;
+
+       irq = platform_get_irq_byname(pdev, "xtr");
+       if (irq < 0)
+               return -ENODEV;
+
+       err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+                                       ocelot_xtr_irq_handler, IRQF_ONESHOT,
+                                       "frame extraction", ocelot);
+       if (err)
+               return err;
+
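+       /* Initialize the switch core memories: set MEM_INIT together
+        * with MEM_ENA and wait for the hardware to clear MEM_INIT
+        * again before enabling the core (a reading suggested by the
+        * regfield names).
+        */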
+       regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_INIT], 1);
+       regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_ENA], 1);
+
+       do {
+               msleep(1);
+               regmap_field_read(ocelot->regfields[SYS_RESET_CFG_MEM_INIT],
+                                 &val);
+       } while (val);
+
+       regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_ENA], 1);
+       regmap_field_write(ocelot->regfields[SYS_RESET_CFG_CORE_ENA], 1);
+
+       ocelot->num_cpu_ports = 1; /* 1 CPU port on the switch, with two groups */
+
+       ports = of_get_child_by_name(np, "ethernet-ports");
+       if (!ports) {
+               dev_err(&pdev->dev, "no ethernet-ports child node found\n");
+               return -ENODEV;
+       }
+
+       ocelot->num_phys_ports = of_get_child_count(ports);
+
+       ocelot->ports = devm_kcalloc(&pdev->dev, ocelot->num_phys_ports,
+                                    sizeof(struct ocelot_port *), GFP_KERNEL);
+       if (!ocelot->ports)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&ocelot->multicast);
+       ocelot_init(ocelot);
+
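+       /* ocelot_rmw(ocelot, val, mask, reg) with val == mask: set the
+        * DEV1G_{4,6,9}_MODE bits in HSIO_HW_CFG which, going by the
+        * macro names, selects the 1G device mode for ports 4, 6 and 9.
+        */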
+       ocelot_rmw(ocelot, HSIO_HW_CFG_DEV1G_4_MODE |
+                    HSIO_HW_CFG_DEV1G_6_MODE |
+                    HSIO_HW_CFG_DEV1G_9_MODE,
+                    HSIO_HW_CFG_DEV1G_4_MODE |
+                    HSIO_HW_CFG_DEV1G_6_MODE |
+                    HSIO_HW_CFG_DEV1G_9_MODE,
+                    HSIO_HW_CFG);
+
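+       /* One child node per switch port: ports with a missing "reg"
+        * property, register window or PHY handle are skipped rather
+        * than failing the whole probe.
+        */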
+       for_each_available_child_of_node(ports, portnp) {
+               struct device_node *phy_node;
+               struct phy_device *phy;
+               struct resource *res;
+               void __iomem *regs;
+               char res_name[8];
+               u32 port;
+
+               if (of_property_read_u32(portnp, "reg", &port))
+                       continue;
+
+               snprintf(res_name, sizeof(res_name), "port%d", port);
+
+               res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                                  res_name);
+               regs = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(regs))
+                       continue;
+
+               phy_node = of_parse_phandle(portnp, "phy-handle", 0);
+               if (!phy_node)
+                       continue;
+
+               phy = of_phy_find_device(phy_node);
+               if (!phy)
+                       continue;
+
+               err = ocelot_probe_port(ocelot, port, regs, phy);
+               if (err) {
+                       dev_err(&pdev->dev, "failed to probe ports\n");
+                       goto err_probe_ports;
+               }
+       }
+
+       register_netdevice_notifier(&ocelot_netdevice_nb);
+
+       dev_info(&pdev->dev, "Ocelot switch probed\n");
+
+       return 0;
+
+err_probe_ports:
+       return err;
+}
+
+static int mscc_ocelot_remove(struct platform_device *pdev)
+{
+       struct ocelot *ocelot = platform_get_drvdata(pdev);
+
+       ocelot_deinit(ocelot);
+       unregister_netdevice_notifier(&ocelot_netdevice_nb);
+
+       return 0;
+}
+
+static struct platform_driver mscc_ocelot_driver = {
+       .probe = mscc_ocelot_probe,
+       .remove = mscc_ocelot_remove,
+       .driver = {
+               .name = "ocelot-switch",
+               .of_match_table = mscc_ocelot_match,
+       },
+};
+
+module_platform_driver(mscc_ocelot_driver);
+
+MODULE_DESCRIPTION("Microsemi Ocelot switch driver");
+MODULE_AUTHOR("Alexandre Belloni <alexandre.belloni@bootlin.com>");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/ethernet/mscc/ocelot_dev.h b/drivers/net/ethernet/mscc/ocelot_dev.h
new file mode 100644
index 0000000..0a50d53
--- /dev/null
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_DEV_H_
+#define _MSCC_OCELOT_DEV_H_
+
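+/* Per-port DEV target registers.  The offsets below appear to be
+ * relative to each port's own register window (the "port%d" resource
+ * ioremapped in ocelot_board.c) rather than to a shared target.
+ */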
+#define DEV_CLOCK_CFG                                     0x0
+
+#define DEV_CLOCK_CFG_MAC_TX_RST                          BIT(7)
+#define DEV_CLOCK_CFG_MAC_RX_RST                          BIT(6)
+#define DEV_CLOCK_CFG_PCS_TX_RST                          BIT(5)
+#define DEV_CLOCK_CFG_PCS_RX_RST                          BIT(4)
+#define DEV_CLOCK_CFG_PORT_RST                            BIT(3)
+#define DEV_CLOCK_CFG_PHY_RST                             BIT(2)
+#define DEV_CLOCK_CFG_LINK_SPEED(x)                       ((x) & GENMASK(1, 0))
+#define DEV_CLOCK_CFG_LINK_SPEED_M                        GENMASK(1, 0)
+
+#define DEV_PORT_MISC                                     0x4
+
+#define DEV_PORT_MISC_FWD_ERROR_ENA                       BIT(4)
+#define DEV_PORT_MISC_FWD_PAUSE_ENA                       BIT(3)
+#define DEV_PORT_MISC_FWD_CTRL_ENA                        BIT(2)
+#define DEV_PORT_MISC_DEV_LOOP_ENA                        BIT(1)
+#define DEV_PORT_MISC_HDX_FAST_DIS                        BIT(0)
+
+#define DEV_EVENTS                                        0x8
+
+#define DEV_EEE_CFG                                       0xc
+
+#define DEV_EEE_CFG_EEE_ENA                               BIT(22)
+#define DEV_EEE_CFG_EEE_TIMER_AGE(x)                      (((x) << 15) & GENMASK(21, 15))
+#define DEV_EEE_CFG_EEE_TIMER_AGE_M                       GENMASK(21, 15)
+#define DEV_EEE_CFG_EEE_TIMER_AGE_X(x)                    (((x) & GENMASK(21, 15)) >> 15)
+#define DEV_EEE_CFG_EEE_TIMER_WAKEUP(x)                   (((x) << 8) & GENMASK(14, 8))
+#define DEV_EEE_CFG_EEE_TIMER_WAKEUP_M                    GENMASK(14, 8)
+#define DEV_EEE_CFG_EEE_TIMER_WAKEUP_X(x)                 (((x) & GENMASK(14, 8)) >> 8)
+#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF(x)                  (((x) << 1) & GENMASK(7, 1))
+#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_M                   GENMASK(7, 1)
+#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_X(x)                (((x) & GENMASK(7, 1)) >> 1)
+#define DEV_EEE_CFG_PORT_LPI                              BIT(0)
+
+#define DEV_RX_PATH_DELAY                                 0x10
+
+#define DEV_TX_PATH_DELAY                                 0x14
+
+#define DEV_PTP_PREDICT_CFG                               0x18
+
+#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG(x)        (((x) << 4) & GENMASK(11, 4))
+#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_M         GENMASK(11, 4)
+#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_X(x)      (((x) & GENMASK(11, 4)) >> 4)
+#define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG(x)      ((x) & GENMASK(3, 0))
+#define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG_M       GENMASK(3, 0)
+
+#define DEV_MAC_ENA_CFG                                   0x1c
+
+#define DEV_MAC_ENA_CFG_RX_ENA                            BIT(4)
+#define DEV_MAC_ENA_CFG_TX_ENA                            BIT(0)
+
+#define DEV_MAC_MODE_CFG                                  0x20
+
+#define DEV_MAC_MODE_CFG_FC_WORD_SYNC_ENA                 BIT(8)
+#define DEV_MAC_MODE_CFG_GIGA_MODE_ENA                    BIT(4)
+#define DEV_MAC_MODE_CFG_FDX_ENA                          BIT(0)
+
+#define DEV_MAC_MAXLEN_CFG                                0x24
+
+#define DEV_MAC_TAGS_CFG                                  0x28
+
+#define DEV_MAC_TAGS_CFG_TAG_ID(x)                        (((x) << 16) & GENMASK(31, 16))
+#define DEV_MAC_TAGS_CFG_TAG_ID_M                         GENMASK(31, 16)
+#define DEV_MAC_TAGS_CFG_TAG_ID_X(x)                      (((x) & GENMASK(31, 16)) >> 16)
+#define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA                 BIT(2)
+#define DEV_MAC_TAGS_CFG_PB_ENA                           BIT(1)
+#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA                     BIT(0)
+
+#define DEV_MAC_ADV_CHK_CFG                               0x2c
+
+#define DEV_MAC_ADV_CHK_CFG_LEN_DROP_ENA                  BIT(0)
+
+#define DEV_MAC_IFG_CFG                                   0x30
+
+#define DEV_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK             BIT(17)
+#define DEV_MAC_IFG_CFG_REDUCED_TX_IFG                    BIT(16)
+#define DEV_MAC_IFG_CFG_TX_IFG(x)                         (((x) << 8) & GENMASK(12, 8))
+#define DEV_MAC_IFG_CFG_TX_IFG_M                          GENMASK(12, 8)
+#define DEV_MAC_IFG_CFG_TX_IFG_X(x)                       (((x) & GENMASK(12, 8)) >> 8)
+#define DEV_MAC_IFG_CFG_RX_IFG2(x)                        (((x) << 4) & GENMASK(7, 4))
+#define DEV_MAC_IFG_CFG_RX_IFG2_M                         GENMASK(7, 4)
+#define DEV_MAC_IFG_CFG_RX_IFG2_X(x)                      (((x) & GENMASK(7, 4)) >> 4)
+#define DEV_MAC_IFG_CFG_RX_IFG1(x)                        ((x) & GENMASK(3, 0))
+#define DEV_MAC_IFG_CFG_RX_IFG1_M                         GENMASK(3, 0)
+
+#define DEV_MAC_HDX_CFG                                   0x34
+
+#define DEV_MAC_HDX_CFG_BYPASS_COL_SYNC                   BIT(26)
+#define DEV_MAC_HDX_CFG_OB_ENA                            BIT(25)
+#define DEV_MAC_HDX_CFG_WEXC_DIS                          BIT(24)
+#define DEV_MAC_HDX_CFG_SEED(x)                           (((x) << 16) & GENMASK(23, 16))
+#define DEV_MAC_HDX_CFG_SEED_M                            GENMASK(23, 16)
+#define DEV_MAC_HDX_CFG_SEED_X(x)                         (((x) & GENMASK(23, 16)) >> 16)
+#define DEV_MAC_HDX_CFG_SEED_LOAD                         BIT(12)
+#define DEV_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA           BIT(8)
+#define DEV_MAC_HDX_CFG_LATE_COL_POS(x)                   ((x) & GENMASK(6, 0))
+#define DEV_MAC_HDX_CFG_LATE_COL_POS_M                    GENMASK(6, 0)
+
+#define DEV_MAC_DBG_CFG                                   0x38
+
+#define DEV_MAC_DBG_CFG_TBI_MODE                          BIT(4)
+#define DEV_MAC_DBG_CFG_IFG_CRS_EXT_CHK_ENA               BIT(0)
+
+#define DEV_MAC_FC_MAC_LOW_CFG                            0x3c
+
+#define DEV_MAC_FC_MAC_HIGH_CFG                           0x40
+
+#define DEV_MAC_STICKY                                    0x44
+
+#define DEV_MAC_STICKY_RX_IPG_SHRINK_STICKY               BIT(9)
+#define DEV_MAC_STICKY_RX_PREAM_SHRINK_STICKY             BIT(8)
+#define DEV_MAC_STICKY_RX_CARRIER_EXT_STICKY              BIT(7)
+#define DEV_MAC_STICKY_RX_CARRIER_EXT_ERR_STICKY          BIT(6)
+#define DEV_MAC_STICKY_RX_JUNK_STICKY                     BIT(5)
+#define DEV_MAC_STICKY_TX_RETRANSMIT_STICKY               BIT(4)
+#define DEV_MAC_STICKY_TX_JAM_STICKY                      BIT(3)
+#define DEV_MAC_STICKY_TX_FIFO_OFLW_STICKY                BIT(2)
+#define DEV_MAC_STICKY_TX_FRM_LEN_OVR_STICKY              BIT(1)
+#define DEV_MAC_STICKY_TX_ABORT_STICKY                    BIT(0)
+
+#define PCS1G_CFG                                         0x48
+
+#define PCS1G_CFG_LINK_STATUS_TYPE                        BIT(4)
+#define PCS1G_CFG_AN_LINK_CTRL_ENA                        BIT(1)
+#define PCS1G_CFG_PCS_ENA                                 BIT(0)
+
+#define PCS1G_MODE_CFG                                    0x4c
+
+#define PCS1G_MODE_CFG_UNIDIR_MODE_ENA                    BIT(4)
+#define PCS1G_MODE_CFG_SGMII_MODE_ENA                     BIT(0)
+
+#define PCS1G_SD_CFG                                      0x50
+
+#define PCS1G_SD_CFG_SD_SEL                               BIT(8)
+#define PCS1G_SD_CFG_SD_POL                               BIT(4)
+#define PCS1G_SD_CFG_SD_ENA                               BIT(0)
+
+#define PCS1G_ANEG_CFG                                    0x54
+
+#define PCS1G_ANEG_CFG_ADV_ABILITY(x)                     (((x) << 16) & GENMASK(31, 16))
+#define PCS1G_ANEG_CFG_ADV_ABILITY_M                      GENMASK(31, 16)
+#define PCS1G_ANEG_CFG_ADV_ABILITY_X(x)                   (((x) & GENMASK(31, 16)) >> 16)
+#define PCS1G_ANEG_CFG_SW_RESOLVE_ENA                     BIT(8)
+#define PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT              BIT(1)
+#define PCS1G_ANEG_CFG_ANEG_ENA                           BIT(0)
+
+#define PCS1G_ANEG_NP_CFG                                 0x58
+
+#define PCS1G_ANEG_NP_CFG_NP_TX(x)                        (((x) << 16) & GENMASK(31, 16))
+#define PCS1G_ANEG_NP_CFG_NP_TX_M                         GENMASK(31, 16)
+#define PCS1G_ANEG_NP_CFG_NP_TX_X(x)                      (((x) & GENMASK(31, 16)) >> 16)
+#define PCS1G_ANEG_NP_CFG_NP_LOADED_ONE_SHOT              BIT(0)
+
+#define PCS1G_LB_CFG                                      0x5c
+
+#define PCS1G_LB_CFG_RA_ENA                               BIT(4)
+#define PCS1G_LB_CFG_GMII_PHY_LB_ENA                      BIT(1)
+#define PCS1G_LB_CFG_TBI_HOST_LB_ENA                      BIT(0)
+
+#define PCS1G_DBG_CFG                                     0x60
+
+#define PCS1G_DBG_CFG_UDLT                                BIT(0)
+
+#define PCS1G_CDET_CFG                                    0x64
+
+#define PCS1G_CDET_CFG_CDET_ENA                           BIT(0)
+
+#define PCS1G_ANEG_STATUS                                 0x68
+
+#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY(x)               (((x) << 16) & GENMASK(31, 16))
+#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_M                GENMASK(31, 16)
+#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_X(x)             (((x) & GENMASK(31, 16)) >> 16)
+#define PCS1G_ANEG_STATUS_PR                              BIT(4)
+#define PCS1G_ANEG_STATUS_PAGE_RX_STICKY                  BIT(3)
+#define PCS1G_ANEG_STATUS_ANEG_COMPLETE                   BIT(0)
+
+#define PCS1G_ANEG_NP_STATUS                              0x6c
+
+#define PCS1G_LINK_STATUS                                 0x70
+
+#define PCS1G_LINK_STATUS_DELAY_VAR(x)                    (((x) << 12) & GENMASK(15, 12))
+#define PCS1G_LINK_STATUS_DELAY_VAR_M                     GENMASK(15, 12)
+#define PCS1G_LINK_STATUS_DELAY_VAR_X(x)                  (((x) & GENMASK(15, 12)) >> 12)
+#define PCS1G_LINK_STATUS_SIGNAL_DETECT                   BIT(8)
+#define PCS1G_LINK_STATUS_LINK_STATUS                     BIT(4)
+#define PCS1G_LINK_STATUS_SYNC_STATUS                     BIT(0)
+
+#define PCS1G_LINK_DOWN_CNT                               0x74
+
+#define PCS1G_STICKY                                      0x78
+
+#define PCS1G_STICKY_LINK_DOWN_STICKY                     BIT(4)
+#define PCS1G_STICKY_OUT_OF_SYNC_STICKY                   BIT(0)
+
+#define PCS1G_DEBUG_STATUS                                0x7c
+
+#define PCS1G_LPI_CFG                                     0x80
+
+#define PCS1G_LPI_CFG_QSGMII_MS_SEL                       BIT(20)
+#define PCS1G_LPI_CFG_RX_LPI_OUT_DIS                      BIT(17)
+#define PCS1G_LPI_CFG_LPI_TESTMODE                        BIT(16)
+#define PCS1G_LPI_CFG_LPI_RX_WTIM(x)                      (((x) << 4) & GENMASK(5, 4))
+#define PCS1G_LPI_CFG_LPI_RX_WTIM_M                       GENMASK(5, 4)
+#define PCS1G_LPI_CFG_LPI_RX_WTIM_X(x)                    (((x) & GENMASK(5, 4)) >> 4)
+#define PCS1G_LPI_CFG_TX_ASSERT_LPIDLE                    BIT(0)
+
+#define PCS1G_LPI_WAKE_ERROR_CNT                          0x84
+
+#define PCS1G_LPI_STATUS                                  0x88
+
+#define PCS1G_LPI_STATUS_RX_LPI_FAIL                      BIT(16)
+#define PCS1G_LPI_STATUS_RX_LPI_EVENT_STICKY              BIT(12)
+#define PCS1G_LPI_STATUS_RX_QUIET                         BIT(9)
+#define PCS1G_LPI_STATUS_RX_LPI_MODE                      BIT(8)
+#define PCS1G_LPI_STATUS_TX_LPI_EVENT_STICKY              BIT(4)
+#define PCS1G_LPI_STATUS_TX_QUIET                         BIT(1)
+#define PCS1G_LPI_STATUS_TX_LPI_MODE                      BIT(0)
+
+#define PCS1G_TSTPAT_MODE_CFG                             0x8c
+
+#define PCS1G_TSTPAT_STATUS                               0x90
+
+#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT(x)                (((x) << 8) & GENMASK(15, 8))
+#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_M                 GENMASK(15, 8)
+#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_X(x)              (((x) & GENMASK(15, 8)) >> 8)
+#define PCS1G_TSTPAT_STATUS_JTP_ERR                       BIT(4)
+#define PCS1G_TSTPAT_STATUS_JTP_LOCK                      BIT(0)
+
+#define DEV_PCS_FX100_CFG                                 0x94
+
+#define DEV_PCS_FX100_CFG_SD_SEL                          BIT(26)
+#define DEV_PCS_FX100_CFG_SD_POL                          BIT(25)
+#define DEV_PCS_FX100_CFG_SD_ENA                          BIT(24)
+#define DEV_PCS_FX100_CFG_LOOPBACK_ENA                    BIT(20)
+#define DEV_PCS_FX100_CFG_SWAP_MII_ENA                    BIT(16)
+#define DEV_PCS_FX100_CFG_RXBITSEL(x)                     (((x) << 12) & GENMASK(15, 12))
+#define DEV_PCS_FX100_CFG_RXBITSEL_M                      GENMASK(15, 12)
+#define DEV_PCS_FX100_CFG_RXBITSEL_X(x)                   (((x) & GENMASK(15, 12)) >> 12)
+#define DEV_PCS_FX100_CFG_SIGDET_CFG(x)                   (((x) << 9) & GENMASK(10, 9))
+#define DEV_PCS_FX100_CFG_SIGDET_CFG_M                    GENMASK(10, 9)
+#define DEV_PCS_FX100_CFG_SIGDET_CFG_X(x)                 (((x) & GENMASK(10, 9)) >> 9)
+#define DEV_PCS_FX100_CFG_LINKHYST_TM_ENA                 BIT(8)
+#define DEV_PCS_FX100_CFG_LINKHYSTTIMER(x)                (((x) << 4) & GENMASK(7, 4))
+#define DEV_PCS_FX100_CFG_LINKHYSTTIMER_M                 GENMASK(7, 4)
+#define DEV_PCS_FX100_CFG_LINKHYSTTIMER_X(x)              (((x) & GENMASK(7, 4)) >> 4)
+#define DEV_PCS_FX100_CFG_UNIDIR_MODE_ENA                 BIT(3)
+#define DEV_PCS_FX100_CFG_FEFCHK_ENA                      BIT(2)
+#define DEV_PCS_FX100_CFG_FEFGEN_ENA                      BIT(1)
+#define DEV_PCS_FX100_CFG_PCS_ENA                         BIT(0)
+
+#define DEV_PCS_FX100_STATUS                              0x98
+
+#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP(x)              (((x) << 8) & GENMASK(11, 8))
+#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_M               GENMASK(11, 8)
+#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_X(x)            (((x) & GENMASK(11, 8)) >> 8)
+#define DEV_PCS_FX100_STATUS_PCS_ERROR_STICKY             BIT(7)
+#define DEV_PCS_FX100_STATUS_FEF_FOUND_STICKY             BIT(6)
+#define DEV_PCS_FX100_STATUS_SSD_ERROR_STICKY             BIT(5)
+#define DEV_PCS_FX100_STATUS_SYNC_LOST_STICKY             BIT(4)
+#define DEV_PCS_FX100_STATUS_FEF_STATUS                   BIT(2)
+#define DEV_PCS_FX100_STATUS_SIGNAL_DETECT                BIT(1)
+#define DEV_PCS_FX100_STATUS_SYNC_STATUS                  BIT(0)
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_dev_gmii.h b/drivers/net/ethernet/mscc/ocelot_dev_gmii.h
new file mode 100644
index 0000000..6aa40ea
--- /dev/null
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_DEV_GMII_H_
+#define _MSCC_OCELOT_DEV_GMII_H_
+
+#define DEV_GMII_PORT_MODE_CLOCK_CFG                      0x0
+
+#define DEV_GMII_PORT_MODE_CLOCK_CFG_MAC_TX_RST           BIT(5)
+#define DEV_GMII_PORT_MODE_CLOCK_CFG_MAC_RX_RST           BIT(4)
+#define DEV_GMII_PORT_MODE_CLOCK_CFG_PORT_RST             BIT(3)
+#define DEV_GMII_PORT_MODE_CLOCK_CFG_PHY_RST              BIT(2)
+#define DEV_GMII_PORT_MODE_CLOCK_CFG_LINK_SPEED(x)        ((x) & GENMASK(1, 0))
+#define DEV_GMII_PORT_MODE_CLOCK_CFG_LINK_SPEED_M         GENMASK(1, 0)
+
+#define DEV_GMII_PORT_MODE_PORT_MISC                      0x4
+
+#define DEV_GMII_PORT_MODE_PORT_MISC_MPLS_RX_ENA          BIT(5)
+#define DEV_GMII_PORT_MODE_PORT_MISC_FWD_ERROR_ENA        BIT(4)
+#define DEV_GMII_PORT_MODE_PORT_MISC_FWD_PAUSE_ENA        BIT(3)
+#define DEV_GMII_PORT_MODE_PORT_MISC_FWD_CTRL_ENA         BIT(2)
+#define DEV_GMII_PORT_MODE_PORT_MISC_GMII_LOOP_ENA        BIT(1)
+#define DEV_GMII_PORT_MODE_PORT_MISC_DEV_LOOP_ENA         BIT(0)
+
+#define DEV_GMII_PORT_MODE_EVENTS                         0x8
+
+#define DEV_GMII_PORT_MODE_EEE_CFG                        0xc
+
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_ENA                BIT(22)
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_AGE(x)       (((x) << 15) & GENMASK(21, 15))
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_AGE_M        GENMASK(21, 15)
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_AGE_X(x)     (((x) & GENMASK(21, 15)) >> 15)
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_WAKEUP(x)    (((x) << 8) & GENMASK(14, 8))
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_WAKEUP_M     GENMASK(14, 8)
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_WAKEUP_X(x)  (((x) & GENMASK(14, 8)) >> 8)
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_HOLDOFF(x)   (((x) << 1) & GENMASK(7, 1))
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_HOLDOFF_M    GENMASK(7, 1)
+#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_HOLDOFF_X(x) (((x) & GENMASK(7, 1)) >> 1)
+#define DEV_GMII_PORT_MODE_EEE_CFG_PORT_LPI               BIT(0)
+
+#define DEV_GMII_PORT_MODE_RX_PATH_DELAY                  0x10
+
+#define DEV_GMII_PORT_MODE_TX_PATH_DELAY                  0x14
+
+#define DEV_GMII_PORT_MODE_PTP_PREDICT_CFG                0x18
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_ENA_CFG               0x1c
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_ENA_CFG_RX_ENA        BIT(4)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_ENA_CFG_TX_ENA        BIT(0)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG              0x20
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG_FC_WORD_SYNC_ENA BIT(8)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG_GIGA_MODE_ENA BIT(4)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG_FDX_ENA      BIT(0)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_MAXLEN_CFG            0x24
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG              0x28
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_TAG_ID(x)    (((x) << 16) & GENMASK(31, 16))
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_TAG_ID_M     GENMASK(31, 16)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_TAG_ID_X(x)  (((x) & GENMASK(31, 16)) >> 16)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_PB_ENA       BIT(1)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_ADV_CHK_CFG           0x2c
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_ADV_CHK_CFG_LEN_DROP_ENA BIT(0)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG               0x30
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK BIT(17)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_REDUCED_TX_IFG BIT(16)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_TX_IFG(x)     (((x) << 8) & GENMASK(12, 8))
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_TX_IFG_M      GENMASK(12, 8)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_TX_IFG_X(x)   (((x) & GENMASK(12, 8)) >> 8)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG2(x)    (((x) << 4) & GENMASK(7, 4))
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG2_M     GENMASK(7, 4)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG2_X(x)  (((x) & GENMASK(7, 4)) >> 4)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG1(x)    ((x) & GENMASK(3, 0))
+#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG1_M     GENMASK(3, 0)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG               0x34
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_BYPASS_COL_SYNC BIT(26)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_OB_ENA        BIT(25)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_WEXC_DIS      BIT(24)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED(x)       (((x) << 16) & GENMASK(23, 16))
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED_M        GENMASK(23, 16)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED_X(x)     (((x) & GENMASK(23, 16)) >> 16)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED_LOAD     BIT(12)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA BIT(8)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_LATE_COL_POS(x) ((x) & GENMASK(6, 0))
+#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_LATE_COL_POS_M GENMASK(6, 0)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_DBG_CFG               0x38
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_DBG_CFG_TBI_MODE      BIT(4)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_DBG_CFG_IFG_CRS_EXT_CHK_ENA BIT(0)
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_FC_MAC_LOW_CFG        0x3c
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_FC_MAC_HIGH_CFG       0x40
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY                0x44
+
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_IPG_SHRINK_STICKY BIT(9)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_PREAM_SHRINK_STICKY BIT(8)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_CARRIER_EXT_STICKY BIT(7)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_CARRIER_EXT_ERR_STICKY BIT(6)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_JUNK_STICKY BIT(5)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_RETRANSMIT_STICKY BIT(4)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_JAM_STICKY  BIT(3)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_FIFO_OFLW_STICKY BIT(2)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_FRM_LEN_OVR_STICKY BIT(1)
+#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_ABORT_STICKY BIT(0)
+
+#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG                  0x48
+
+#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG_MM_RX_ENA        BIT(0)
+#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG_MM_TX_ENA        BIT(4)
+#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG_KEEP_S_AFTER_D   BIT(8)
+
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG                   0x4c
+
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_DIS    BIT(0)
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_TIME(x) (((x) << 4) & GENMASK(11, 4))
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_TIME_M GENMASK(11, 4)
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_TIME_X(x) (((x) & GENMASK(11, 4)) >> 4)
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_VERIF_TIMER_UNITS(x) (((x) << 12) & GENMASK(13, 12))
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_VERIF_TIMER_UNITS_M GENMASK(13, 12)
+#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_VERIF_TIMER_UNITS_X(x) (((x) & GENMASK(13, 12)) >> 12)
+
+#define DEV_GMII_MM_STATISTICS_MM_STATUS                  0x50
+
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_ACTIVE_STATUS BIT(0)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_ACTIVE_STICKY BIT(4)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_VERIFY_STATE(x) (((x) << 8) & GENMASK(10, 8))
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_VERIFY_STATE_M GENMASK(10, 8)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_VERIFY_STATE_X(x) (((x) & GENMASK(10, 8)) >> 8)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_UNEXP_RX_PFRM_STICKY BIT(12)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_UNEXP_TX_PFRM_STICKY BIT(16)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_MM_RX_FRAME_STATUS BIT(20)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_MM_TX_FRAME_STATUS BIT(24)
+#define DEV_GMII_MM_STATISTICS_MM_STATUS_MM_TX_PRMPT_STATUS BIT(28)
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_hsio.h b/drivers/net/ethernet/mscc/ocelot_hsio.h
new file mode 100644
index 0000000..d93ddec
--- /dev/null
@@ -0,0 +1,785 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_HSIO_H_
+#define _MSCC_OCELOT_HSIO_H_
+
+#define HSIO_PLL5G_CFG0_ENA_ROT                           BIT(31)
+#define HSIO_PLL5G_CFG0_ENA_LANE                          BIT(30)
+#define HSIO_PLL5G_CFG0_ENA_CLKTREE                       BIT(29)
+#define HSIO_PLL5G_CFG0_DIV4                              BIT(28)
+#define HSIO_PLL5G_CFG0_ENA_LOCK_FINE                     BIT(27)
+#define HSIO_PLL5G_CFG0_SELBGV820(x)                      (((x) << 23) & GENMASK(26, 23))
+#define HSIO_PLL5G_CFG0_SELBGV820_M                       GENMASK(26, 23)
+#define HSIO_PLL5G_CFG0_SELBGV820_X(x)                    (((x) & GENMASK(26, 23)) >> 23)
+#define HSIO_PLL5G_CFG0_LOOP_BW_RES(x)                    (((x) << 18) & GENMASK(22, 18))
+#define HSIO_PLL5G_CFG0_LOOP_BW_RES_M                     GENMASK(22, 18)
+#define HSIO_PLL5G_CFG0_LOOP_BW_RES_X(x)                  (((x) & GENMASK(22, 18)) >> 18)
+#define HSIO_PLL5G_CFG0_SELCPI(x)                         (((x) << 16) & GENMASK(17, 16))
+#define HSIO_PLL5G_CFG0_SELCPI_M                          GENMASK(17, 16)
+#define HSIO_PLL5G_CFG0_SELCPI_X(x)                       (((x) & GENMASK(17, 16)) >> 16)
+#define HSIO_PLL5G_CFG0_ENA_VCO_CONTRH                    BIT(15)
+#define HSIO_PLL5G_CFG0_ENA_CP1                           BIT(14)
+#define HSIO_PLL5G_CFG0_ENA_VCO_BUF                       BIT(13)
+#define HSIO_PLL5G_CFG0_ENA_BIAS                          BIT(12)
+#define HSIO_PLL5G_CFG0_CPU_CLK_DIV(x)                    (((x) << 6) & GENMASK(11, 6))
+#define HSIO_PLL5G_CFG0_CPU_CLK_DIV_M                     GENMASK(11, 6)
+#define HSIO_PLL5G_CFG0_CPU_CLK_DIV_X(x)                  (((x) & GENMASK(11, 6)) >> 6)
+#define HSIO_PLL5G_CFG0_CORE_CLK_DIV(x)                   ((x) & GENMASK(5, 0))
+#define HSIO_PLL5G_CFG0_CORE_CLK_DIV_M                    GENMASK(5, 0)
+
+#define HSIO_PLL5G_CFG1_ENA_DIRECT                        BIT(18)
+#define HSIO_PLL5G_CFG1_ROT_SPEED                         BIT(17)
+#define HSIO_PLL5G_CFG1_ROT_DIR                           BIT(16)
+#define HSIO_PLL5G_CFG1_READBACK_DATA_SEL                 BIT(15)
+#define HSIO_PLL5G_CFG1_RC_ENABLE                         BIT(14)
+#define HSIO_PLL5G_CFG1_RC_CTRL_DATA(x)                   (((x) << 6) & GENMASK(13, 6))
+#define HSIO_PLL5G_CFG1_RC_CTRL_DATA_M                    GENMASK(13, 6)
+#define HSIO_PLL5G_CFG1_RC_CTRL_DATA_X(x)                 (((x) & GENMASK(13, 6)) >> 6)
+#define HSIO_PLL5G_CFG1_QUARTER_RATE                      BIT(5)
+#define HSIO_PLL5G_CFG1_PWD_TX                            BIT(4)
+#define HSIO_PLL5G_CFG1_PWD_RX                            BIT(3)
+#define HSIO_PLL5G_CFG1_OUT_OF_RANGE_RECAL_ENA            BIT(2)
+#define HSIO_PLL5G_CFG1_HALF_RATE                         BIT(1)
+#define HSIO_PLL5G_CFG1_FORCE_SET_ENA                     BIT(0)
+
+#define HSIO_PLL5G_CFG2_ENA_TEST_MODE                     BIT(30)
+#define HSIO_PLL5G_CFG2_ENA_PFD_IN_FLIP                   BIT(29)
+#define HSIO_PLL5G_CFG2_ENA_VCO_NREF_TESTOUT              BIT(28)
+#define HSIO_PLL5G_CFG2_ENA_FBTESTOUT                     BIT(27)
+#define HSIO_PLL5G_CFG2_ENA_RCPLL                         BIT(26)
+#define HSIO_PLL5G_CFG2_ENA_CP2                           BIT(25)
+#define HSIO_PLL5G_CFG2_ENA_CLK_BYPASS1                   BIT(24)
+#define HSIO_PLL5G_CFG2_AMPC_SEL(x)                       (((x) << 16) & GENMASK(23, 16))
+#define HSIO_PLL5G_CFG2_AMPC_SEL_M                        GENMASK(23, 16)
+#define HSIO_PLL5G_CFG2_AMPC_SEL_X(x)                     (((x) & GENMASK(23, 16)) >> 16)
+#define HSIO_PLL5G_CFG2_ENA_CLK_BYPASS                    BIT(15)
+#define HSIO_PLL5G_CFG2_PWD_AMPCTRL_N                     BIT(14)
+#define HSIO_PLL5G_CFG2_ENA_AMPCTRL                       BIT(13)
+#define HSIO_PLL5G_CFG2_ENA_AMP_CTRL_FORCE                BIT(12)
+#define HSIO_PLL5G_CFG2_FRC_FSM_POR                       BIT(11)
+#define HSIO_PLL5G_CFG2_DISABLE_FSM_POR                   BIT(10)
+#define HSIO_PLL5G_CFG2_GAIN_TEST(x)                      (((x) << 5) & GENMASK(9, 5))
+#define HSIO_PLL5G_CFG2_GAIN_TEST_M                       GENMASK(9, 5)
+#define HSIO_PLL5G_CFG2_GAIN_TEST_X(x)                    (((x) & GENMASK(9, 5)) >> 5)
+#define HSIO_PLL5G_CFG2_EN_RESET_OVERRUN                  BIT(4)
+#define HSIO_PLL5G_CFG2_EN_RESET_LIM_DET                  BIT(3)
+#define HSIO_PLL5G_CFG2_EN_RESET_FRQ_DET                  BIT(2)
+#define HSIO_PLL5G_CFG2_DISABLE_FSM                       BIT(1)
+#define HSIO_PLL5G_CFG2_ENA_GAIN_TEST                     BIT(0)
+
+#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL(x)               (((x) << 22) & GENMASK(23, 22))
+#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL_M                GENMASK(23, 22)
+#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL_X(x)             (((x) & GENMASK(23, 22)) >> 22)
+#define HSIO_PLL5G_CFG3_TESTOUT_SEL(x)                    (((x) << 19) & GENMASK(21, 19))
+#define HSIO_PLL5G_CFG3_TESTOUT_SEL_M                     GENMASK(21, 19)
+#define HSIO_PLL5G_CFG3_TESTOUT_SEL_X(x)                  (((x) & GENMASK(21, 19)) >> 19)
+#define HSIO_PLL5G_CFG3_ENA_ANA_TEST_OUT                  BIT(18)
+#define HSIO_PLL5G_CFG3_ENA_TEST_OUT                      BIT(17)
+#define HSIO_PLL5G_CFG3_SEL_FBDCLK                        BIT(16)
+#define HSIO_PLL5G_CFG3_SEL_CML_CMOS_PFD                  BIT(15)
+#define HSIO_PLL5G_CFG3_RST_FB_N                          BIT(14)
+#define HSIO_PLL5G_CFG3_FORCE_VCO_CONTRH                  BIT(13)
+#define HSIO_PLL5G_CFG3_FORCE_LO                          BIT(12)
+#define HSIO_PLL5G_CFG3_FORCE_HI                          BIT(11)
+#define HSIO_PLL5G_CFG3_FORCE_ENA                         BIT(10)
+#define HSIO_PLL5G_CFG3_FORCE_CP                          BIT(9)
+#define HSIO_PLL5G_CFG3_FBDIVSEL_TST_ENA                  BIT(8)
+#define HSIO_PLL5G_CFG3_FBDIVSEL(x)                       ((x) & GENMASK(7, 0))
+#define HSIO_PLL5G_CFG3_FBDIVSEL_M                        GENMASK(7, 0)
+
+#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL(x)                   (((x) << 16) & GENMASK(23, 16))
+#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL_M                    GENMASK(23, 16)
+#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL_X(x)                 (((x) & GENMASK(23, 16)) >> 16)
+#define HSIO_PLL5G_CFG4_IB_CTRL(x)                        ((x) & GENMASK(15, 0))
+#define HSIO_PLL5G_CFG4_IB_CTRL_M                         GENMASK(15, 0)
+
+#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL(x)                   (((x) << 16) & GENMASK(23, 16))
+#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL_M                    GENMASK(23, 16)
+#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL_X(x)                 (((x) & GENMASK(23, 16)) >> 16)
+#define HSIO_PLL5G_CFG5_OB_CTRL(x)                        ((x) & GENMASK(15, 0))
+#define HSIO_PLL5G_CFG5_OB_CTRL_M                         GENMASK(15, 0)
+
+#define HSIO_PLL5G_CFG6_REFCLK_SEL_SRC                    BIT(23)
+#define HSIO_PLL5G_CFG6_REFCLK_SEL(x)                     (((x) << 20) & GENMASK(22, 20))
+#define HSIO_PLL5G_CFG6_REFCLK_SEL_M                      GENMASK(22, 20)
+#define HSIO_PLL5G_CFG6_REFCLK_SEL_X(x)                   (((x) & GENMASK(22, 20)) >> 20)
+#define HSIO_PLL5G_CFG6_REFCLK_SRC                        BIT(19)
+#define HSIO_PLL5G_CFG6_POR_DEL_SEL(x)                    (((x) << 16) & GENMASK(17, 16))
+#define HSIO_PLL5G_CFG6_POR_DEL_SEL_M                     GENMASK(17, 16)
+#define HSIO_PLL5G_CFG6_POR_DEL_SEL_X(x)                  (((x) & GENMASK(17, 16)) >> 16)
+#define HSIO_PLL5G_CFG6_DIV125REF_SEL(x)                  (((x) << 8) & GENMASK(15, 8))
+#define HSIO_PLL5G_CFG6_DIV125REF_SEL_M                   GENMASK(15, 8)
+#define HSIO_PLL5G_CFG6_DIV125REF_SEL_X(x)                (((x) & GENMASK(15, 8)) >> 8)
+#define HSIO_PLL5G_CFG6_ENA_REFCLKC2                      BIT(7)
+#define HSIO_PLL5G_CFG6_ENA_FBCLKC2                       BIT(6)
+#define HSIO_PLL5G_CFG6_DDR_CLK_DIV(x)                    ((x) & GENMASK(5, 0))
+#define HSIO_PLL5G_CFG6_DDR_CLK_DIV_M                     GENMASK(5, 0)
+
+#define HSIO_PLL5G_STATUS0_RANGE_LIM                      BIT(12)
+#define HSIO_PLL5G_STATUS0_OUT_OF_RANGE_ERR               BIT(11)
+#define HSIO_PLL5G_STATUS0_CALIBRATION_ERR                BIT(10)
+#define HSIO_PLL5G_STATUS0_CALIBRATION_DONE               BIT(9)
+#define HSIO_PLL5G_STATUS0_READBACK_DATA(x)               (((x) << 1) & GENMASK(8, 1))
+#define HSIO_PLL5G_STATUS0_READBACK_DATA_M                GENMASK(8, 1)
+#define HSIO_PLL5G_STATUS0_READBACK_DATA_X(x)             (((x) & GENMASK(8, 1)) >> 1)
+#define HSIO_PLL5G_STATUS0_LOCK_STATUS                    BIT(0)
+
+#define HSIO_PLL5G_STATUS1_SIG_DEL(x)                     (((x) << 21) & GENMASK(28, 21))
+#define HSIO_PLL5G_STATUS1_SIG_DEL_M                      GENMASK(28, 21)
+#define HSIO_PLL5G_STATUS1_SIG_DEL_X(x)                   (((x) & GENMASK(28, 21)) >> 21)
+#define HSIO_PLL5G_STATUS1_GAIN_STAT(x)                   (((x) << 16) & GENMASK(20, 16))
+#define HSIO_PLL5G_STATUS1_GAIN_STAT_M                    GENMASK(20, 16)
+#define HSIO_PLL5G_STATUS1_GAIN_STAT_X(x)                 (((x) & GENMASK(20, 16)) >> 16)
+#define HSIO_PLL5G_STATUS1_FBCNT_DIF(x)                   (((x) << 4) & GENMASK(13, 4))
+#define HSIO_PLL5G_STATUS1_FBCNT_DIF_M                    GENMASK(13, 4)
+#define HSIO_PLL5G_STATUS1_FBCNT_DIF_X(x)                 (((x) & GENMASK(13, 4)) >> 4)
+#define HSIO_PLL5G_STATUS1_FSM_STAT(x)                    (((x) << 1) & GENMASK(3, 1))
+#define HSIO_PLL5G_STATUS1_FSM_STAT_M                     GENMASK(3, 1)
+#define HSIO_PLL5G_STATUS1_FSM_STAT_X(x)                  (((x) & GENMASK(3, 1)) >> 1)
+#define HSIO_PLL5G_STATUS1_FSM_LOCK                       BIT(0)
+
+#define HSIO_PLL5G_BIST_CFG0_PLLB_START_BIST              BIT(31)
+#define HSIO_PLL5G_BIST_CFG0_PLLB_MEAS_MODE               BIT(30)
+#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT(x)          (((x) << 20) & GENMASK(23, 20))
+#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT_M           GENMASK(23, 20)
+#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT_X(x)        (((x) & GENMASK(23, 20)) >> 20)
+#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT(x)          (((x) << 16) & GENMASK(19, 16))
+#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT_M           GENMASK(19, 16)
+#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT_X(x)        (((x) & GENMASK(19, 16)) >> 16)
+#define HSIO_PLL5G_BIST_CFG0_PLLB_DIV_FACTOR_PRE(x)       ((x) & GENMASK(15, 0))
+#define HSIO_PLL5G_BIST_CFG0_PLLB_DIV_FACTOR_PRE_M        GENMASK(15, 0)
+
+#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT(x)            (((x) << 4) & GENMASK(7, 4))
+#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT_M             GENMASK(7, 4)
+#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT_X(x)          (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_PLL5G_BIST_STAT0_PLLB_BUSY                   BIT(2)
+#define HSIO_PLL5G_BIST_STAT0_PLLB_DONE_N                 BIT(1)
+#define HSIO_PLL5G_BIST_STAT0_PLLB_FAIL                   BIT(0)
+
+#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT(x)             (((x) << 16) & GENMASK(31, 16))
+#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT_M              GENMASK(31, 16)
+#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT_X(x)           (((x) & GENMASK(31, 16)) >> 16)
+#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_REF_DIFF(x)        ((x) & GENMASK(15, 0))
+#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_REF_DIFF_M         GENMASK(15, 0)
+
+#define HSIO_RCOMP_CFG0_PWD_ENA                           BIT(13)
+#define HSIO_RCOMP_CFG0_RUN_CAL                           BIT(12)
+#define HSIO_RCOMP_CFG0_SPEED_SEL(x)                      (((x) << 10) & GENMASK(11, 10))
+#define HSIO_RCOMP_CFG0_SPEED_SEL_M                       GENMASK(11, 10)
+#define HSIO_RCOMP_CFG0_SPEED_SEL_X(x)                    (((x) & GENMASK(11, 10)) >> 10)
+#define HSIO_RCOMP_CFG0_MODE_SEL(x)                       (((x) << 8) & GENMASK(9, 8))
+#define HSIO_RCOMP_CFG0_MODE_SEL_M                        GENMASK(9, 8)
+#define HSIO_RCOMP_CFG0_MODE_SEL_X(x)                     (((x) & GENMASK(9, 8)) >> 8)
+#define HSIO_RCOMP_CFG0_FORCE_ENA                         BIT(4)
+#define HSIO_RCOMP_CFG0_RCOMP_VAL(x)                      ((x) & GENMASK(3, 0))
+#define HSIO_RCOMP_CFG0_RCOMP_VAL_M                       GENMASK(3, 0)
+
+#define HSIO_RCOMP_STATUS_BUSY                            BIT(12)
+#define HSIO_RCOMP_STATUS_DELTA_ALERT                     BIT(7)
+#define HSIO_RCOMP_STATUS_RCOMP(x)                        ((x) & GENMASK(3, 0))
+#define HSIO_RCOMP_STATUS_RCOMP_M                         GENMASK(3, 0)
+
+#define HSIO_SYNC_ETH_CFG_RSZ                             0x4
+
+#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC(x)             (((x) << 4) & GENMASK(7, 4))
+#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC_M              GENMASK(7, 4)
+#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC_X(x)           (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV(x)             (((x) << 1) & GENMASK(3, 1))
+#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV_M              GENMASK(3, 1)
+#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV_X(x)           (((x) & GENMASK(3, 1)) >> 1)
+#define HSIO_SYNC_ETH_CFG_RECO_CLK_ENA                    BIT(0)
+
+#define HSIO_SYNC_ETH_PLL_CFG_PLL_AUTO_SQUELCH_ENA        BIT(0)
+
+#define HSIO_S1G_DES_CFG_DES_PHS_CTRL(x)                  (((x) << 13) & GENMASK(16, 13))
+#define HSIO_S1G_DES_CFG_DES_PHS_CTRL_M                   GENMASK(16, 13)
+#define HSIO_S1G_DES_CFG_DES_PHS_CTRL_X(x)                (((x) & GENMASK(16, 13)) >> 13)
+#define HSIO_S1G_DES_CFG_DES_CPMD_SEL(x)                  (((x) << 11) & GENMASK(12, 11))
+#define HSIO_S1G_DES_CFG_DES_CPMD_SEL_M                   GENMASK(12, 11)
+#define HSIO_S1G_DES_CFG_DES_CPMD_SEL_X(x)                (((x) & GENMASK(12, 11)) >> 11)
+#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL(x)                 (((x) << 8) & GENMASK(10, 8))
+#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL_M                  GENMASK(10, 8)
+#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL_X(x)               (((x) & GENMASK(10, 8)) >> 8)
+#define HSIO_S1G_DES_CFG_DES_BW_ANA(x)                    (((x) << 5) & GENMASK(7, 5))
+#define HSIO_S1G_DES_CFG_DES_BW_ANA_M                     GENMASK(7, 5)
+#define HSIO_S1G_DES_CFG_DES_BW_ANA_X(x)                  (((x) & GENMASK(7, 5)) >> 5)
+#define HSIO_S1G_DES_CFG_DES_SWAP_ANA                     BIT(4)
+#define HSIO_S1G_DES_CFG_DES_BW_HYST(x)                   (((x) << 1) & GENMASK(3, 1))
+#define HSIO_S1G_DES_CFG_DES_BW_HYST_M                    GENMASK(3, 1)
+#define HSIO_S1G_DES_CFG_DES_BW_HYST_X(x)                 (((x) & GENMASK(3, 1)) >> 1)
+#define HSIO_S1G_DES_CFG_DES_SWAP_HYST                    BIT(0)
+
+#define HSIO_S1G_IB_CFG_IB_FX100_ENA                      BIT(27)
+#define HSIO_S1G_IB_CFG_ACJTAG_HYST(x)                    (((x) << 24) & GENMASK(26, 24))
+#define HSIO_S1G_IB_CFG_ACJTAG_HYST_M                     GENMASK(26, 24)
+#define HSIO_S1G_IB_CFG_ACJTAG_HYST_X(x)                  (((x) & GENMASK(26, 24)) >> 24)
+#define HSIO_S1G_IB_CFG_IB_DET_LEV(x)                     (((x) << 19) & GENMASK(21, 19))
+#define HSIO_S1G_IB_CFG_IB_DET_LEV_M                      GENMASK(21, 19)
+#define HSIO_S1G_IB_CFG_IB_DET_LEV_X(x)                   (((x) & GENMASK(21, 19)) >> 19)
+#define HSIO_S1G_IB_CFG_IB_HYST_LEV                       BIT(14)
+#define HSIO_S1G_IB_CFG_IB_ENA_CMV_TERM                   BIT(13)
+#define HSIO_S1G_IB_CFG_IB_ENA_DC_COUPLING                BIT(12)
+#define HSIO_S1G_IB_CFG_IB_ENA_DETLEV                     BIT(11)
+#define HSIO_S1G_IB_CFG_IB_ENA_HYST                       BIT(10)
+#define HSIO_S1G_IB_CFG_IB_ENA_OFFSET_COMP                BIT(9)
+#define HSIO_S1G_IB_CFG_IB_EQ_GAIN(x)                     (((x) << 6) & GENMASK(8, 6))
+#define HSIO_S1G_IB_CFG_IB_EQ_GAIN_M                      GENMASK(8, 6)
+#define HSIO_S1G_IB_CFG_IB_EQ_GAIN_X(x)                   (((x) & GENMASK(8, 6)) >> 6)
+#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ(x)             (((x) << 4) & GENMASK(5, 4))
+#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ_M              GENMASK(5, 4)
+#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ_X(x)           (((x) & GENMASK(5, 4)) >> 4)
+#define HSIO_S1G_IB_CFG_IB_RESISTOR_CTRL(x)               ((x) & GENMASK(3, 0))
+#define HSIO_S1G_IB_CFG_IB_RESISTOR_CTRL_M                GENMASK(3, 0)
+
+#define HSIO_S1G_OB_CFG_OB_SLP(x)                         (((x) << 17) & GENMASK(18, 17))
+#define HSIO_S1G_OB_CFG_OB_SLP_M                          GENMASK(18, 17)
+#define HSIO_S1G_OB_CFG_OB_SLP_X(x)                       (((x) & GENMASK(18, 17)) >> 17)
+#define HSIO_S1G_OB_CFG_OB_AMP_CTRL(x)                    (((x) << 13) & GENMASK(16, 13))
+#define HSIO_S1G_OB_CFG_OB_AMP_CTRL_M                     GENMASK(16, 13)
+#define HSIO_S1G_OB_CFG_OB_AMP_CTRL_X(x)                  (((x) & GENMASK(16, 13)) >> 13)
+#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL(x)               (((x) << 10) & GENMASK(12, 10))
+#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL_M                GENMASK(12, 10)
+#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL_X(x)             (((x) & GENMASK(12, 10)) >> 10)
+#define HSIO_S1G_OB_CFG_OB_DIS_VCM_CTRL                   BIT(9)
+#define HSIO_S1G_OB_CFG_OB_EN_MEAS_VREG                   BIT(8)
+#define HSIO_S1G_OB_CFG_OB_VCM_CTRL(x)                    (((x) << 4) & GENMASK(7, 4))
+#define HSIO_S1G_OB_CFG_OB_VCM_CTRL_M                     GENMASK(7, 4)
+#define HSIO_S1G_OB_CFG_OB_VCM_CTRL_X(x)                  (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_S1G_OB_CFG_OB_RESISTOR_CTRL(x)               ((x) & GENMASK(3, 0))
+#define HSIO_S1G_OB_CFG_OB_RESISTOR_CTRL_M                GENMASK(3, 0)
+
+#define HSIO_S1G_SER_CFG_SER_IDLE                         BIT(9)
+#define HSIO_S1G_SER_CFG_SER_DEEMPH                       BIT(8)
+#define HSIO_S1G_SER_CFG_SER_CPMD_SEL                     BIT(7)
+#define HSIO_S1G_SER_CFG_SER_SWAP_CPMD                    BIT(6)
+#define HSIO_S1G_SER_CFG_SER_ALISEL(x)                    (((x) << 4) & GENMASK(5, 4))
+#define HSIO_S1G_SER_CFG_SER_ALISEL_M                     GENMASK(5, 4)
+#define HSIO_S1G_SER_CFG_SER_ALISEL_X(x)                  (((x) & GENMASK(5, 4)) >> 4)
+#define HSIO_S1G_SER_CFG_SER_ENHYS                        BIT(3)
+#define HSIO_S1G_SER_CFG_SER_BIG_WIN                      BIT(2)
+#define HSIO_S1G_SER_CFG_SER_EN_WIN                       BIT(1)
+#define HSIO_S1G_SER_CFG_SER_ENALI                        BIT(0)
+
+#define HSIO_S1G_COMMON_CFG_SYS_RST                       BIT(31)
+#define HSIO_S1G_COMMON_CFG_SE_AUTO_SQUELCH_ENA           BIT(21)
+#define HSIO_S1G_COMMON_CFG_ENA_LANE                      BIT(18)
+#define HSIO_S1G_COMMON_CFG_PWD_RX                        BIT(17)
+#define HSIO_S1G_COMMON_CFG_PWD_TX                        BIT(16)
+#define HSIO_S1G_COMMON_CFG_LANE_CTRL(x)                  (((x) << 13) & GENMASK(15, 13))
+#define HSIO_S1G_COMMON_CFG_LANE_CTRL_M                   GENMASK(15, 13)
+#define HSIO_S1G_COMMON_CFG_LANE_CTRL_X(x)                (((x) & GENMASK(15, 13)) >> 13)
+#define HSIO_S1G_COMMON_CFG_ENA_DIRECT                    BIT(12)
+#define HSIO_S1G_COMMON_CFG_ENA_ELOOP                     BIT(11)
+#define HSIO_S1G_COMMON_CFG_ENA_FLOOP                     BIT(10)
+#define HSIO_S1G_COMMON_CFG_ENA_ILOOP                     BIT(9)
+#define HSIO_S1G_COMMON_CFG_ENA_PLOOP                     BIT(8)
+#define HSIO_S1G_COMMON_CFG_HRATE                         BIT(7)
+#define HSIO_S1G_COMMON_CFG_IF_MODE                       BIT(0)
+
+#define HSIO_S1G_PLL_CFG_PLL_ENA_FB_DIV2                  BIT(22)
+#define HSIO_S1G_PLL_CFG_PLL_ENA_RC_DIV2                  BIT(21)
+#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA(x)             (((x) << 8) & GENMASK(15, 8))
+#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA_M              GENMASK(15, 8)
+#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA_X(x)           (((x) & GENMASK(15, 8)) >> 8)
+#define HSIO_S1G_PLL_CFG_PLL_FSM_ENA                      BIT(7)
+#define HSIO_S1G_PLL_CFG_PLL_FSM_FORCE_SET_ENA            BIT(6)
+#define HSIO_S1G_PLL_CFG_PLL_FSM_OOR_RECAL_ENA            BIT(5)
+#define HSIO_S1G_PLL_CFG_PLL_RB_DATA_SEL                  BIT(3)
+
+#define HSIO_S1G_PLL_STATUS_PLL_CAL_NOT_DONE              BIT(12)
+#define HSIO_S1G_PLL_STATUS_PLL_CAL_ERR                   BIT(11)
+#define HSIO_S1G_PLL_STATUS_PLL_OUT_OF_RANGE_ERR          BIT(10)
+#define HSIO_S1G_PLL_STATUS_PLL_RB_DATA(x)                ((x) & GENMASK(7, 0))
+#define HSIO_S1G_PLL_STATUS_PLL_RB_DATA_M                 GENMASK(7, 0)
+
+#define HSIO_S1G_DFT_CFG0_LAZYBIT                         BIT(31)
+#define HSIO_S1G_DFT_CFG0_INV_DIS                         BIT(23)
+#define HSIO_S1G_DFT_CFG0_PRBS_SEL(x)                     (((x) << 20) & GENMASK(21, 20))
+#define HSIO_S1G_DFT_CFG0_PRBS_SEL_M                      GENMASK(21, 20)
+#define HSIO_S1G_DFT_CFG0_PRBS_SEL_X(x)                   (((x) & GENMASK(21, 20)) >> 20)
+#define HSIO_S1G_DFT_CFG0_TEST_MODE(x)                    (((x) << 16) & GENMASK(18, 16))
+#define HSIO_S1G_DFT_CFG0_TEST_MODE_M                     GENMASK(18, 16)
+#define HSIO_S1G_DFT_CFG0_TEST_MODE_X(x)                  (((x) & GENMASK(18, 16)) >> 16)
+#define HSIO_S1G_DFT_CFG0_RX_PHS_CORR_DIS                 BIT(4)
+#define HSIO_S1G_DFT_CFG0_RX_PDSENS_ENA                   BIT(3)
+#define HSIO_S1G_DFT_CFG0_RX_DFT_ENA                      BIT(2)
+#define HSIO_S1G_DFT_CFG0_TX_DFT_ENA                      BIT(0)
+
+#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
+#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL_M                GENMASK(17, 8)
+#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
+#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
+#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ_M                  GENMASK(7, 4)
+#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_S1G_DFT_CFG1_TX_JI_ENA                       BIT(3)
+#define HSIO_S1G_DFT_CFG1_TX_WAVEFORM_SEL                 BIT(2)
+#define HSIO_S1G_DFT_CFG1_TX_FREQOFF_DIR                  BIT(1)
+#define HSIO_S1G_DFT_CFG1_TX_FREQOFF_ENA                  BIT(0)
+
+#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
+#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL_M                GENMASK(17, 8)
+#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
+#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
+#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ_M                  GENMASK(7, 4)
+#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_S1G_DFT_CFG2_RX_JI_ENA                       BIT(3)
+#define HSIO_S1G_DFT_CFG2_RX_WAVEFORM_SEL                 BIT(2)
+#define HSIO_S1G_DFT_CFG2_RX_FREQOFF_DIR                  BIT(1)
+#define HSIO_S1G_DFT_CFG2_RX_FREQOFF_ENA                  BIT(0)
+
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_ENA             BIT(20)
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH(x)     (((x) << 16) & GENMASK(17, 16))
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_M      GENMASK(17, 16)
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_X(x)   (((x) & GENMASK(17, 16)) >> 16)
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH(x)         (((x) << 8) & GENMASK(15, 8))
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_M          GENMASK(15, 8)
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_X(x)       (((x) & GENMASK(15, 8)) >> 8)
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_LOW(x)          ((x) & GENMASK(7, 0))
+#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_LOW_M           GENMASK(7, 0)
+
+#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE(x)          (((x) << 11) & GENMASK(12, 11))
+#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE_M           GENMASK(12, 11)
+#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE_X(x)        (((x) & GENMASK(12, 11)) >> 11)
+#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_SWAP             BIT(10)
+#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_MODE             BIT(9)
+#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_ENA              BIT(8)
+#define HSIO_S1G_MISC_CFG_RX_LPI_MODE_ENA                 BIT(5)
+#define HSIO_S1G_MISC_CFG_TX_LPI_MODE_ENA                 BIT(4)
+#define HSIO_S1G_MISC_CFG_RX_DATA_INV_ENA                 BIT(3)
+#define HSIO_S1G_MISC_CFG_TX_DATA_INV_ENA                 BIT(2)
+#define HSIO_S1G_MISC_CFG_LANE_RST                        BIT(0)
+
+#define HSIO_S1G_DFT_STATUS_PLL_BIST_NOT_DONE             BIT(7)
+#define HSIO_S1G_DFT_STATUS_PLL_BIST_FAILED               BIT(6)
+#define HSIO_S1G_DFT_STATUS_PLL_BIST_TIMEOUT_ERR          BIT(5)
+#define HSIO_S1G_DFT_STATUS_BIST_ACTIVE                   BIT(3)
+#define HSIO_S1G_DFT_STATUS_BIST_NOSYNC                   BIT(2)
+#define HSIO_S1G_DFT_STATUS_BIST_COMPLETE_N               BIT(1)
+#define HSIO_S1G_DFT_STATUS_BIST_ERROR                    BIT(0)
+
+#define HSIO_S1G_MISC_STATUS_DES_100FX_PHASE_SEL          BIT(0)
+
+#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_WR_ONE_SHOT        BIT(31)
+#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_RD_ONE_SHOT        BIT(30)
+#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_ADDR(x)            ((x) & GENMASK(8, 0))
+#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_ADDR_M             GENMASK(8, 0)
+
+#define HSIO_S6G_DIG_CFG_GP(x)                            (((x) << 16) & GENMASK(18, 16))
+#define HSIO_S6G_DIG_CFG_GP_M                             GENMASK(18, 16)
+#define HSIO_S6G_DIG_CFG_GP_X(x)                          (((x) & GENMASK(18, 16)) >> 16)
+#define HSIO_S6G_DIG_CFG_TX_BIT_DOUBLING_MODE_ENA         BIT(7)
+#define HSIO_S6G_DIG_CFG_SIGDET_TESTMODE                  BIT(6)
+#define HSIO_S6G_DIG_CFG_SIGDET_AST(x)                    (((x) << 3) & GENMASK(5, 3))
+#define HSIO_S6G_DIG_CFG_SIGDET_AST_M                     GENMASK(5, 3)
+#define HSIO_S6G_DIG_CFG_SIGDET_AST_X(x)                  (((x) & GENMASK(5, 3)) >> 3)
+#define HSIO_S6G_DIG_CFG_SIGDET_DST(x)                    ((x) & GENMASK(2, 0))
+#define HSIO_S6G_DIG_CFG_SIGDET_DST_M                     GENMASK(2, 0)
+
+#define HSIO_S6G_DFT_CFG0_LAZYBIT                         BIT(31)
+#define HSIO_S6G_DFT_CFG0_INV_DIS                         BIT(23)
+#define HSIO_S6G_DFT_CFG0_PRBS_SEL(x)                     (((x) << 20) & GENMASK(21, 20))
+#define HSIO_S6G_DFT_CFG0_PRBS_SEL_M                      GENMASK(21, 20)
+#define HSIO_S6G_DFT_CFG0_PRBS_SEL_X(x)                   (((x) & GENMASK(21, 20)) >> 20)
+#define HSIO_S6G_DFT_CFG0_TEST_MODE(x)                    (((x) << 16) & GENMASK(18, 16))
+#define HSIO_S6G_DFT_CFG0_TEST_MODE_M                     GENMASK(18, 16)
+#define HSIO_S6G_DFT_CFG0_TEST_MODE_X(x)                  (((x) & GENMASK(18, 16)) >> 16)
+#define HSIO_S6G_DFT_CFG0_RX_PHS_CORR_DIS                 BIT(4)
+#define HSIO_S6G_DFT_CFG0_RX_PDSENS_ENA                   BIT(3)
+#define HSIO_S6G_DFT_CFG0_RX_DFT_ENA                      BIT(2)
+#define HSIO_S6G_DFT_CFG0_TX_DFT_ENA                      BIT(0)
+
+#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
+#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL_M                GENMASK(17, 8)
+#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
+#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
+#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ_M                  GENMASK(7, 4)
+#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_S6G_DFT_CFG1_TX_JI_ENA                       BIT(3)
+#define HSIO_S6G_DFT_CFG1_TX_WAVEFORM_SEL                 BIT(2)
+#define HSIO_S6G_DFT_CFG1_TX_FREQOFF_DIR                  BIT(1)
+#define HSIO_S6G_DFT_CFG1_TX_FREQOFF_ENA                  BIT(0)
+
+#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
+#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL_M                GENMASK(17, 8)
+#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
+#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
+#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ_M                  GENMASK(7, 4)
+#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_S6G_DFT_CFG2_RX_JI_ENA                       BIT(3)
+#define HSIO_S6G_DFT_CFG2_RX_WAVEFORM_SEL                 BIT(2)
+#define HSIO_S6G_DFT_CFG2_RX_FREQOFF_DIR                  BIT(1)
+#define HSIO_S6G_DFT_CFG2_RX_FREQOFF_ENA                  BIT(0)
+
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_ENA             BIT(20)
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH(x)     (((x) << 16) & GENMASK(19, 16))
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_M      GENMASK(19, 16)
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_X(x)   (((x) & GENMASK(19, 16)) >> 16)
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH(x)         (((x) << 8) & GENMASK(15, 8))
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_M          GENMASK(15, 8)
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_X(x)       (((x) & GENMASK(15, 8)) >> 8)
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_LOW(x)          ((x) & GENMASK(7, 0))
+#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_LOW_M           GENMASK(7, 0)
+
+#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK(x)                 (((x) << 13) & GENMASK(14, 13))
+#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK_M                  GENMASK(14, 13)
+#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK_X(x)               (((x) & GENMASK(14, 13)) >> 13)
+#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE(x)          (((x) << 11) & GENMASK(12, 11))
+#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE_M           GENMASK(12, 11)
+#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE_X(x)        (((x) & GENMASK(12, 11)) >> 11)
+#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_SWAP             BIT(10)
+#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_MODE             BIT(9)
+#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_ENA              BIT(8)
+#define HSIO_S6G_MISC_CFG_RX_BUS_FLIP_ENA                 BIT(7)
+#define HSIO_S6G_MISC_CFG_TX_BUS_FLIP_ENA                 BIT(6)
+#define HSIO_S6G_MISC_CFG_RX_LPI_MODE_ENA                 BIT(5)
+#define HSIO_S6G_MISC_CFG_TX_LPI_MODE_ENA                 BIT(4)
+#define HSIO_S6G_MISC_CFG_RX_DATA_INV_ENA                 BIT(3)
+#define HSIO_S6G_MISC_CFG_TX_DATA_INV_ENA                 BIT(2)
+#define HSIO_S6G_MISC_CFG_LANE_RST                        BIT(0)
+
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0(x)               (((x) << 23) & GENMASK(28, 23))
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0_M                GENMASK(28, 23)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0_X(x)             (((x) & GENMASK(28, 23)) >> 23)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1(x)               (((x) << 18) & GENMASK(22, 18))
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1_M                GENMASK(22, 18)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1_X(x)             (((x) & GENMASK(22, 18)) >> 18)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC(x)                (((x) << 13) & GENMASK(17, 13))
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC_M                 GENMASK(17, 13)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC_X(x)              (((x) & GENMASK(17, 13)) >> 13)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS(x)             (((x) << 6) & GENMASK(8, 6))
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS_M              GENMASK(8, 6)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS_X(x)           (((x) & GENMASK(8, 6)) >> 6)
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_LEV(x)                 ((x) & GENMASK(5, 0))
+#define HSIO_S6G_OB_ANEG_CFG_AN_OB_LEV_M                  GENMASK(5, 0)
+
+#define HSIO_S6G_DFT_STATUS_PRBS_SYNC_STAT                BIT(8)
+#define HSIO_S6G_DFT_STATUS_PLL_BIST_NOT_DONE             BIT(7)
+#define HSIO_S6G_DFT_STATUS_PLL_BIST_FAILED               BIT(6)
+#define HSIO_S6G_DFT_STATUS_PLL_BIST_TIMEOUT_ERR          BIT(5)
+#define HSIO_S6G_DFT_STATUS_BIST_ACTIVE                   BIT(3)
+#define HSIO_S6G_DFT_STATUS_BIST_NOSYNC                   BIT(2)
+#define HSIO_S6G_DFT_STATUS_BIST_COMPLETE_N               BIT(1)
+#define HSIO_S6G_DFT_STATUS_BIST_ERROR                    BIT(0)
+
+#define HSIO_S6G_MISC_STATUS_DES_100FX_PHASE_SEL          BIT(0)
+
+#define HSIO_S6G_DES_CFG_DES_PHS_CTRL(x)                  (((x) << 13) & GENMASK(16, 13))
+#define HSIO_S6G_DES_CFG_DES_PHS_CTRL_M                   GENMASK(16, 13)
+#define HSIO_S6G_DES_CFG_DES_PHS_CTRL_X(x)                (((x) & GENMASK(16, 13)) >> 13)
+#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL(x)                 (((x) << 10) & GENMASK(12, 10))
+#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL_M                  GENMASK(12, 10)
+#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL_X(x)               (((x) & GENMASK(12, 10)) >> 10)
+#define HSIO_S6G_DES_CFG_DES_CPMD_SEL(x)                  (((x) << 8) & GENMASK(9, 8))
+#define HSIO_S6G_DES_CFG_DES_CPMD_SEL_M                   GENMASK(9, 8)
+#define HSIO_S6G_DES_CFG_DES_CPMD_SEL_X(x)                (((x) & GENMASK(9, 8)) >> 8)
+#define HSIO_S6G_DES_CFG_DES_BW_HYST(x)                   (((x) << 5) & GENMASK(7, 5))
+#define HSIO_S6G_DES_CFG_DES_BW_HYST_M                    GENMASK(7, 5)
+#define HSIO_S6G_DES_CFG_DES_BW_HYST_X(x)                 (((x) & GENMASK(7, 5)) >> 5)
+#define HSIO_S6G_DES_CFG_DES_SWAP_HYST                    BIT(4)
+#define HSIO_S6G_DES_CFG_DES_BW_ANA(x)                    (((x) << 1) & GENMASK(3, 1))
+#define HSIO_S6G_DES_CFG_DES_BW_ANA_M                     GENMASK(3, 1)
+#define HSIO_S6G_DES_CFG_DES_BW_ANA_X(x)                  (((x) & GENMASK(3, 1)) >> 1)
+#define HSIO_S6G_DES_CFG_DES_SWAP_ANA                     BIT(0)
+
+#define HSIO_S6G_IB_CFG_IB_SOFSI(x)                       (((x) << 29) & GENMASK(30, 29))
+#define HSIO_S6G_IB_CFG_IB_SOFSI_M                        GENMASK(30, 29)
+#define HSIO_S6G_IB_CFG_IB_SOFSI_X(x)                     (((x) & GENMASK(30, 29)) >> 29)
+#define HSIO_S6G_IB_CFG_IB_VBULK_SEL                      BIT(28)
+#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ(x)                    (((x) << 24) & GENMASK(27, 24))
+#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ_M                     GENMASK(27, 24)
+#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ_X(x)                  (((x) & GENMASK(27, 24)) >> 24)
+#define HSIO_S6G_IB_CFG_IB_ICML_ADJ(x)                    (((x) << 20) & GENMASK(23, 20))
+#define HSIO_S6G_IB_CFG_IB_ICML_ADJ_M                     GENMASK(23, 20)
+#define HSIO_S6G_IB_CFG_IB_ICML_ADJ_X(x)                  (((x) & GENMASK(23, 20)) >> 20)
+#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL(x)               (((x) << 18) & GENMASK(19, 18))
+#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL_M                GENMASK(19, 18)
+#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL_X(x)             (((x) & GENMASK(19, 18)) >> 18)
+#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL(x)             (((x) << 15) & GENMASK(17, 15))
+#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL_M              GENMASK(17, 15)
+#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL_X(x)           (((x) & GENMASK(17, 15)) >> 15)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP(x)              (((x) << 13) & GENMASK(14, 13))
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP_M               GENMASK(14, 13)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP_X(x)            (((x) & GENMASK(14, 13)) >> 13)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID(x)             (((x) << 11) & GENMASK(12, 11))
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID_M              GENMASK(12, 11)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID_X(x)           (((x) & GENMASK(12, 11)) >> 11)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP(x)              (((x) << 9) & GENMASK(10, 9))
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP_M               GENMASK(10, 9)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP_X(x)            (((x) & GENMASK(10, 9)) >> 9)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET(x)          (((x) << 7) & GENMASK(8, 7))
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET_M           GENMASK(8, 7)
+#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET_X(x)        (((x) & GENMASK(8, 7)) >> 7)
+#define HSIO_S6G_IB_CFG_IB_ANA_TEST_ENA                   BIT(6)
+#define HSIO_S6G_IB_CFG_IB_SIG_DET_ENA                    BIT(5)
+#define HSIO_S6G_IB_CFG_IB_CONCUR                         BIT(4)
+#define HSIO_S6G_IB_CFG_IB_CAL_ENA                        BIT(3)
+#define HSIO_S6G_IB_CFG_IB_SAM_ENA                        BIT(2)
+#define HSIO_S6G_IB_CFG_IB_EQZ_ENA                        BIT(1)
+#define HSIO_S6G_IB_CFG_IB_REG_ENA                        BIT(0)
+
+#define HSIO_S6G_IB_CFG1_IB_TJTAG(x)                      (((x) << 17) & GENMASK(21, 17))
+#define HSIO_S6G_IB_CFG1_IB_TJTAG_M                       GENMASK(21, 17)
+#define HSIO_S6G_IB_CFG1_IB_TJTAG_X(x)                    (((x) & GENMASK(21, 17)) >> 17)
+#define HSIO_S6G_IB_CFG1_IB_TSDET(x)                      (((x) << 12) & GENMASK(16, 12))
+#define HSIO_S6G_IB_CFG1_IB_TSDET_M                       GENMASK(16, 12)
+#define HSIO_S6G_IB_CFG1_IB_TSDET_X(x)                    (((x) & GENMASK(16, 12)) >> 12)
+#define HSIO_S6G_IB_CFG1_IB_SCALY(x)                      (((x) << 8) & GENMASK(11, 8))
+#define HSIO_S6G_IB_CFG1_IB_SCALY_M                       GENMASK(11, 8)
+#define HSIO_S6G_IB_CFG1_IB_SCALY_X(x)                    (((x) & GENMASK(11, 8)) >> 8)
+#define HSIO_S6G_IB_CFG1_IB_FILT_HP                       BIT(7)
+#define HSIO_S6G_IB_CFG1_IB_FILT_MID                      BIT(6)
+#define HSIO_S6G_IB_CFG1_IB_FILT_LP                       BIT(5)
+#define HSIO_S6G_IB_CFG1_IB_FILT_OFFSET                   BIT(4)
+#define HSIO_S6G_IB_CFG1_IB_FRC_HP                        BIT(3)
+#define HSIO_S6G_IB_CFG1_IB_FRC_MID                       BIT(2)
+#define HSIO_S6G_IB_CFG1_IB_FRC_LP                        BIT(1)
+#define HSIO_S6G_IB_CFG1_IB_FRC_OFFSET                    BIT(0)
+
+#define HSIO_S6G_IB_CFG2_IB_TINFV(x)                      (((x) << 27) & GENMASK(29, 27))
+#define HSIO_S6G_IB_CFG2_IB_TINFV_M                       GENMASK(29, 27)
+#define HSIO_S6G_IB_CFG2_IB_TINFV_X(x)                    (((x) & GENMASK(29, 27)) >> 27)
+#define HSIO_S6G_IB_CFG2_IB_OINFI(x)                      (((x) << 22) & GENMASK(26, 22))
+#define HSIO_S6G_IB_CFG2_IB_OINFI_M                       GENMASK(26, 22)
+#define HSIO_S6G_IB_CFG2_IB_OINFI_X(x)                    (((x) & GENMASK(26, 22)) >> 22)
+#define HSIO_S6G_IB_CFG2_IB_TAUX(x)                       (((x) << 19) & GENMASK(21, 19))
+#define HSIO_S6G_IB_CFG2_IB_TAUX_M                        GENMASK(21, 19)
+#define HSIO_S6G_IB_CFG2_IB_TAUX_X(x)                     (((x) & GENMASK(21, 19)) >> 19)
+#define HSIO_S6G_IB_CFG2_IB_OINFS(x)                      (((x) << 16) & GENMASK(18, 16))
+#define HSIO_S6G_IB_CFG2_IB_OINFS_M                       GENMASK(18, 16)
+#define HSIO_S6G_IB_CFG2_IB_OINFS_X(x)                    (((x) & GENMASK(18, 16)) >> 16)
+#define HSIO_S6G_IB_CFG2_IB_OCALS(x)                      (((x) << 10) & GENMASK(15, 10))
+#define HSIO_S6G_IB_CFG2_IB_OCALS_M                       GENMASK(15, 10)
+#define HSIO_S6G_IB_CFG2_IB_OCALS_X(x)                    (((x) & GENMASK(15, 10)) >> 10)
+#define HSIO_S6G_IB_CFG2_IB_TCALV(x)                      (((x) << 5) & GENMASK(9, 5))
+#define HSIO_S6G_IB_CFG2_IB_TCALV_M                       GENMASK(9, 5)
+#define HSIO_S6G_IB_CFG2_IB_TCALV_X(x)                    (((x) & GENMASK(9, 5)) >> 5)
+#define HSIO_S6G_IB_CFG2_IB_UMAX(x)                       (((x) << 3) & GENMASK(4, 3))
+#define HSIO_S6G_IB_CFG2_IB_UMAX_M                        GENMASK(4, 3)
+#define HSIO_S6G_IB_CFG2_IB_UMAX_X(x)                     (((x) & GENMASK(4, 3)) >> 3)
+#define HSIO_S6G_IB_CFG2_IB_UREG(x)                       ((x) & GENMASK(2, 0))
+#define HSIO_S6G_IB_CFG2_IB_UREG_M                        GENMASK(2, 0)
+
+#define HSIO_S6G_IB_CFG3_IB_INI_HP(x)                     (((x) << 18) & GENMASK(23, 18))
+#define HSIO_S6G_IB_CFG3_IB_INI_HP_M                      GENMASK(23, 18)
+#define HSIO_S6G_IB_CFG3_IB_INI_HP_X(x)                   (((x) & GENMASK(23, 18)) >> 18)
+#define HSIO_S6G_IB_CFG3_IB_INI_MID(x)                    (((x) << 12) & GENMASK(17, 12))
+#define HSIO_S6G_IB_CFG3_IB_INI_MID_M                     GENMASK(17, 12)
+#define HSIO_S6G_IB_CFG3_IB_INI_MID_X(x)                  (((x) & GENMASK(17, 12)) >> 12)
+#define HSIO_S6G_IB_CFG3_IB_INI_LP(x)                     (((x) << 6) & GENMASK(11, 6))
+#define HSIO_S6G_IB_CFG3_IB_INI_LP_M                      GENMASK(11, 6)
+#define HSIO_S6G_IB_CFG3_IB_INI_LP_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
+#define HSIO_S6G_IB_CFG3_IB_INI_OFFSET(x)                 ((x) & GENMASK(5, 0))
+#define HSIO_S6G_IB_CFG3_IB_INI_OFFSET_M                  GENMASK(5, 0)
+
+#define HSIO_S6G_IB_CFG4_IB_MAX_HP(x)                     (((x) << 18) & GENMASK(23, 18))
+#define HSIO_S6G_IB_CFG4_IB_MAX_HP_M                      GENMASK(23, 18)
+#define HSIO_S6G_IB_CFG4_IB_MAX_HP_X(x)                   (((x) & GENMASK(23, 18)) >> 18)
+#define HSIO_S6G_IB_CFG4_IB_MAX_MID(x)                    (((x) << 12) & GENMASK(17, 12))
+#define HSIO_S6G_IB_CFG4_IB_MAX_MID_M                     GENMASK(17, 12)
+#define HSIO_S6G_IB_CFG4_IB_MAX_MID_X(x)                  (((x) & GENMASK(17, 12)) >> 12)
+#define HSIO_S6G_IB_CFG4_IB_MAX_LP(x)                     (((x) << 6) & GENMASK(11, 6))
+#define HSIO_S6G_IB_CFG4_IB_MAX_LP_M                      GENMASK(11, 6)
+#define HSIO_S6G_IB_CFG4_IB_MAX_LP_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
+#define HSIO_S6G_IB_CFG4_IB_MAX_OFFSET(x)                 ((x) & GENMASK(5, 0))
+#define HSIO_S6G_IB_CFG4_IB_MAX_OFFSET_M                  GENMASK(5, 0)
+
+#define HSIO_S6G_IB_CFG5_IB_MIN_HP(x)                     (((x) << 18) & GENMASK(23, 18))
+#define HSIO_S6G_IB_CFG5_IB_MIN_HP_M                      GENMASK(23, 18)
+#define HSIO_S6G_IB_CFG5_IB_MIN_HP_X(x)                   (((x) & GENMASK(23, 18)) >> 18)
+#define HSIO_S6G_IB_CFG5_IB_MIN_MID(x)                    (((x) << 12) & GENMASK(17, 12))
+#define HSIO_S6G_IB_CFG5_IB_MIN_MID_M                     GENMASK(17, 12)
+#define HSIO_S6G_IB_CFG5_IB_MIN_MID_X(x)                  (((x) & GENMASK(17, 12)) >> 12)
+#define HSIO_S6G_IB_CFG5_IB_MIN_LP(x)                     (((x) << 6) & GENMASK(11, 6))
+#define HSIO_S6G_IB_CFG5_IB_MIN_LP_M                      GENMASK(11, 6)
+#define HSIO_S6G_IB_CFG5_IB_MIN_LP_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
+#define HSIO_S6G_IB_CFG5_IB_MIN_OFFSET(x)                 ((x) & GENMASK(5, 0))
+#define HSIO_S6G_IB_CFG5_IB_MIN_OFFSET_M                  GENMASK(5, 0)
+
+#define HSIO_S6G_OB_CFG_OB_IDLE                           BIT(31)
+#define HSIO_S6G_OB_CFG_OB_ENA1V_MODE                     BIT(30)
+#define HSIO_S6G_OB_CFG_OB_POL                            BIT(29)
+#define HSIO_S6G_OB_CFG_OB_POST0(x)                       (((x) << 23) & GENMASK(28, 23))
+#define HSIO_S6G_OB_CFG_OB_POST0_M                        GENMASK(28, 23)
+#define HSIO_S6G_OB_CFG_OB_POST0_X(x)                     (((x) & GENMASK(28, 23)) >> 23)
+#define HSIO_S6G_OB_CFG_OB_PREC(x)                        (((x) << 18) & GENMASK(22, 18))
+#define HSIO_S6G_OB_CFG_OB_PREC_M                         GENMASK(22, 18)
+#define HSIO_S6G_OB_CFG_OB_PREC_X(x)                      (((x) & GENMASK(22, 18)) >> 18)
+#define HSIO_S6G_OB_CFG_OB_R_ADJ_MUX                      BIT(17)
+#define HSIO_S6G_OB_CFG_OB_R_ADJ_PDR                      BIT(16)
+#define HSIO_S6G_OB_CFG_OB_POST1(x)                       (((x) << 11) & GENMASK(15, 11))
+#define HSIO_S6G_OB_CFG_OB_POST1_M                        GENMASK(15, 11)
+#define HSIO_S6G_OB_CFG_OB_POST1_X(x)                     (((x) & GENMASK(15, 11)) >> 11)
+#define HSIO_S6G_OB_CFG_OB_R_COR                          BIT(10)
+#define HSIO_S6G_OB_CFG_OB_SEL_RCTRL                      BIT(9)
+#define HSIO_S6G_OB_CFG_OB_SR_H                           BIT(8)
+#define HSIO_S6G_OB_CFG_OB_SR(x)                          (((x) << 4) & GENMASK(7, 4))
+#define HSIO_S6G_OB_CFG_OB_SR_M                           GENMASK(7, 4)
+#define HSIO_S6G_OB_CFG_OB_SR_X(x)                        (((x) & GENMASK(7, 4)) >> 4)
+#define HSIO_S6G_OB_CFG_OB_RESISTOR_CTRL(x)               ((x) & GENMASK(3, 0))
+#define HSIO_S6G_OB_CFG_OB_RESISTOR_CTRL_M                GENMASK(3, 0)
+
+#define HSIO_S6G_OB_CFG1_OB_ENA_CAS(x)                    (((x) << 6) & GENMASK(8, 6))
+#define HSIO_S6G_OB_CFG1_OB_ENA_CAS_M                     GENMASK(8, 6)
+#define HSIO_S6G_OB_CFG1_OB_ENA_CAS_X(x)                  (((x) & GENMASK(8, 6)) >> 6)
+#define HSIO_S6G_OB_CFG1_OB_LEV(x)                        ((x) & GENMASK(5, 0))
+#define HSIO_S6G_OB_CFG1_OB_LEV_M                         GENMASK(5, 0)
+
+#define HSIO_S6G_SER_CFG_SER_4TAP_ENA                     BIT(8)
+#define HSIO_S6G_SER_CFG_SER_CPMD_SEL                     BIT(7)
+#define HSIO_S6G_SER_CFG_SER_SWAP_CPMD                    BIT(6)
+#define HSIO_S6G_SER_CFG_SER_ALISEL(x)                    (((x) << 4) & GENMASK(5, 4))
+#define HSIO_S6G_SER_CFG_SER_ALISEL_M                     GENMASK(5, 4)
+#define HSIO_S6G_SER_CFG_SER_ALISEL_X(x)                  (((x) & GENMASK(5, 4)) >> 4)
+#define HSIO_S6G_SER_CFG_SER_ENHYS                        BIT(3)
+#define HSIO_S6G_SER_CFG_SER_BIG_WIN                      BIT(2)
+#define HSIO_S6G_SER_CFG_SER_EN_WIN                       BIT(1)
+#define HSIO_S6G_SER_CFG_SER_ENALI                        BIT(0)
+
+#define HSIO_S6G_COMMON_CFG_SYS_RST                       BIT(17)
+#define HSIO_S6G_COMMON_CFG_SE_DIV2_ENA                   BIT(16)
+#define HSIO_S6G_COMMON_CFG_SE_AUTO_SQUELCH_ENA           BIT(15)
+#define HSIO_S6G_COMMON_CFG_ENA_LANE                      BIT(14)
+#define HSIO_S6G_COMMON_CFG_PWD_RX                        BIT(13)
+#define HSIO_S6G_COMMON_CFG_PWD_TX                        BIT(12)
+#define HSIO_S6G_COMMON_CFG_LANE_CTRL(x)                  (((x) << 9) & GENMASK(11, 9))
+#define HSIO_S6G_COMMON_CFG_LANE_CTRL_M                   GENMASK(11, 9)
+#define HSIO_S6G_COMMON_CFG_LANE_CTRL_X(x)                (((x) & GENMASK(11, 9)) >> 9)
+#define HSIO_S6G_COMMON_CFG_ENA_DIRECT                    BIT(8)
+#define HSIO_S6G_COMMON_CFG_ENA_ELOOP                     BIT(7)
+#define HSIO_S6G_COMMON_CFG_ENA_FLOOP                     BIT(6)
+#define HSIO_S6G_COMMON_CFG_ENA_ILOOP                     BIT(5)
+#define HSIO_S6G_COMMON_CFG_ENA_PLOOP                     BIT(4)
+#define HSIO_S6G_COMMON_CFG_HRATE                         BIT(3)
+#define HSIO_S6G_COMMON_CFG_QRATE                         BIT(2)
+#define HSIO_S6G_COMMON_CFG_IF_MODE(x)                    ((x) & GENMASK(1, 0))
+#define HSIO_S6G_COMMON_CFG_IF_MODE_M                     GENMASK(1, 0)
+
+#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS(x)                  (((x) << 16) & GENMASK(17, 16))
+#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS_M                   GENMASK(17, 16)
+#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS_X(x)                (((x) & GENMASK(17, 16)) >> 16)
+#define HSIO_S6G_PLL_CFG_PLL_DIV4                         BIT(15)
+#define HSIO_S6G_PLL_CFG_PLL_ENA_ROT                      BIT(14)
+#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA(x)             (((x) << 6) & GENMASK(13, 6))
+#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA_M              GENMASK(13, 6)
+#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA_X(x)           (((x) & GENMASK(13, 6)) >> 6)
+#define HSIO_S6G_PLL_CFG_PLL_FSM_ENA                      BIT(5)
+#define HSIO_S6G_PLL_CFG_PLL_FSM_FORCE_SET_ENA            BIT(4)
+#define HSIO_S6G_PLL_CFG_PLL_FSM_OOR_RECAL_ENA            BIT(3)
+#define HSIO_S6G_PLL_CFG_PLL_RB_DATA_SEL                  BIT(2)
+#define HSIO_S6G_PLL_CFG_PLL_ROT_DIR                      BIT(1)
+#define HSIO_S6G_PLL_CFG_PLL_ROT_FRQ                      BIT(0)
+
+#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_DATA_N            BIT(5)
+#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_DATA_P            BIT(4)
+#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_CLK               BIT(3)
+#define HSIO_S6G_ACJTAG_CFG_OB_DIRECT                     BIT(2)
+#define HSIO_S6G_ACJTAG_CFG_ACJTAG_ENA                    BIT(1)
+#define HSIO_S6G_ACJTAG_CFG_JTAG_CTRL_ENA                 BIT(0)
+
+#define HSIO_S6G_GP_CFG_GP_MSB(x)                         (((x) << 16) & GENMASK(31, 16))
+#define HSIO_S6G_GP_CFG_GP_MSB_M                          GENMASK(31, 16)
+#define HSIO_S6G_GP_CFG_GP_MSB_X(x)                       (((x) & GENMASK(31, 16)) >> 16)
+#define HSIO_S6G_GP_CFG_GP_LSB(x)                         ((x) & GENMASK(15, 0))
+#define HSIO_S6G_GP_CFG_GP_LSB_M                          GENMASK(15, 0)
+
+#define HSIO_S6G_IB_STATUS0_IB_CAL_DONE                   BIT(8)
+#define HSIO_S6G_IB_STATUS0_IB_HP_GAIN_ACT                BIT(7)
+#define HSIO_S6G_IB_STATUS0_IB_MID_GAIN_ACT               BIT(6)
+#define HSIO_S6G_IB_STATUS0_IB_LP_GAIN_ACT                BIT(5)
+#define HSIO_S6G_IB_STATUS0_IB_OFFSET_ACT                 BIT(4)
+#define HSIO_S6G_IB_STATUS0_IB_OFFSET_VLD                 BIT(3)
+#define HSIO_S6G_IB_STATUS0_IB_OFFSET_ERR                 BIT(2)
+#define HSIO_S6G_IB_STATUS0_IB_OFFSDIR                    BIT(1)
+#define HSIO_S6G_IB_STATUS0_IB_SIG_DET                    BIT(0)
+
+#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT(x)            (((x) << 18) & GENMASK(23, 18))
+#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT_M             GENMASK(23, 18)
+#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT_X(x)          (((x) & GENMASK(23, 18)) >> 18)
+#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT(x)           (((x) << 12) & GENMASK(17, 12))
+#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT_M            GENMASK(17, 12)
+#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT_X(x)         (((x) & GENMASK(17, 12)) >> 12)
+#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT(x)            (((x) << 6) & GENMASK(11, 6))
+#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT_M             GENMASK(11, 6)
+#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT_X(x)          (((x) & GENMASK(11, 6)) >> 6)
+#define HSIO_S6G_IB_STATUS1_IB_OFFSET_STAT(x)             ((x) & GENMASK(5, 0))
+#define HSIO_S6G_IB_STATUS1_IB_OFFSET_STAT_M              GENMASK(5, 0)
+
+#define HSIO_S6G_ACJTAG_STATUS_ACJTAG_CAPT_DATA_N         BIT(2)
+#define HSIO_S6G_ACJTAG_STATUS_ACJTAG_CAPT_DATA_P         BIT(1)
+#define HSIO_S6G_ACJTAG_STATUS_IB_DIRECT                  BIT(0)
+
+#define HSIO_S6G_PLL_STATUS_PLL_CAL_NOT_DONE              BIT(10)
+#define HSIO_S6G_PLL_STATUS_PLL_CAL_ERR                   BIT(9)
+#define HSIO_S6G_PLL_STATUS_PLL_OUT_OF_RANGE_ERR          BIT(8)
+#define HSIO_S6G_PLL_STATUS_PLL_RB_DATA(x)                ((x) & GENMASK(7, 0))
+#define HSIO_S6G_PLL_STATUS_PLL_RB_DATA_M                 GENMASK(7, 0)
+
+#define HSIO_S6G_REVID_SERDES_REV(x)                      (((x) << 26) & GENMASK(31, 26))
+#define HSIO_S6G_REVID_SERDES_REV_M                       GENMASK(31, 26)
+#define HSIO_S6G_REVID_SERDES_REV_X(x)                    (((x) & GENMASK(31, 26)) >> 26)
+#define HSIO_S6G_REVID_RCPLL_REV(x)                       (((x) << 21) & GENMASK(25, 21))
+#define HSIO_S6G_REVID_RCPLL_REV_M                        GENMASK(25, 21)
+#define HSIO_S6G_REVID_RCPLL_REV_X(x)                     (((x) & GENMASK(25, 21)) >> 21)
+#define HSIO_S6G_REVID_SER_REV(x)                         (((x) << 16) & GENMASK(20, 16))
+#define HSIO_S6G_REVID_SER_REV_M                          GENMASK(20, 16)
+#define HSIO_S6G_REVID_SER_REV_X(x)                       (((x) & GENMASK(20, 16)) >> 16)
+#define HSIO_S6G_REVID_DES_REV(x)                         (((x) << 10) & GENMASK(15, 10))
+#define HSIO_S6G_REVID_DES_REV_M                          GENMASK(15, 10)
+#define HSIO_S6G_REVID_DES_REV_X(x)                       (((x) & GENMASK(15, 10)) >> 10)
+#define HSIO_S6G_REVID_OB_REV(x)                          (((x) << 5) & GENMASK(9, 5))
+#define HSIO_S6G_REVID_OB_REV_M                           GENMASK(9, 5)
+#define HSIO_S6G_REVID_OB_REV_X(x)                        (((x) & GENMASK(9, 5)) >> 5)
+#define HSIO_S6G_REVID_IB_REV(x)                          ((x) & GENMASK(4, 0))
+#define HSIO_S6G_REVID_IB_REV_M                           GENMASK(4, 0)
+
+#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_WR_ONE_SHOT        BIT(31)
+#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_RD_ONE_SHOT        BIT(30)
+#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_ADDR(x)            ((x) & GENMASK(24, 0))
+#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_ADDR_M             GENMASK(24, 0)
+
+#define HSIO_HW_CFG_DEV2G5_10_MODE                        BIT(6)
+#define HSIO_HW_CFG_DEV1G_9_MODE                          BIT(5)
+#define HSIO_HW_CFG_DEV1G_6_MODE                          BIT(4)
+#define HSIO_HW_CFG_DEV1G_5_MODE                          BIT(3)
+#define HSIO_HW_CFG_DEV1G_4_MODE                          BIT(2)
+#define HSIO_HW_CFG_PCIE_ENA                              BIT(1)
+#define HSIO_HW_CFG_QSGMII_ENA                            BIT(0)
+
+#define HSIO_HW_QSGMII_CFG_SHYST_DIS                      BIT(3)
+#define HSIO_HW_QSGMII_CFG_E_DET_ENA                      BIT(2)
+#define HSIO_HW_QSGMII_CFG_USE_I1_ENA                     BIT(1)
+#define HSIO_HW_QSGMII_CFG_FLIP_LANES                     BIT(0)
+
+#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS(x)           (((x) << 1) & GENMASK(6, 1))
+#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS_M            GENMASK(6, 1)
+#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS_X(x)         (((x) & GENMASK(6, 1)) >> 1)
+#define HSIO_HW_QSGMII_STAT_SYNC                          BIT(0)
+
+#define HSIO_CLK_CFG_CLKDIV_PHY(x)                        (((x) << 1) & GENMASK(8, 1))
+#define HSIO_CLK_CFG_CLKDIV_PHY_M                         GENMASK(8, 1)
+#define HSIO_CLK_CFG_CLKDIV_PHY_X(x)                      (((x) & GENMASK(8, 1)) >> 1)
+#define HSIO_CLK_CFG_CLKDIV_PHY_DIS                       BIT(0)
+
+#define HSIO_TEMP_SENSOR_CTRL_FORCE_TEMP_RD               BIT(5)
+#define HSIO_TEMP_SENSOR_CTRL_FORCE_RUN                   BIT(4)
+#define HSIO_TEMP_SENSOR_CTRL_FORCE_NO_RST                BIT(3)
+#define HSIO_TEMP_SENSOR_CTRL_FORCE_POWER_UP              BIT(2)
+#define HSIO_TEMP_SENSOR_CTRL_FORCE_CLK                   BIT(1)
+#define HSIO_TEMP_SENSOR_CTRL_SAMPLE_ENA                  BIT(0)
+
+#define HSIO_TEMP_SENSOR_CFG_RUN_WID(x)                   (((x) << 8) & GENMASK(15, 8))
+#define HSIO_TEMP_SENSOR_CFG_RUN_WID_M                    GENMASK(15, 8)
+#define HSIO_TEMP_SENSOR_CFG_RUN_WID_X(x)                 (((x) & GENMASK(15, 8)) >> 8)
+#define HSIO_TEMP_SENSOR_CFG_SAMPLE_PER(x)                ((x) & GENMASK(7, 0))
+#define HSIO_TEMP_SENSOR_CFG_SAMPLE_PER_M                 GENMASK(7, 0)
+
+#define HSIO_TEMP_SENSOR_STAT_TEMP_VALID                  BIT(8)
+#define HSIO_TEMP_SENSOR_STAT_TEMP(x)                     ((x) & GENMASK(7, 0))
+#define HSIO_TEMP_SENSOR_STAT_TEMP_M                      GENMASK(7, 0)
+
+#endif
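
Every multi-bit field in these headers follows the same triplet convention: FIELD(x) shifts a value into position and masks it, FIELD_M is the in-place mask, and FIELD_X(x) extracts the field back out of a full register word (single-bit fields get a plain BIT() define, and fields based at bit 0 omit the _X form since no shift is needed). As a minimal sketch of how a triplet composes into a read-modify-write, here is an illustrative pair of helpers, not part of this patch, using the RUN_WID field (bits 15:8) of HSIO_TEMP_SENSOR_CFG above:

/* Illustrative only, not part of this patch: the SET/_M/_X triplet in a
 * read-modify-write of the RUN_WID field (bits 15:8) defined above.
 */
static u32 temp_sensor_cfg_set_run_wid(u32 val, u32 wid)
{
	val &= ~HSIO_TEMP_SENSOR_CFG_RUN_WID_M;    /* clear bits 15:8 */
	val |= HSIO_TEMP_SENSOR_CFG_RUN_WID(wid);  /* shift + mask into place */
	return val;
}

static u32 temp_sensor_cfg_get_run_wid(u32 val)
{
	return HSIO_TEMP_SENSOR_CFG_RUN_WID_X(val); /* mask, then shift down */
}
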
diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c
new file mode 100644 (file)
index 0000000..c6db8ad
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_io.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#include "ocelot.h"
+
+u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset)
+{
+       u16 target = reg >> TARGET_OFFSET;
+       u32 val;
+
+       WARN_ON(!target);
+
+       regmap_read(ocelot->targets[target],
+                   ocelot->map[target][reg & REG_MASK] + offset, &val);
+       return val;
+}
+EXPORT_SYMBOL(__ocelot_read_ix);
+
+void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset)
+{
+       u16 target = reg >> TARGET_OFFSET;
+
+       WARN_ON(!target);
+
+       regmap_write(ocelot->targets[target],
+                    ocelot->map[target][reg & REG_MASK] + offset, val);
+}
+EXPORT_SYMBOL(__ocelot_write_ix);
+
+void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
+                    u32 offset)
+{
+       u16 target = reg >> TARGET_OFFSET;
+
+       WARN_ON(!target);
+
+       regmap_update_bits(ocelot->targets[target],
+                          ocelot->map[target][reg & REG_MASK] + offset,
+                          mask, val);
+}
+EXPORT_SYMBOL(__ocelot_rmw_ix);
+
+u32 ocelot_port_readl(struct ocelot_port *port, u32 reg)
+{
+       return readl(port->regs + reg);
+}
+EXPORT_SYMBOL(ocelot_port_readl);
+
+void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg)
+{
+       writel(val, port->regs + reg);
+}
+EXPORT_SYMBOL(ocelot_port_writel);
+
+int ocelot_regfields_init(struct ocelot *ocelot,
+                         const struct reg_field *const regfields)
+{
+       unsigned int i;
+       u16 target;
+
+       for (i = 0; i < REGFIELD_MAX; i++) {
+               struct reg_field regfield = {};
+               u32 reg = regfields[i].reg;
+
+               if (!reg)
+                       continue;
+
+               target = regfields[i].reg >> TARGET_OFFSET;
+
+               regfield.reg = ocelot->map[target][reg & REG_MASK];
+               regfield.lsb = regfields[i].lsb;
+               regfield.msb = regfields[i].msb;
+
+               ocelot->regfields[i] =
+                       devm_regmap_field_alloc(ocelot->dev,
+                                               ocelot->targets[target],
+                                               regfield);
+
+               if (IS_ERR(ocelot->regfields[i]))
+                       return PTR_ERR(ocelot->regfields[i]);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(ocelot_regfields_init);
+
+static struct regmap_config ocelot_regmap_config = {
+       .reg_bits       = 32,
+       .val_bits       = 32,
+       .reg_stride     = 4,
+};
+
+struct regmap *ocelot_io_platform_init(struct ocelot *ocelot,
+                                      struct platform_device *pdev,
+                                      const char *name)
+{
+       struct resource *res;
+       void __iomem *regs;
+
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+       regs = devm_ioremap_resource(ocelot->dev, res);
+       if (IS_ERR(regs))
+               return ERR_CAST(regs);
+
+       ocelot_regmap_config.name = name;
+       return devm_regmap_init_mmio(ocelot->dev, regs,
+                                    &ocelot_regmap_config);
+}
+EXPORT_SYMBOL(ocelot_io_platform_init);
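
The accessors above combine naturally with the field macros from the register headers: __ocelot_rmw_ix() maps onto regmap_update_bits(), so callers pass a FIELD(x) value together with the matching FIELD_M mask, and replicated registers are addressed by offset = index * the register's _RSZ stride. A minimal sketch under the assumption that QSYS_SWITCH_PORT_MODE is the register id from the driver's register enum (defined elsewhere in this series); the function name is hypothetical:

/* Hypothetical example, not part of this patch: enable one switch port
 * while leaving the scheduler and PFC fields of the same register alone.
 * QSYS_SWITCH_PORT_MODE_RSZ (0x4) is the per-port replication stride.
 */
static void example_port_enable(struct ocelot *ocelot, int port)
{
	__ocelot_rmw_ix(ocelot,
			QSYS_SWITCH_PORT_MODE_PORT_ENA,  /* value */
			QSYS_SWITCH_PORT_MODE_PORT_ENA,  /* mask  */
			QSYS_SWITCH_PORT_MODE,
			QSYS_SWITCH_PORT_MODE_RSZ * port);
}
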
diff --git a/drivers/net/ethernet/mscc/ocelot_qs.h b/drivers/net/ethernet/mscc/ocelot_qs.h
new file mode 100644 (file)
index 0000000..d18ae72
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_qs.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_QS_H_
+#define _MSCC_OCELOT_QS_H_
+
+/* TODO handle BE */
+#define XTR_EOF_0          0x00000080U
+#define XTR_EOF_1          0x01000080U
+#define XTR_EOF_2          0x02000080U
+#define XTR_EOF_3          0x03000080U
+#define XTR_PRUNED         0x04000080U
+#define XTR_ABORT          0x05000080U
+#define XTR_ESCAPE         0x06000080U
+#define XTR_NOT_READY      0x07000080U
+#define XTR_VALID_BYTES(x) (4 - (((x) >> 24) & 3))
+
+#define QS_XTR_GRP_CFG_RSZ                                0x4
+
+#define QS_XTR_GRP_CFG_MODE(x)                            (((x) << 2) & GENMASK(3, 2))
+#define QS_XTR_GRP_CFG_MODE_M                             GENMASK(3, 2)
+#define QS_XTR_GRP_CFG_MODE_X(x)                          (((x) & GENMASK(3, 2)) >> 2)
+#define QS_XTR_GRP_CFG_STATUS_WORD_POS                    BIT(1)
+#define QS_XTR_GRP_CFG_BYTE_SWAP                          BIT(0)
+
+#define QS_XTR_RD_RSZ                                     0x4
+
+#define QS_XTR_FRM_PRUNING_RSZ                            0x4
+
+#define QS_XTR_CFG_DP_WM(x)                               (((x) << 5) & GENMASK(7, 5))
+#define QS_XTR_CFG_DP_WM_M                                GENMASK(7, 5)
+#define QS_XTR_CFG_DP_WM_X(x)                             (((x) & GENMASK(7, 5)) >> 5)
+#define QS_XTR_CFG_SCH_WM(x)                              (((x) << 2) & GENMASK(4, 2))
+#define QS_XTR_CFG_SCH_WM_M                               GENMASK(4, 2)
+#define QS_XTR_CFG_SCH_WM_X(x)                            (((x) & GENMASK(4, 2)) >> 2)
+#define QS_XTR_CFG_OFLW_ERR_STICKY(x)                     ((x) & GENMASK(1, 0))
+#define QS_XTR_CFG_OFLW_ERR_STICKY_M                      GENMASK(1, 0)
+
+#define QS_INJ_GRP_CFG_RSZ                                0x4
+
+#define QS_INJ_GRP_CFG_MODE(x)                            (((x) << 2) & GENMASK(3, 2))
+#define QS_INJ_GRP_CFG_MODE_M                             GENMASK(3, 2)
+#define QS_INJ_GRP_CFG_MODE_X(x)                          (((x) & GENMASK(3, 2)) >> 2)
+#define QS_INJ_GRP_CFG_BYTE_SWAP                          BIT(0)
+
+#define QS_INJ_WR_RSZ                                     0x4
+
+#define QS_INJ_CTRL_RSZ                                   0x4
+
+#define QS_INJ_CTRL_GAP_SIZE(x)                           (((x) << 21) & GENMASK(24, 21))
+#define QS_INJ_CTRL_GAP_SIZE_M                            GENMASK(24, 21)
+#define QS_INJ_CTRL_GAP_SIZE_X(x)                         (((x) & GENMASK(24, 21)) >> 21)
+#define QS_INJ_CTRL_ABORT                                 BIT(20)
+#define QS_INJ_CTRL_EOF                                   BIT(19)
+#define QS_INJ_CTRL_SOF                                   BIT(18)
+#define QS_INJ_CTRL_VLD_BYTES(x)                          (((x) << 16) & GENMASK(17, 16))
+#define QS_INJ_CTRL_VLD_BYTES_M                           GENMASK(17, 16)
+#define QS_INJ_CTRL_VLD_BYTES_X(x)                        (((x) & GENMASK(17, 16)) >> 16)
+
+#define QS_INJ_STATUS_WMARK_REACHED(x)                    (((x) << 4) & GENMASK(5, 4))
+#define QS_INJ_STATUS_WMARK_REACHED_M                     GENMASK(5, 4)
+#define QS_INJ_STATUS_WMARK_REACHED_X(x)                  (((x) & GENMASK(5, 4)) >> 4)
+#define QS_INJ_STATUS_FIFO_RDY(x)                         (((x) << 2) & GENMASK(3, 2))
+#define QS_INJ_STATUS_FIFO_RDY_M                          GENMASK(3, 2)
+#define QS_INJ_STATUS_FIFO_RDY_X(x)                       (((x) & GENMASK(3, 2)) >> 2)
+#define QS_INJ_STATUS_INJ_IN_PROGRESS(x)                  ((x) & GENMASK(1, 0))
+#define QS_INJ_STATUS_INJ_IN_PROGRESS_M                   GENMASK(1, 0)
+
+#define QS_INJ_ERR_RSZ                                    0x4
+
+#define QS_INJ_ERR_ABORT_ERR_STICKY                       BIT(1)
+#define QS_INJ_ERR_WR_ERR_STICKY                          BIT(0)
+
+#endif
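
The XTR_* values above are inline control words that share the extraction FIFO with frame data: the CPU drains a group one 32-bit word at a time and has to recognize these sentinels in the stream, which is also why the byte-order TODO above matters, since the magic values are endianness dependent. An illustrative decode step, not part of this patch, with the function name and return conventions purely hypothetical:

/* Illustrative only, not part of this patch: classifying one 32-bit word
 * drained from the extraction FIFO.  XTR_ESCAPE handling (the next word
 * is literal frame data) is omitted for brevity.
 */
static int example_classify_xtr_word(u32 word, int *eof_valid_bytes)
{
	switch (word) {
	case XTR_NOT_READY:
		return -EAGAIN;	/* FIFO empty for now */
	case XTR_ABORT:
	case XTR_PRUNED:
		return -EIO;	/* frame aborted or truncated */
	case XTR_EOF_0:
	case XTR_EOF_1:
	case XTR_EOF_2:
	case XTR_EOF_3:
		/* 4, 3, 2 or 1 payload bytes left in the last data word */
		*eof_valid_bytes = XTR_VALID_BYTES(word);
		return 1;	/* end of frame */
	default:
		return 0;	/* plain frame data */
	}
}
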
diff --git a/drivers/net/ethernet/mscc/ocelot_qsys.h b/drivers/net/ethernet/mscc/ocelot_qsys.h
new file mode 100644 (file)
index 0000000..d8c63aa
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_qsys.h
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_QSYS_H_
+#define _MSCC_OCELOT_QSYS_H_
+
+#define QSYS_PORT_MODE_RSZ                                0x4
+
+#define QSYS_PORT_MODE_DEQUEUE_DIS                        BIT(1)
+#define QSYS_PORT_MODE_DEQUEUE_LATE                       BIT(0)
+
+#define QSYS_SWITCH_PORT_MODE_RSZ                         0x4
+
+#define QSYS_SWITCH_PORT_MODE_PORT_ENA                    BIT(14)
+#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(x)             (((x) << 11) & GENMASK(13, 11))
+#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_M              GENMASK(13, 11)
+#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_X(x)           (((x) & GENMASK(13, 11)) >> 11)
+#define QSYS_SWITCH_PORT_MODE_YEL_RSRVD                   BIT(10)
+#define QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE           BIT(9)
+#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA(x)               (((x) << 1) & GENMASK(8, 1))
+#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_M                GENMASK(8, 1)
+#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_X(x)             (((x) & GENMASK(8, 1)) >> 1)
+#define QSYS_SWITCH_PORT_MODE_TX_PFC_MODE                 BIT(0)
+
+#define QSYS_STAT_CNT_CFG_TX_GREEN_CNT_MODE               BIT(5)
+#define QSYS_STAT_CNT_CFG_TX_YELLOW_CNT_MODE              BIT(4)
+#define QSYS_STAT_CNT_CFG_DROP_GREEN_CNT_MODE             BIT(3)
+#define QSYS_STAT_CNT_CFG_DROP_YELLOW_CNT_MODE            BIT(2)
+#define QSYS_STAT_CNT_CFG_DROP_COUNT_ONCE                 BIT(1)
+#define QSYS_STAT_CNT_CFG_DROP_COUNT_EGRESS               BIT(0)
+
+#define QSYS_EEE_CFG_RSZ                                  0x4
+
+#define QSYS_EEE_THRES_EEE_HIGH_BYTES(x)                  (((x) << 8) & GENMASK(15, 8))
+#define QSYS_EEE_THRES_EEE_HIGH_BYTES_M                   GENMASK(15, 8)
+#define QSYS_EEE_THRES_EEE_HIGH_BYTES_X(x)                (((x) & GENMASK(15, 8)) >> 8)
+#define QSYS_EEE_THRES_EEE_HIGH_FRAMES(x)                 ((x) & GENMASK(7, 0))
+#define QSYS_EEE_THRES_EEE_HIGH_FRAMES_M                  GENMASK(7, 0)
+
+#define QSYS_SW_STATUS_RSZ                                0x4
+
+#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT(x)                  (((x) << 8) & GENMASK(12, 8))
+#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT_M                   GENMASK(12, 8)
+#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT_X(x)                (((x) & GENMASK(12, 8)) >> 8)
+#define QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK(x)                  ((x) & GENMASK(7, 0))
+#define QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M                   GENMASK(7, 0)
+
+#define QSYS_QMAP_GSZ                                     0x4
+
+#define QSYS_QMAP_SE_BASE(x)                              (((x) << 5) & GENMASK(12, 5))
+#define QSYS_QMAP_SE_BASE_M                               GENMASK(12, 5)
+#define QSYS_QMAP_SE_BASE_X(x)                            (((x) & GENMASK(12, 5)) >> 5)
+#define QSYS_QMAP_SE_IDX_SEL(x)                           (((x) << 2) & GENMASK(4, 2))
+#define QSYS_QMAP_SE_IDX_SEL_M                            GENMASK(4, 2)
+#define QSYS_QMAP_SE_IDX_SEL_X(x)                         (((x) & GENMASK(4, 2)) >> 2)
+#define QSYS_QMAP_SE_INP_SEL(x)                           ((x) & GENMASK(1, 0))
+#define QSYS_QMAP_SE_INP_SEL_M                            GENMASK(1, 0)
+
+#define QSYS_ISDX_SGRP_GSZ                                0x4
+
+#define QSYS_TIMED_FRAME_ENTRY_GSZ                        0x4
+
+#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT(x)               (((x) << 9) & GENMASK(18, 9))
+#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT_M                GENMASK(18, 9)
+#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT_X(x)             (((x) & GENMASK(18, 9)) >> 9)
+#define QSYS_TFRM_MISC_TIMED_CANCEL_1SHOT                 BIT(8)
+#define QSYS_TFRM_MISC_TIMED_SLOT_MODE_MC                 BIT(7)
+#define QSYS_TFRM_MISC_TIMED_ENTRY_FAST_CNT(x)            ((x) & GENMASK(6, 0))
+#define QSYS_TFRM_MISC_TIMED_ENTRY_FAST_CNT_M             GENMASK(6, 0)
+
+#define QSYS_RED_PROFILE_RSZ                              0x4
+
+#define QSYS_RED_PROFILE_WM_RED_LOW(x)                    (((x) << 8) & GENMASK(15, 8))
+#define QSYS_RED_PROFILE_WM_RED_LOW_M                     GENMASK(15, 8)
+#define QSYS_RED_PROFILE_WM_RED_LOW_X(x)                  (((x) & GENMASK(15, 8)) >> 8)
+#define QSYS_RED_PROFILE_WM_RED_HIGH(x)                   ((x) & GENMASK(7, 0))
+#define QSYS_RED_PROFILE_WM_RED_HIGH_M                    GENMASK(7, 0)
+
+#define QSYS_RES_CFG_GSZ                                  0x8
+
+#define QSYS_RES_STAT_GSZ                                 0x8
+
+#define QSYS_RES_STAT_INUSE(x)                            (((x) << 12) & GENMASK(23, 12))
+#define QSYS_RES_STAT_INUSE_M                             GENMASK(23, 12)
+#define QSYS_RES_STAT_INUSE_X(x)                          (((x) & GENMASK(23, 12)) >> 12)
+#define QSYS_RES_STAT_MAXUSE(x)                           ((x) & GENMASK(11, 0))
+#define QSYS_RES_STAT_MAXUSE_M                            GENMASK(11, 0)
+
+#define QSYS_EVENTS_CORE_EV_FDC(x)                        (((x) << 2) & GENMASK(4, 2))
+#define QSYS_EVENTS_CORE_EV_FDC_M                         GENMASK(4, 2)
+#define QSYS_EVENTS_CORE_EV_FDC_X(x)                      (((x) & GENMASK(4, 2)) >> 2)
+#define QSYS_EVENTS_CORE_EV_FRD(x)                        ((x) & GENMASK(1, 0))
+#define QSYS_EVENTS_CORE_EV_FRD_M                         GENMASK(1, 0)
+
+#define QSYS_QMAXSDU_CFG_0_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_1_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_2_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_3_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_4_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_5_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_6_RSZ                            0x4
+
+#define QSYS_QMAXSDU_CFG_7_RSZ                            0x4
+
+#define QSYS_PREEMPTION_CFG_RSZ                           0x4
+
+#define QSYS_PREEMPTION_CFG_P_QUEUES(x)                   ((x) & GENMASK(7, 0))
+#define QSYS_PREEMPTION_CFG_P_QUEUES_M                    GENMASK(7, 0)
+#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE(x)           (((x) << 8) & GENMASK(9, 8))
+#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_M            GENMASK(9, 8)
+#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(x)         (((x) & GENMASK(9, 8)) >> 8)
+#define QSYS_PREEMPTION_CFG_STRICT_IPG(x)                 (((x) << 12) & GENMASK(13, 12))
+#define QSYS_PREEMPTION_CFG_STRICT_IPG_M                  GENMASK(13, 12)
+#define QSYS_PREEMPTION_CFG_STRICT_IPG_X(x)               (((x) & GENMASK(13, 12)) >> 12)
+#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE(x)               (((x) << 16) & GENMASK(31, 16))
+#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE_M                GENMASK(31, 16)
+#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE_X(x)             (((x) & GENMASK(31, 16)) >> 16)
+
+#define QSYS_CIR_CFG_GSZ                                  0x80
+
+#define QSYS_CIR_CFG_CIR_RATE(x)                          (((x) << 6) & GENMASK(20, 6))
+#define QSYS_CIR_CFG_CIR_RATE_M                           GENMASK(20, 6)
+#define QSYS_CIR_CFG_CIR_RATE_X(x)                        (((x) & GENMASK(20, 6)) >> 6)
+#define QSYS_CIR_CFG_CIR_BURST(x)                         ((x) & GENMASK(5, 0))
+#define QSYS_CIR_CFG_CIR_BURST_M                          GENMASK(5, 0)
+
+#define QSYS_EIR_CFG_GSZ                                  0x80
+
+#define QSYS_EIR_CFG_EIR_RATE(x)                          (((x) << 7) & GENMASK(21, 7))
+#define QSYS_EIR_CFG_EIR_RATE_M                           GENMASK(21, 7)
+#define QSYS_EIR_CFG_EIR_RATE_X(x)                        (((x) & GENMASK(21, 7)) >> 7)
+#define QSYS_EIR_CFG_EIR_BURST(x)                         (((x) << 1) & GENMASK(6, 1))
+#define QSYS_EIR_CFG_EIR_BURST_M                          GENMASK(6, 1)
+#define QSYS_EIR_CFG_EIR_BURST_X(x)                       (((x) & GENMASK(6, 1)) >> 1)
+#define QSYS_EIR_CFG_EIR_MARK_ENA                         BIT(0)
+
+#define QSYS_SE_CFG_GSZ                                   0x80
+
+#define QSYS_SE_CFG_SE_DWRR_CNT(x)                        (((x) << 6) & GENMASK(9, 6))
+#define QSYS_SE_CFG_SE_DWRR_CNT_M                         GENMASK(9, 6)
+#define QSYS_SE_CFG_SE_DWRR_CNT_X(x)                      (((x) & GENMASK(9, 6)) >> 6)
+#define QSYS_SE_CFG_SE_RR_ENA                             BIT(5)
+#define QSYS_SE_CFG_SE_AVB_ENA                            BIT(4)
+#define QSYS_SE_CFG_SE_FRM_MODE(x)                        (((x) << 2) & GENMASK(3, 2))
+#define QSYS_SE_CFG_SE_FRM_MODE_M                         GENMASK(3, 2)
+#define QSYS_SE_CFG_SE_FRM_MODE_X(x)                      (((x) & GENMASK(3, 2)) >> 2)
+#define QSYS_SE_CFG_SE_EXC_ENA                            BIT(1)
+#define QSYS_SE_CFG_SE_EXC_FWD                            BIT(0)
+
+#define QSYS_SE_DWRR_CFG_GSZ                              0x80
+#define QSYS_SE_DWRR_CFG_RSZ                              0x4
+
+#define QSYS_SE_CONNECT_GSZ                               0x80
+
+#define QSYS_SE_CONNECT_SE_OUTP_IDX(x)                    (((x) << 17) & GENMASK(24, 17))
+#define QSYS_SE_CONNECT_SE_OUTP_IDX_M                     GENMASK(24, 17)
+#define QSYS_SE_CONNECT_SE_OUTP_IDX_X(x)                  (((x) & GENMASK(24, 17)) >> 17)
+#define QSYS_SE_CONNECT_SE_INP_IDX(x)                     (((x) << 9) & GENMASK(16, 9))
+#define QSYS_SE_CONNECT_SE_INP_IDX_M                      GENMASK(16, 9)
+#define QSYS_SE_CONNECT_SE_INP_IDX_X(x)                   (((x) & GENMASK(16, 9)) >> 9)
+#define QSYS_SE_CONNECT_SE_OUTP_CON(x)                    (((x) << 5) & GENMASK(8, 5))
+#define QSYS_SE_CONNECT_SE_OUTP_CON_M                     GENMASK(8, 5)
+#define QSYS_SE_CONNECT_SE_OUTP_CON_X(x)                  (((x) & GENMASK(8, 5)) >> 5)
+#define QSYS_SE_CONNECT_SE_INP_CNT(x)                     (((x) << 1) & GENMASK(4, 1))
+#define QSYS_SE_CONNECT_SE_INP_CNT_M                      GENMASK(4, 1)
+#define QSYS_SE_CONNECT_SE_INP_CNT_X(x)                   (((x) & GENMASK(4, 1)) >> 1)
+#define QSYS_SE_CONNECT_SE_TERMINAL                       BIT(0)
+
+#define QSYS_SE_DLB_SENSE_GSZ                             0x80
+
+#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO(x)                  (((x) << 11) & GENMASK(13, 11))
+#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_M                   GENMASK(13, 11)
+#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_X(x)                (((x) & GENMASK(13, 11)) >> 11)
+#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT(x)                 (((x) << 7) & GENMASK(10, 7))
+#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_M                  GENMASK(10, 7)
+#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_X(x)               (((x) & GENMASK(10, 7)) >> 7)
+#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT(x)                 (((x) << 3) & GENMASK(6, 3))
+#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_M                  GENMASK(6, 3)
+#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_X(x)               (((x) & GENMASK(6, 3)) >> 3)
+#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_ENA                 BIT(2)
+#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_ENA                BIT(1)
+#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_ENA                BIT(0)
+
+#define QSYS_CIR_STATE_GSZ                                0x80
+
+#define QSYS_CIR_STATE_CIR_LVL(x)                         (((x) << 4) & GENMASK(25, 4))
+#define QSYS_CIR_STATE_CIR_LVL_M                          GENMASK(25, 4)
+#define QSYS_CIR_STATE_CIR_LVL_X(x)                       (((x) & GENMASK(25, 4)) >> 4)
+#define QSYS_CIR_STATE_SHP_TIME(x)                        ((x) & GENMASK(3, 0))
+#define QSYS_CIR_STATE_SHP_TIME_M                         GENMASK(3, 0)
+
+#define QSYS_EIR_STATE_GSZ                                0x80
+
+#define QSYS_SE_STATE_GSZ                                 0x80
+
+#define QSYS_SE_STATE_SE_OUTP_LVL(x)                      (((x) << 1) & GENMASK(2, 1))
+#define QSYS_SE_STATE_SE_OUTP_LVL_M                       GENMASK(2, 1)
+#define QSYS_SE_STATE_SE_OUTP_LVL_X(x)                    (((x) & GENMASK(2, 1)) >> 1)
+#define QSYS_SE_STATE_SE_WAS_YEL                          BIT(0)
+
+#define QSYS_HSCH_MISC_CFG_SE_CONNECT_VLD                 BIT(8)
+#define QSYS_HSCH_MISC_CFG_FRM_ADJ(x)                     (((x) << 3) & GENMASK(7, 3))
+#define QSYS_HSCH_MISC_CFG_FRM_ADJ_M                      GENMASK(7, 3)
+#define QSYS_HSCH_MISC_CFG_FRM_ADJ_X(x)                   (((x) & GENMASK(7, 3)) >> 3)
+#define QSYS_HSCH_MISC_CFG_LEAK_DIS                       BIT(2)
+#define QSYS_HSCH_MISC_CFG_QSHP_EXC_ENA                   BIT(1)
+#define QSYS_HSCH_MISC_CFG_PFC_BYP_UPD                    BIT(0)
+
+#define QSYS_TAG_CONFIG_RSZ                               0x4
+
+#define QSYS_TAG_CONFIG_ENABLE                            BIT(0)
+#define QSYS_TAG_CONFIG_LINK_SPEED(x)                     (((x) << 4) & GENMASK(5, 4))
+#define QSYS_TAG_CONFIG_LINK_SPEED_M                      GENMASK(5, 4)
+#define QSYS_TAG_CONFIG_LINK_SPEED_X(x)                   (((x) & GENMASK(5, 4)) >> 4)
+#define QSYS_TAG_CONFIG_INIT_GATE_STATE(x)                (((x) << 8) & GENMASK(15, 8))
+#define QSYS_TAG_CONFIG_INIT_GATE_STATE_M                 GENMASK(15, 8)
+#define QSYS_TAG_CONFIG_INIT_GATE_STATE_X(x)              (((x) & GENMASK(15, 8)) >> 8)
+#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES(x)             (((x) << 16) & GENMASK(23, 16))
+#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_M              GENMASK(23, 16)
+#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_X(x)           (((x) & GENMASK(23, 16)) >> 16)
+
+#define QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(x)               ((x) & GENMASK(7, 0))
+#define QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M                GENMASK(7, 0)
+#define QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q   BIT(8)
+#define QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE             BIT(16)
+
+#define QSYS_PORT_MAX_SDU_RSZ                             0x4
+
+#define QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(x)         ((x) & GENMASK(15, 0))
+#define QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M          GENMASK(15, 0)
+#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH(x)               (((x) << 16) & GENMASK(31, 16))
+#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH_M                GENMASK(31, 16)
+#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH_X(x)             (((x) & GENMASK(31, 16)) >> 16)
+
+#define QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM(x)               ((x) & GENMASK(5, 0))
+#define QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM_M                GENMASK(5, 0)
+#define QSYS_GCL_CFG_REG_1_GATE_STATE(x)                  (((x) << 8) & GENMASK(15, 8))
+#define QSYS_GCL_CFG_REG_1_GATE_STATE_M                   GENMASK(15, 8)
+#define QSYS_GCL_CFG_REG_1_GATE_STATE_X(x)                (((x) & GENMASK(15, 8)) >> 8)
+
+#define QSYS_PARAM_STATUS_REG_3_BASE_TIME_SEC_MSB(x)      ((x) & GENMASK(15, 0))
+#define QSYS_PARAM_STATUS_REG_3_BASE_TIME_SEC_MSB_M       GENMASK(15, 0)
+#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH(x)            (((x) << 16) & GENMASK(31, 16))
+#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_M             GENMASK(31, 16)
+#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(x)          (((x) & GENMASK(31, 16)) >> 16)
+
+#define QSYS_PARAM_STATUS_REG_8_CFG_CHG_TIME_SEC_MSB(x)   ((x) & GENMASK(15, 0))
+#define QSYS_PARAM_STATUS_REG_8_CFG_CHG_TIME_SEC_MSB_M    GENMASK(15, 0)
+#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE(x)        (((x) << 16) & GENMASK(23, 16))
+#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE_M         GENMASK(23, 16)
+#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE_X(x)      (((x) & GENMASK(23, 16)) >> 16)
+#define QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING            BIT(24)
+
+#define QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(x)            ((x) & GENMASK(5, 0))
+#define QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M             GENMASK(5, 0)
+#define QSYS_GCL_STATUS_REG_1_GATE_STATE(x)               (((x) << 8) & GENMASK(15, 8))
+#define QSYS_GCL_STATUS_REG_1_GATE_STATE_M                GENMASK(15, 8)
+#define QSYS_GCL_STATUS_REG_1_GATE_STATE_X(x)             (((x) & GENMASK(15, 8)) >> 8)
+
+#endif
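
[Editor's note] The multi-bit fields above come in triples: a setter macro that shifts and masks a value into position, an _M constant holding the in-place mask, and an _X extractor that shifts the field back down to bit 0. A minimal sketch of the intended read-modify-write pattern (readl/writel stand in here for the driver's real register accessors, which are an assumption of this sketch):

    #include <linux/bits.h>
    #include <linux/io.h>

    static void qsys_set_strict_ipg(void __iomem *reg, u8 ipg)
    {
            u32 val = readl(reg);

            /* clear the old field, then OR in the new value shifted into place */
            val &= ~QSYS_PREEMPTION_CFG_STRICT_IPG_M;
            val |= QSYS_PREEMPTION_CFG_STRICT_IPG(ipg);
            writel(val, reg);
    }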
diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c
new file mode 100644
index 0000000..e334b40
--- /dev/null
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include "ocelot.h"
+
+static const u32 ocelot_ana_regmap[] = {
+       REG(ANA_ADVLEARN,                  0x009000),
+       REG(ANA_VLANMASK,                  0x009004),
+       REG(ANA_PORT_B_DOMAIN,             0x009008),
+       REG(ANA_ANAGEFIL,                  0x00900c),
+       REG(ANA_ANEVENTS,                  0x009010),
+       REG(ANA_STORMLIMIT_BURST,          0x009014),
+       REG(ANA_STORMLIMIT_CFG,            0x009018),
+       REG(ANA_ISOLATED_PORTS,            0x009028),
+       REG(ANA_COMMUNITY_PORTS,           0x00902c),
+       REG(ANA_AUTOAGE,                   0x009030),
+       REG(ANA_MACTOPTIONS,               0x009034),
+       REG(ANA_LEARNDISC,                 0x009038),
+       REG(ANA_AGENCTRL,                  0x00903c),
+       REG(ANA_MIRRORPORTS,               0x009040),
+       REG(ANA_EMIRRORPORTS,              0x009044),
+       REG(ANA_FLOODING,                  0x009048),
+       REG(ANA_FLOODING_IPMC,             0x00904c),
+       REG(ANA_SFLOW_CFG,                 0x009050),
+       REG(ANA_PORT_MODE,                 0x009080),
+       REG(ANA_PGID_PGID,                 0x008c00),
+       REG(ANA_TABLES_ANMOVED,            0x008b30),
+       REG(ANA_TABLES_MACHDATA,           0x008b34),
+       REG(ANA_TABLES_MACLDATA,           0x008b38),
+       REG(ANA_TABLES_MACACCESS,          0x008b3c),
+       REG(ANA_TABLES_MACTINDX,           0x008b40),
+       REG(ANA_TABLES_VLANACCESS,         0x008b44),
+       REG(ANA_TABLES_VLANTIDX,           0x008b48),
+       REG(ANA_TABLES_ISDXACCESS,         0x008b4c),
+       REG(ANA_TABLES_ISDXTIDX,           0x008b50),
+       REG(ANA_TABLES_ENTRYLIM,           0x008b00),
+       REG(ANA_TABLES_PTP_ID_HIGH,        0x008b54),
+       REG(ANA_TABLES_PTP_ID_LOW,         0x008b58),
+       REG(ANA_MSTI_STATE,                0x008e00),
+       REG(ANA_PORT_VLAN_CFG,             0x007000),
+       REG(ANA_PORT_DROP_CFG,             0x007004),
+       REG(ANA_PORT_QOS_CFG,              0x007008),
+       REG(ANA_PORT_VCAP_CFG,             0x00700c),
+       REG(ANA_PORT_VCAP_S1_KEY_CFG,      0x007010),
+       REG(ANA_PORT_VCAP_S2_CFG,          0x00701c),
+       REG(ANA_PORT_PCP_DEI_MAP,          0x007020),
+       REG(ANA_PORT_CPU_FWD_CFG,          0x007060),
+       REG(ANA_PORT_CPU_FWD_BPDU_CFG,     0x007064),
+       REG(ANA_PORT_CPU_FWD_GARP_CFG,     0x007068),
+       REG(ANA_PORT_CPU_FWD_CCM_CFG,      0x00706c),
+       REG(ANA_PORT_PORT_CFG,             0x007070),
+       REG(ANA_PORT_POL_CFG,              0x007074),
+       REG(ANA_PORT_PTP_CFG,              0x007078),
+       REG(ANA_PORT_PTP_DLY1_CFG,         0x00707c),
+       REG(ANA_OAM_UPM_LM_CNT,            0x007c00),
+       REG(ANA_PORT_PTP_DLY2_CFG,         0x007080),
+       REG(ANA_PFC_PFC_CFG,               0x008800),
+       REG(ANA_PFC_PFC_TIMER,             0x008804),
+       REG(ANA_IPT_OAM_MEP_CFG,           0x008000),
+       REG(ANA_IPT_IPT,                   0x008004),
+       REG(ANA_PPT_PPT,                   0x008ac0),
+       REG(ANA_FID_MAP_FID_MAP,           0x000000),
+       REG(ANA_AGGR_CFG,                  0x0090b4),
+       REG(ANA_CPUQ_CFG,                  0x0090b8),
+       REG(ANA_CPUQ_CFG2,                 0x0090bc),
+       REG(ANA_CPUQ_8021_CFG,             0x0090c0),
+       REG(ANA_DSCP_CFG,                  0x009100),
+       REG(ANA_DSCP_REWR_CFG,             0x009200),
+       REG(ANA_VCAP_RNG_TYPE_CFG,         0x009240),
+       REG(ANA_VCAP_RNG_VAL_CFG,          0x009260),
+       REG(ANA_VRAP_CFG,                  0x009280),
+       REG(ANA_VRAP_HDR_DATA,             0x009284),
+       REG(ANA_VRAP_HDR_MASK,             0x009288),
+       REG(ANA_DISCARD_CFG,               0x00928c),
+       REG(ANA_FID_CFG,                   0x009290),
+       REG(ANA_POL_PIR_CFG,               0x004000),
+       REG(ANA_POL_CIR_CFG,               0x004004),
+       REG(ANA_POL_MODE_CFG,              0x004008),
+       REG(ANA_POL_PIR_STATE,             0x00400c),
+       REG(ANA_POL_CIR_STATE,             0x004010),
+       REG(ANA_POL_STATE,                 0x004014),
+       REG(ANA_POL_FLOWC,                 0x008b80),
+       REG(ANA_POL_HYST,                  0x008bec),
+       REG(ANA_POL_MISC_CFG,              0x008bf0),
+};
+
+static const u32 ocelot_qs_regmap[] = {
+       REG(QS_XTR_GRP_CFG,                0x000000),
+       REG(QS_XTR_RD,                     0x000008),
+       REG(QS_XTR_FRM_PRUNING,            0x000010),
+       REG(QS_XTR_FLUSH,                  0x000018),
+       REG(QS_XTR_DATA_PRESENT,           0x00001c),
+       REG(QS_XTR_CFG,                    0x000020),
+       REG(QS_INJ_GRP_CFG,                0x000024),
+       REG(QS_INJ_WR,                     0x00002c),
+       REG(QS_INJ_CTRL,                   0x000034),
+       REG(QS_INJ_STATUS,                 0x00003c),
+       REG(QS_INJ_ERR,                    0x000040),
+       REG(QS_INH_DBG,                    0x000048),
+};
+
+static const u32 ocelot_hsio_regmap[] = {
+       REG(HSIO_PLL5G_CFG0,               0x000000),
+       REG(HSIO_PLL5G_CFG1,               0x000004),
+       REG(HSIO_PLL5G_CFG2,               0x000008),
+       REG(HSIO_PLL5G_CFG3,               0x00000c),
+       REG(HSIO_PLL5G_CFG4,               0x000010),
+       REG(HSIO_PLL5G_CFG5,               0x000014),
+       REG(HSIO_PLL5G_CFG6,               0x000018),
+       REG(HSIO_PLL5G_STATUS0,            0x00001c),
+       REG(HSIO_PLL5G_STATUS1,            0x000020),
+       REG(HSIO_PLL5G_BIST_CFG0,          0x000024),
+       REG(HSIO_PLL5G_BIST_CFG1,          0x000028),
+       REG(HSIO_PLL5G_BIST_CFG2,          0x00002c),
+       REG(HSIO_PLL5G_BIST_STAT0,         0x000030),
+       REG(HSIO_PLL5G_BIST_STAT1,         0x000034),
+       REG(HSIO_RCOMP_CFG0,               0x000038),
+       REG(HSIO_RCOMP_STATUS,             0x00003c),
+       REG(HSIO_SYNC_ETH_CFG,             0x000040),
+       REG(HSIO_SYNC_ETH_PLL_CFG,         0x000048),
+       REG(HSIO_S1G_DES_CFG,              0x00004c),
+       REG(HSIO_S1G_IB_CFG,               0x000050),
+       REG(HSIO_S1G_OB_CFG,               0x000054),
+       REG(HSIO_S1G_SER_CFG,              0x000058),
+       REG(HSIO_S1G_COMMON_CFG,           0x00005c),
+       REG(HSIO_S1G_PLL_CFG,              0x000060),
+       REG(HSIO_S1G_PLL_STATUS,           0x000064),
+       REG(HSIO_S1G_DFT_CFG0,             0x000068),
+       REG(HSIO_S1G_DFT_CFG1,             0x00006c),
+       REG(HSIO_S1G_DFT_CFG2,             0x000070),
+       REG(HSIO_S1G_TP_CFG,               0x000074),
+       REG(HSIO_S1G_RC_PLL_BIST_CFG,      0x000078),
+       REG(HSIO_S1G_MISC_CFG,             0x00007c),
+       REG(HSIO_S1G_DFT_STATUS,           0x000080),
+       REG(HSIO_S1G_MISC_STATUS,          0x000084),
+       REG(HSIO_MCB_S1G_ADDR_CFG,         0x000088),
+       REG(HSIO_S6G_DIG_CFG,              0x00008c),
+       REG(HSIO_S6G_DFT_CFG0,             0x000090),
+       REG(HSIO_S6G_DFT_CFG1,             0x000094),
+       REG(HSIO_S6G_DFT_CFG2,             0x000098),
+       REG(HSIO_S6G_TP_CFG0,              0x00009c),
+       REG(HSIO_S6G_TP_CFG1,              0x0000a0),
+       REG(HSIO_S6G_RC_PLL_BIST_CFG,      0x0000a4),
+       REG(HSIO_S6G_MISC_CFG,             0x0000a8),
+       REG(HSIO_S6G_OB_ANEG_CFG,          0x0000ac),
+       REG(HSIO_S6G_DFT_STATUS,           0x0000b0),
+       REG(HSIO_S6G_ERR_CNT,              0x0000b4),
+       REG(HSIO_S6G_MISC_STATUS,          0x0000b8),
+       REG(HSIO_S6G_DES_CFG,              0x0000bc),
+       REG(HSIO_S6G_IB_CFG,               0x0000c0),
+       REG(HSIO_S6G_IB_CFG1,              0x0000c4),
+       REG(HSIO_S6G_IB_CFG2,              0x0000c8),
+       REG(HSIO_S6G_IB_CFG3,              0x0000cc),
+       REG(HSIO_S6G_IB_CFG4,              0x0000d0),
+       REG(HSIO_S6G_IB_CFG5,              0x0000d4),
+       REG(HSIO_S6G_OB_CFG,               0x0000d8),
+       REG(HSIO_S6G_OB_CFG1,              0x0000dc),
+       REG(HSIO_S6G_SER_CFG,              0x0000e0),
+       REG(HSIO_S6G_COMMON_CFG,           0x0000e4),
+       REG(HSIO_S6G_PLL_CFG,              0x0000e8),
+       REG(HSIO_S6G_ACJTAG_CFG,           0x0000ec),
+       REG(HSIO_S6G_GP_CFG,               0x0000f0),
+       REG(HSIO_S6G_IB_STATUS0,           0x0000f4),
+       REG(HSIO_S6G_IB_STATUS1,           0x0000f8),
+       REG(HSIO_S6G_ACJTAG_STATUS,        0x0000fc),
+       REG(HSIO_S6G_PLL_STATUS,           0x000100),
+       REG(HSIO_S6G_REVID,                0x000104),
+       REG(HSIO_MCB_S6G_ADDR_CFG,         0x000108),
+       REG(HSIO_HW_CFG,                   0x00010c),
+       REG(HSIO_HW_QSGMII_CFG,            0x000110),
+       REG(HSIO_HW_QSGMII_STAT,           0x000114),
+       REG(HSIO_CLK_CFG,                  0x000118),
+       REG(HSIO_TEMP_SENSOR_CTRL,         0x00011c),
+       REG(HSIO_TEMP_SENSOR_CFG,          0x000120),
+       REG(HSIO_TEMP_SENSOR_STAT,         0x000124),
+};
+
+static const u32 ocelot_qsys_regmap[] = {
+       REG(QSYS_PORT_MODE,                0x011200),
+       REG(QSYS_SWITCH_PORT_MODE,         0x011234),
+       REG(QSYS_STAT_CNT_CFG,             0x011264),
+       REG(QSYS_EEE_CFG,                  0x011268),
+       REG(QSYS_EEE_THRES,                0x011294),
+       REG(QSYS_IGR_NO_SHARING,           0x011298),
+       REG(QSYS_EGR_NO_SHARING,           0x01129c),
+       REG(QSYS_SW_STATUS,                0x0112a0),
+       REG(QSYS_EXT_CPU_CFG,              0x0112d0),
+       REG(QSYS_PAD_CFG,                  0x0112d4),
+       REG(QSYS_CPU_GROUP_MAP,            0x0112d8),
+       REG(QSYS_QMAP,                     0x0112dc),
+       REG(QSYS_ISDX_SGRP,                0x011400),
+       REG(QSYS_TIMED_FRAME_ENTRY,        0x014000),
+       REG(QSYS_TFRM_MISC,                0x011310),
+       REG(QSYS_TFRM_PORT_DLY,            0x011314),
+       REG(QSYS_TFRM_TIMER_CFG_1,         0x011318),
+       REG(QSYS_TFRM_TIMER_CFG_2,         0x01131c),
+       REG(QSYS_TFRM_TIMER_CFG_3,         0x011320),
+       REG(QSYS_TFRM_TIMER_CFG_4,         0x011324),
+       REG(QSYS_TFRM_TIMER_CFG_5,         0x011328),
+       REG(QSYS_TFRM_TIMER_CFG_6,         0x01132c),
+       REG(QSYS_TFRM_TIMER_CFG_7,         0x011330),
+       REG(QSYS_TFRM_TIMER_CFG_8,         0x011334),
+       REG(QSYS_RED_PROFILE,              0x011338),
+       REG(QSYS_RES_QOS_MODE,             0x011378),
+       REG(QSYS_RES_CFG,                  0x012000),
+       REG(QSYS_RES_STAT,                 0x012004),
+       REG(QSYS_EGR_DROP_MODE,            0x01137c),
+       REG(QSYS_EQ_CTRL,                  0x011380),
+       REG(QSYS_EVENTS_CORE,              0x011384),
+       REG(QSYS_CIR_CFG,                  0x000000),
+       REG(QSYS_EIR_CFG,                  0x000004),
+       REG(QSYS_SE_CFG,                   0x000008),
+       REG(QSYS_SE_DWRR_CFG,              0x00000c),
+       REG(QSYS_SE_CONNECT,               0x00003c),
+       REG(QSYS_SE_DLB_SENSE,             0x000040),
+       REG(QSYS_CIR_STATE,                0x000044),
+       REG(QSYS_EIR_STATE,                0x000048),
+       REG(QSYS_SE_STATE,                 0x00004c),
+       REG(QSYS_HSCH_MISC_CFG,            0x011388),
+};
+
+static const u32 ocelot_rew_regmap[] = {
+       REG(REW_PORT_VLAN_CFG,             0x000000),
+       REG(REW_TAG_CFG,                   0x000004),
+       REG(REW_PORT_CFG,                  0x000008),
+       REG(REW_DSCP_CFG,                  0x00000c),
+       REG(REW_PCP_DEI_QOS_MAP_CFG,       0x000010),
+       REG(REW_PTP_CFG,                   0x000050),
+       REG(REW_PTP_DLY1_CFG,              0x000054),
+       REG(REW_DSCP_REMAP_DP1_CFG,        0x000690),
+       REG(REW_DSCP_REMAP_CFG,            0x000790),
+       REG(REW_STAT_CFG,                  0x000890),
+       REG(REW_PPT,                       0x000680),
+};
+
+static const u32 ocelot_sys_regmap[] = {
+       REG(SYS_COUNT_RX_OCTETS,           0x000000),
+       REG(SYS_COUNT_RX_UNICAST,          0x000004),
+       REG(SYS_COUNT_RX_MULTICAST,        0x000008),
+       REG(SYS_COUNT_RX_BROADCAST,        0x00000c),
+       REG(SYS_COUNT_RX_SHORTS,           0x000010),
+       REG(SYS_COUNT_RX_FRAGMENTS,        0x000014),
+       REG(SYS_COUNT_RX_JABBERS,          0x000018),
+       REG(SYS_COUNT_RX_CRC_ALIGN_ERRS,   0x00001c),
+       REG(SYS_COUNT_RX_SYM_ERRS,         0x000020),
+       REG(SYS_COUNT_RX_64,               0x000024),
+       REG(SYS_COUNT_RX_65_127,           0x000028),
+       REG(SYS_COUNT_RX_128_255,          0x00002c),
+       REG(SYS_COUNT_RX_256_1023,         0x000030),
+       REG(SYS_COUNT_RX_1024_1526,        0x000034),
+       REG(SYS_COUNT_RX_1527_MAX,         0x000038),
+       REG(SYS_COUNT_RX_PAUSE,            0x00003c),
+       REG(SYS_COUNT_RX_CONTROL,          0x000040),
+       REG(SYS_COUNT_RX_LONGS,            0x000044),
+       REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x000048),
+       REG(SYS_COUNT_TX_OCTETS,           0x000100),
+       REG(SYS_COUNT_TX_UNICAST,          0x000104),
+       REG(SYS_COUNT_TX_MULTICAST,        0x000108),
+       REG(SYS_COUNT_TX_BROADCAST,        0x00010c),
+       REG(SYS_COUNT_TX_COLLISION,        0x000110),
+       REG(SYS_COUNT_TX_DROPS,            0x000114),
+       REG(SYS_COUNT_TX_PAUSE,            0x000118),
+       REG(SYS_COUNT_TX_64,               0x00011c),
+       REG(SYS_COUNT_TX_65_127,           0x000120),
+       REG(SYS_COUNT_TX_128_511,          0x000124),
+       REG(SYS_COUNT_TX_512_1023,         0x000128),
+       REG(SYS_COUNT_TX_1024_1526,        0x00012c),
+       REG(SYS_COUNT_TX_1527_MAX,         0x000130),
+       REG(SYS_COUNT_TX_AGING,            0x000170),
+       REG(SYS_RESET_CFG,                 0x000508),
+       REG(SYS_CMID,                      0x00050c),
+       REG(SYS_VLAN_ETYPE_CFG,            0x000510),
+       REG(SYS_PORT_MODE,                 0x000514),
+       REG(SYS_FRONT_PORT_MODE,           0x000548),
+       REG(SYS_FRM_AGING,                 0x000574),
+       REG(SYS_STAT_CFG,                  0x000578),
+       REG(SYS_SW_STATUS,                 0x00057c),
+       REG(SYS_MISC_CFG,                  0x0005ac),
+       REG(SYS_REW_MAC_HIGH_CFG,          0x0005b0),
+       REG(SYS_REW_MAC_LOW_CFG,           0x0005dc),
+       REG(SYS_CM_ADDR,                   0x000500),
+       REG(SYS_CM_DATA,                   0x000504),
+       REG(SYS_PAUSE_CFG,                 0x000608),
+       REG(SYS_PAUSE_TOT_CFG,             0x000638),
+       REG(SYS_ATOP,                      0x00063c),
+       REG(SYS_ATOP_TOT_CFG,              0x00066c),
+       REG(SYS_MAC_FC_CFG,                0x000670),
+       REG(SYS_MMGT,                      0x00069c),
+       REG(SYS_MMGT_FAST,                 0x0006a0),
+       REG(SYS_EVENTS_DIF,                0x0006a4),
+       REG(SYS_EVENTS_CORE,               0x0006b4),
+       REG(SYS_CNT,                       0x000000),
+       REG(SYS_PTP_STATUS,                0x0006b8),
+       REG(SYS_PTP_TXSTAMP,               0x0006bc),
+       REG(SYS_PTP_NXT,                   0x0006c0),
+       REG(SYS_PTP_CFG,                   0x0006c4),
+};
+
+static const u32 *ocelot_regmap[] = {
+       [ANA] = ocelot_ana_regmap,
+       [QS] = ocelot_qs_regmap,
+       [HSIO] = ocelot_hsio_regmap,
+       [QSYS] = ocelot_qsys_regmap,
+       [REW] = ocelot_rew_regmap,
+       [SYS] = ocelot_sys_regmap,
+};
+
+static const struct reg_field ocelot_regfields[] = {
+       [ANA_ADVLEARN_VLAN_CHK] = REG_FIELD(ANA_ADVLEARN, 11, 11),
+       [ANA_ADVLEARN_LEARN_MIRROR] = REG_FIELD(ANA_ADVLEARN, 0, 10),
+       [ANA_ANEVENTS_MSTI_DROP] = REG_FIELD(ANA_ANEVENTS, 27, 27),
+       [ANA_ANEVENTS_ACLKILL] = REG_FIELD(ANA_ANEVENTS, 26, 26),
+       [ANA_ANEVENTS_ACLUSED] = REG_FIELD(ANA_ANEVENTS, 25, 25),
+       [ANA_ANEVENTS_AUTOAGE] = REG_FIELD(ANA_ANEVENTS, 24, 24),
+       [ANA_ANEVENTS_VS2TTL1] = REG_FIELD(ANA_ANEVENTS, 23, 23),
+       [ANA_ANEVENTS_STORM_DROP] = REG_FIELD(ANA_ANEVENTS, 22, 22),
+       [ANA_ANEVENTS_LEARN_DROP] = REG_FIELD(ANA_ANEVENTS, 21, 21),
+       [ANA_ANEVENTS_AGED_ENTRY] = REG_FIELD(ANA_ANEVENTS, 20, 20),
+       [ANA_ANEVENTS_CPU_LEARN_FAILED] = REG_FIELD(ANA_ANEVENTS, 19, 19),
+       [ANA_ANEVENTS_AUTO_LEARN_FAILED] = REG_FIELD(ANA_ANEVENTS, 18, 18),
+       [ANA_ANEVENTS_LEARN_REMOVE] = REG_FIELD(ANA_ANEVENTS, 17, 17),
+       [ANA_ANEVENTS_AUTO_LEARNED] = REG_FIELD(ANA_ANEVENTS, 16, 16),
+       [ANA_ANEVENTS_AUTO_MOVED] = REG_FIELD(ANA_ANEVENTS, 15, 15),
+       [ANA_ANEVENTS_DROPPED] = REG_FIELD(ANA_ANEVENTS, 14, 14),
+       [ANA_ANEVENTS_CLASSIFIED_DROP] = REG_FIELD(ANA_ANEVENTS, 13, 13),
+       [ANA_ANEVENTS_CLASSIFIED_COPY] = REG_FIELD(ANA_ANEVENTS, 12, 12),
+       [ANA_ANEVENTS_VLAN_DISCARD] = REG_FIELD(ANA_ANEVENTS, 11, 11),
+       [ANA_ANEVENTS_FWD_DISCARD] = REG_FIELD(ANA_ANEVENTS, 10, 10),
+       [ANA_ANEVENTS_MULTICAST_FLOOD] = REG_FIELD(ANA_ANEVENTS, 9, 9),
+       [ANA_ANEVENTS_UNICAST_FLOOD] = REG_FIELD(ANA_ANEVENTS, 8, 8),
+       [ANA_ANEVENTS_DEST_KNOWN] = REG_FIELD(ANA_ANEVENTS, 7, 7),
+       [ANA_ANEVENTS_BUCKET3_MATCH] = REG_FIELD(ANA_ANEVENTS, 6, 6),
+       [ANA_ANEVENTS_BUCKET2_MATCH] = REG_FIELD(ANA_ANEVENTS, 5, 5),
+       [ANA_ANEVENTS_BUCKET1_MATCH] = REG_FIELD(ANA_ANEVENTS, 4, 4),
+       [ANA_ANEVENTS_BUCKET0_MATCH] = REG_FIELD(ANA_ANEVENTS, 3, 3),
+       [ANA_ANEVENTS_CPU_OPERATION] = REG_FIELD(ANA_ANEVENTS, 2, 2),
+       [ANA_ANEVENTS_DMAC_LOOKUP] = REG_FIELD(ANA_ANEVENTS, 1, 1),
+       [ANA_ANEVENTS_SMAC_LOOKUP] = REG_FIELD(ANA_ANEVENTS, 0, 0),
+       [ANA_TABLES_MACACCESS_B_DOM] = REG_FIELD(ANA_TABLES_MACACCESS, 18, 18),
+       [ANA_TABLES_MACTINDX_BUCKET] = REG_FIELD(ANA_TABLES_MACTINDX, 10, 11),
+       [ANA_TABLES_MACTINDX_M_INDEX] = REG_FIELD(ANA_TABLES_MACTINDX, 0, 9),
+       [QSYS_TIMED_FRAME_ENTRY_TFRM_VLD] = REG_FIELD(QSYS_TIMED_FRAME_ENTRY, 20, 20),
+       [QSYS_TIMED_FRAME_ENTRY_TFRM_FP] = REG_FIELD(QSYS_TIMED_FRAME_ENTRY, 8, 19),
+       [QSYS_TIMED_FRAME_ENTRY_TFRM_PORTNO] = REG_FIELD(QSYS_TIMED_FRAME_ENTRY, 4, 7),
+       [QSYS_TIMED_FRAME_ENTRY_TFRM_TM_SEL] = REG_FIELD(QSYS_TIMED_FRAME_ENTRY, 1, 3),
+       [QSYS_TIMED_FRAME_ENTRY_TFRM_TM_T] = REG_FIELD(QSYS_TIMED_FRAME_ENTRY, 0, 0),
+       [SYS_RESET_CFG_CORE_ENA] = REG_FIELD(SYS_RESET_CFG, 2, 2),
+       [SYS_RESET_CFG_MEM_ENA] = REG_FIELD(SYS_RESET_CFG, 1, 1),
+       [SYS_RESET_CFG_MEM_INIT] = REG_FIELD(SYS_RESET_CFG, 0, 0),
+};
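
[Editor's note] Each entry above names one bit-field inside a register mapped earlier; once ocelot_regfields_init() has backed them with regmap fields, a flag can be toggled without open-coded masking. A sketch, under the assumption that ocelot->regfields holds one struct regmap_field pointer per entry:

    #include <linux/regmap.h>

    static int ocelot_core_enable(struct ocelot *ocelot)
    {
            /* write 1 to the CORE_ENA bit of SYS_RESET_CFG via its regmap field */
            return regmap_field_write(ocelot->regfields[SYS_RESET_CFG_CORE_ENA], 1);
    }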
+
+static const struct ocelot_stat_layout ocelot_stats_layout[] = {
+       { .name = "rx_octets", .offset = 0x00, },
+       { .name = "rx_unicast", .offset = 0x01, },
+       { .name = "rx_multicast", .offset = 0x02, },
+       { .name = "rx_broadcast", .offset = 0x03, },
+       { .name = "rx_shorts", .offset = 0x04, },
+       { .name = "rx_fragments", .offset = 0x05, },
+       { .name = "rx_jabbers", .offset = 0x06, },
+       { .name = "rx_crc_align_errs", .offset = 0x07, },
+       { .name = "rx_sym_errs", .offset = 0x08, },
+       { .name = "rx_frames_below_65_octets", .offset = 0x09, },
+       { .name = "rx_frames_65_to_127_octets", .offset = 0x0A, },
+       { .name = "rx_frames_128_to_255_octets", .offset = 0x0B, },
+       { .name = "rx_frames_256_to_511_octets", .offset = 0x0C, },
+       { .name = "rx_frames_512_to_1023_octets", .offset = 0x0D, },
+       { .name = "rx_frames_1024_to_1526_octets", .offset = 0x0E, },
+       { .name = "rx_frames_over_1526_octets", .offset = 0x0F, },
+       { .name = "rx_pause", .offset = 0x10, },
+       { .name = "rx_control", .offset = 0x11, },
+       { .name = "rx_longs", .offset = 0x12, },
+       { .name = "rx_classified_drops", .offset = 0x13, },
+       { .name = "rx_red_prio_0", .offset = 0x14, },
+       { .name = "rx_red_prio_1", .offset = 0x15, },
+       { .name = "rx_red_prio_2", .offset = 0x16, },
+       { .name = "rx_red_prio_3", .offset = 0x17, },
+       { .name = "rx_red_prio_4", .offset = 0x18, },
+       { .name = "rx_red_prio_5", .offset = 0x19, },
+       { .name = "rx_red_prio_6", .offset = 0x1A, },
+       { .name = "rx_red_prio_7", .offset = 0x1B, },
+       { .name = "rx_yellow_prio_0", .offset = 0x1C, },
+       { .name = "rx_yellow_prio_1", .offset = 0x1D, },
+       { .name = "rx_yellow_prio_2", .offset = 0x1E, },
+       { .name = "rx_yellow_prio_3", .offset = 0x1F, },
+       { .name = "rx_yellow_prio_4", .offset = 0x20, },
+       { .name = "rx_yellow_prio_5", .offset = 0x21, },
+       { .name = "rx_yellow_prio_6", .offset = 0x22, },
+       { .name = "rx_yellow_prio_7", .offset = 0x23, },
+       { .name = "rx_green_prio_0", .offset = 0x24, },
+       { .name = "rx_green_prio_1", .offset = 0x25, },
+       { .name = "rx_green_prio_2", .offset = 0x26, },
+       { .name = "rx_green_prio_3", .offset = 0x27, },
+       { .name = "rx_green_prio_4", .offset = 0x28, },
+       { .name = "rx_green_prio_5", .offset = 0x29, },
+       { .name = "rx_green_prio_6", .offset = 0x2A, },
+       { .name = "rx_green_prio_7", .offset = 0x2B, },
+       { .name = "tx_octets", .offset = 0x40, },
+       { .name = "tx_unicast", .offset = 0x41, },
+       { .name = "tx_multicast", .offset = 0x42, },
+       { .name = "tx_broadcast", .offset = 0x43, },
+       { .name = "tx_collision", .offset = 0x44, },
+       { .name = "tx_drops", .offset = 0x45, },
+       { .name = "tx_pause", .offset = 0x46, },
+       { .name = "tx_frames_below_65_octets", .offset = 0x47, },
+       { .name = "tx_frames_65_to_127_octets", .offset = 0x48, },
+       { .name = "tx_frames_128_255_octets", .offset = 0x49, },
+       { .name = "tx_frames_256_511_octets", .offset = 0x4A, },
+       { .name = "tx_frames_512_1023_octets", .offset = 0x4B, },
+       { .name = "tx_frames_1024_1526_octets", .offset = 0x4C, },
+       { .name = "tx_frames_over_1526_octets", .offset = 0x4D, },
+       { .name = "tx_yellow_prio_0", .offset = 0x4E, },
+       { .name = "tx_yellow_prio_1", .offset = 0x4F, },
+       { .name = "tx_yellow_prio_2", .offset = 0x50, },
+       { .name = "tx_yellow_prio_3", .offset = 0x51, },
+       { .name = "tx_yellow_prio_4", .offset = 0x52, },
+       { .name = "tx_yellow_prio_5", .offset = 0x53, },
+       { .name = "tx_yellow_prio_6", .offset = 0x54, },
+       { .name = "tx_yellow_prio_7", .offset = 0x55, },
+       { .name = "tx_green_prio_0", .offset = 0x56, },
+       { .name = "tx_green_prio_1", .offset = 0x57, },
+       { .name = "tx_green_prio_2", .offset = 0x58, },
+       { .name = "tx_green_prio_3", .offset = 0x59, },
+       { .name = "tx_green_prio_4", .offset = 0x5A, },
+       { .name = "tx_green_prio_5", .offset = 0x5B, },
+       { .name = "tx_green_prio_6", .offset = 0x5C, },
+       { .name = "tx_green_prio_7", .offset = 0x5D, },
+       { .name = "tx_aged", .offset = 0x5E, },
+       { .name = "drop_local", .offset = 0x80, },
+       { .name = "drop_tail", .offset = 0x81, },
+       { .name = "drop_yellow_prio_0", .offset = 0x82, },
+       { .name = "drop_yellow_prio_1", .offset = 0x83, },
+       { .name = "drop_yellow_prio_2", .offset = 0x84, },
+       { .name = "drop_yellow_prio_3", .offset = 0x85, },
+       { .name = "drop_yellow_prio_4", .offset = 0x86, },
+       { .name = "drop_yellow_prio_5", .offset = 0x87, },
+       { .name = "drop_yellow_prio_6", .offset = 0x88, },
+       { .name = "drop_yellow_prio_7", .offset = 0x89, },
+       { .name = "drop_green_prio_0", .offset = 0x8A, },
+       { .name = "drop_green_prio_1", .offset = 0x8B, },
+       { .name = "drop_green_prio_2", .offset = 0x8C, },
+       { .name = "drop_green_prio_3", .offset = 0x8D, },
+       { .name = "drop_green_prio_4", .offset = 0x8E, },
+       { .name = "drop_green_prio_5", .offset = 0x8F, },
+       { .name = "drop_green_prio_6", .offset = 0x90, },
+       { .name = "drop_green_prio_7", .offset = 0x91, },
+};
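
[Editor's note] The stats layout drives the generic counter code: each entry pairs an ethtool string with the counter's word offset in the SYS_CNT counter RAM. A sketch of the kind of loop that would consume it (ocelot_read_rix() is assumed here to read one replicated SYS_CNT word):

    static void ocelot_fetch_stats(struct ocelot *ocelot, u64 *data)
    {
            unsigned int i;

            /* one 32-bit counter word per layout entry, indexed by .offset */
            for (i = 0; i < ocelot->num_stats; i++)
                    data[i] = ocelot_read_rix(ocelot, SYS_CNT,
                                              ocelot->stats_layout[i].offset);
    }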
+
+static void ocelot_pll5_init(struct ocelot *ocelot)
+{
+       /* Configure PLL5. This will need a proper CCF driver.
+        * The values come from the VTSS API for Ocelot.
+        */
+       ocelot_write(ocelot, HSIO_PLL5G_CFG4_IB_CTRL(0x7600) |
+                    HSIO_PLL5G_CFG4_IB_BIAS_CTRL(0x8), HSIO_PLL5G_CFG4);
+       ocelot_write(ocelot, HSIO_PLL5G_CFG0_CORE_CLK_DIV(0x11) |
+                    HSIO_PLL5G_CFG0_CPU_CLK_DIV(2) |
+                    HSIO_PLL5G_CFG0_ENA_BIAS |
+                    HSIO_PLL5G_CFG0_ENA_VCO_BUF |
+                    HSIO_PLL5G_CFG0_ENA_CP1 |
+                    HSIO_PLL5G_CFG0_SELCPI(2) |
+                    HSIO_PLL5G_CFG0_LOOP_BW_RES(0xe) |
+                    HSIO_PLL5G_CFG0_SELBGV820(4) |
+                    HSIO_PLL5G_CFG0_DIV4 |
+                    HSIO_PLL5G_CFG0_ENA_CLKTREE |
+                    HSIO_PLL5G_CFG0_ENA_LANE, HSIO_PLL5G_CFG0);
+       ocelot_write(ocelot, HSIO_PLL5G_CFG2_EN_RESET_FRQ_DET |
+                    HSIO_PLL5G_CFG2_EN_RESET_OVERRUN |
+                    HSIO_PLL5G_CFG2_GAIN_TEST(0x8) |
+                    HSIO_PLL5G_CFG2_ENA_AMPCTRL |
+                    HSIO_PLL5G_CFG2_PWD_AMPCTRL_N |
+                    HSIO_PLL5G_CFG2_AMPC_SEL(0x10), HSIO_PLL5G_CFG2);
+}
+
+int ocelot_chip_init(struct ocelot *ocelot)
+{
+       int ret;
+
+       ocelot->map = ocelot_regmap;
+       ocelot->stats_layout = ocelot_stats_layout;
+       ocelot->num_stats = ARRAY_SIZE(ocelot_stats_layout);
+       ocelot->shared_queue_sz = 224 * 1024;
+
+       ret = ocelot_regfields_init(ocelot, ocelot_regfields);
+       if (ret)
+               return ret;
+
+       ocelot_pll5_init(ocelot);
+
+       eth_random_addr(ocelot->base_mac);
+       ocelot->base_mac[5] &= 0xf0;
+
+       return 0;
+}
+EXPORT_SYMBOL(ocelot_chip_init);
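
[Editor's note] The regmap arrays above are indexed first by target block and then by register enum; the REG() initializer is assumed to strip the target bits with a mask so each table is densely indexed. A hypothetical reconstruction of the offset lookup behind the driver's register accessors (TARGET_OFFSET and REG_MASK mirror the encoding this assumes):

    #define TARGET_OFFSET   24
    #define REG_MASK        GENMASK(TARGET_OFFSET - 1, 0)

    static u32 ocelot_reg_offset(struct ocelot *ocelot, u32 reg)
    {
            /* high bits select the target table, low bits the register slot */
            return ocelot->map[reg >> TARGET_OFFSET][reg & REG_MASK];
    }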
diff --git a/drivers/net/ethernet/mscc/ocelot_rew.h b/drivers/net/ethernet/mscc/ocelot_rew.h
new file mode 100644
index 0000000..210914b
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_REW_H_
+#define _MSCC_OCELOT_REW_H_
+
+#define REW_PORT_VLAN_CFG_GSZ                             0x80
+
+#define REW_PORT_VLAN_CFG_PORT_TPID(x)                    (((x) << 16) & GENMASK(31, 16))
+#define REW_PORT_VLAN_CFG_PORT_TPID_M                     GENMASK(31, 16)
+#define REW_PORT_VLAN_CFG_PORT_TPID_X(x)                  (((x) & GENMASK(31, 16)) >> 16)
+#define REW_PORT_VLAN_CFG_PORT_DEI                        BIT(15)
+#define REW_PORT_VLAN_CFG_PORT_PCP(x)                     (((x) << 12) & GENMASK(14, 12))
+#define REW_PORT_VLAN_CFG_PORT_PCP_M                      GENMASK(14, 12)
+#define REW_PORT_VLAN_CFG_PORT_PCP_X(x)                   (((x) & GENMASK(14, 12)) >> 12)
+#define REW_PORT_VLAN_CFG_PORT_VID(x)                     ((x) & GENMASK(11, 0))
+#define REW_PORT_VLAN_CFG_PORT_VID_M                      GENMASK(11, 0)
+
+#define REW_TAG_CFG_GSZ                                   0x80
+
+#define REW_TAG_CFG_TAG_CFG(x)                            (((x) << 7) & GENMASK(8, 7))
+#define REW_TAG_CFG_TAG_CFG_M                             GENMASK(8, 7)
+#define REW_TAG_CFG_TAG_CFG_X(x)                          (((x) & GENMASK(8, 7)) >> 7)
+#define REW_TAG_CFG_TAG_TPID_CFG(x)                       (((x) << 5) & GENMASK(6, 5))
+#define REW_TAG_CFG_TAG_TPID_CFG_M                        GENMASK(6, 5)
+#define REW_TAG_CFG_TAG_TPID_CFG_X(x)                     (((x) & GENMASK(6, 5)) >> 5)
+#define REW_TAG_CFG_TAG_VID_CFG                           BIT(4)
+#define REW_TAG_CFG_TAG_PCP_CFG(x)                        (((x) << 2) & GENMASK(3, 2))
+#define REW_TAG_CFG_TAG_PCP_CFG_M                         GENMASK(3, 2)
+#define REW_TAG_CFG_TAG_PCP_CFG_X(x)                      (((x) & GENMASK(3, 2)) >> 2)
+#define REW_TAG_CFG_TAG_DEI_CFG(x)                        ((x) & GENMASK(1, 0))
+#define REW_TAG_CFG_TAG_DEI_CFG_M                         GENMASK(1, 0)
+
+#define REW_PORT_CFG_GSZ                                  0x80
+
+#define REW_PORT_CFG_ES0_EN                               BIT(5)
+#define REW_PORT_CFG_FCS_UPDATE_NONCPU_CFG(x)             (((x) << 3) & GENMASK(4, 3))
+#define REW_PORT_CFG_FCS_UPDATE_NONCPU_CFG_M              GENMASK(4, 3)
+#define REW_PORT_CFG_FCS_UPDATE_NONCPU_CFG_X(x)           (((x) & GENMASK(4, 3)) >> 3)
+#define REW_PORT_CFG_FCS_UPDATE_CPU_ENA                   BIT(2)
+#define REW_PORT_CFG_FLUSH_ENA                            BIT(1)
+#define REW_PORT_CFG_AGE_DIS                              BIT(0)
+
+#define REW_DSCP_CFG_GSZ                                  0x80
+
+#define REW_PCP_DEI_QOS_MAP_CFG_GSZ                       0x80
+#define REW_PCP_DEI_QOS_MAP_CFG_RSZ                       0x4
+
+#define REW_PCP_DEI_QOS_MAP_CFG_DEI_QOS_VAL               BIT(3)
+#define REW_PCP_DEI_QOS_MAP_CFG_PCP_QOS_VAL(x)            ((x) & GENMASK(2, 0))
+#define REW_PCP_DEI_QOS_MAP_CFG_PCP_QOS_VAL_M             GENMASK(2, 0)
+
+#define REW_PTP_CFG_GSZ                                   0x80
+
+#define REW_PTP_CFG_PTP_BACKPLANE_MODE                    BIT(7)
+#define REW_PTP_CFG_GP_CFG_UNUSED(x)                      (((x) << 3) & GENMASK(6, 3))
+#define REW_PTP_CFG_GP_CFG_UNUSED_M                       GENMASK(6, 3)
+#define REW_PTP_CFG_GP_CFG_UNUSED_X(x)                    (((x) & GENMASK(6, 3)) >> 3)
+#define REW_PTP_CFG_PTP_1STEP_DIS                         BIT(2)
+#define REW_PTP_CFG_PTP_2STEP_DIS                         BIT(1)
+#define REW_PTP_CFG_PTP_UDP_KEEP                          BIT(0)
+
+#define REW_PTP_DLY1_CFG_GSZ                              0x80
+
+#define REW_RED_TAG_CFG_GSZ                               0x80
+
+#define REW_RED_TAG_CFG_RED_TAG_CFG                       BIT(0)
+
+#define REW_DSCP_REMAP_DP1_CFG_RSZ                        0x4
+
+#define REW_DSCP_REMAP_CFG_RSZ                            0x4
+
+#define REW_REW_STICKY_ES0_TAGB_PUSH_FAILED               BIT(0)
+
+#define REW_PPT_RSZ                                       0x4
+
+#endif
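
[Editor's note] The _GSZ constants give the stride between per-port register groups and _RSZ the stride between replicated registers inside one group, so a specific instance is reached by simple arithmetic. A sketch for the PCP/DEI map, which has both strides (base is assumed to be the offset from the regmap table):

    static u32 rew_pcp_dei_map_addr(u32 base, u32 port, u32 entry)
    {
            /* one 0x80-byte group per port, one 4-byte register per (PCP, DEI) */
            return base + port * REW_PCP_DEI_QOS_MAP_CFG_GSZ +
                   entry * REW_PCP_DEI_QOS_MAP_CFG_RSZ;
    }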
diff --git a/drivers/net/ethernet/mscc/ocelot_sys.h b/drivers/net/ethernet/mscc/ocelot_sys.h
new file mode 100644
index 0000000..16f91e1
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_SYS_H_
+#define _MSCC_OCELOT_SYS_H_
+
+#define SYS_COUNT_RX_OCTETS_RSZ                           0x4
+
+#define SYS_COUNT_TX_OCTETS_RSZ                           0x4
+
+#define SYS_PORT_MODE_RSZ                                 0x4
+
+#define SYS_PORT_MODE_DATA_WO_TS(x)                       (((x) << 5) & GENMASK(6, 5))
+#define SYS_PORT_MODE_DATA_WO_TS_M                        GENMASK(6, 5)
+#define SYS_PORT_MODE_DATA_WO_TS_X(x)                     (((x) & GENMASK(6, 5)) >> 5)
+#define SYS_PORT_MODE_INCL_INJ_HDR(x)                     (((x) << 3) & GENMASK(4, 3))
+#define SYS_PORT_MODE_INCL_INJ_HDR_M                      GENMASK(4, 3)
+#define SYS_PORT_MODE_INCL_INJ_HDR_X(x)                   (((x) & GENMASK(4, 3)) >> 3)
+#define SYS_PORT_MODE_INCL_XTR_HDR(x)                     (((x) << 1) & GENMASK(2, 1))
+#define SYS_PORT_MODE_INCL_XTR_HDR_M                      GENMASK(2, 1)
+#define SYS_PORT_MODE_INCL_XTR_HDR_X(x)                   (((x) & GENMASK(2, 1)) >> 1)
+#define SYS_PORT_MODE_INJ_HDR_ERR                         BIT(0)
+
+#define SYS_FRONT_PORT_MODE_RSZ                           0x4
+
+#define SYS_FRONT_PORT_MODE_HDX_MODE                      BIT(0)
+
+#define SYS_FRM_AGING_AGE_TX_ENA                          BIT(20)
+#define SYS_FRM_AGING_MAX_AGE(x)                          ((x) & GENMASK(19, 0))
+#define SYS_FRM_AGING_MAX_AGE_M                           GENMASK(19, 0)
+
+#define SYS_STAT_CFG_STAT_CLEAR_SHOT(x)                   (((x) << 10) & GENMASK(16, 10))
+#define SYS_STAT_CFG_STAT_CLEAR_SHOT_M                    GENMASK(16, 10)
+#define SYS_STAT_CFG_STAT_CLEAR_SHOT_X(x)                 (((x) & GENMASK(16, 10)) >> 10)
+#define SYS_STAT_CFG_STAT_VIEW(x)                         ((x) & GENMASK(9, 0))
+#define SYS_STAT_CFG_STAT_VIEW_M                          GENMASK(9, 0)
+
+#define SYS_SW_STATUS_RSZ                                 0x4
+
+#define SYS_SW_STATUS_PORT_RX_PAUSED                      BIT(0)
+
+#define SYS_MISC_CFG_PTP_RSRV_CLR                         BIT(1)
+#define SYS_MISC_CFG_PTP_DIS_NEG_RO                       BIT(0)
+
+#define SYS_REW_MAC_HIGH_CFG_RSZ                          0x4
+
+#define SYS_REW_MAC_LOW_CFG_RSZ                           0x4
+
+#define SYS_TIMESTAMP_OFFSET_ETH_TYPE_CFG(x)              (((x) << 6) & GENMASK(21, 6))
+#define SYS_TIMESTAMP_OFFSET_ETH_TYPE_CFG_M               GENMASK(21, 6)
+#define SYS_TIMESTAMP_OFFSET_ETH_TYPE_CFG_X(x)            (((x) & GENMASK(21, 6)) >> 6)
+#define SYS_TIMESTAMP_OFFSET_TIMESTAMP_OFFSET(x)          ((x) & GENMASK(5, 0))
+#define SYS_TIMESTAMP_OFFSET_TIMESTAMP_OFFSET_M           GENMASK(5, 0)
+
+#define SYS_PAUSE_CFG_RSZ                                 0x4
+
+#define SYS_PAUSE_CFG_PAUSE_START(x)                      (((x) << 10) & GENMASK(18, 10))
+#define SYS_PAUSE_CFG_PAUSE_START_M                       GENMASK(18, 10)
+#define SYS_PAUSE_CFG_PAUSE_START_X(x)                    (((x) & GENMASK(18, 10)) >> 10)
+#define SYS_PAUSE_CFG_PAUSE_STOP(x)                       (((x) << 1) & GENMASK(9, 1))
+#define SYS_PAUSE_CFG_PAUSE_STOP_M                        GENMASK(9, 1)
+#define SYS_PAUSE_CFG_PAUSE_STOP_X(x)                     (((x) & GENMASK(9, 1)) >> 1)
+#define SYS_PAUSE_CFG_PAUSE_ENA                           BIT(0)
+
+#define SYS_PAUSE_TOT_CFG_PAUSE_TOT_START(x)              (((x) << 9) & GENMASK(17, 9))
+#define SYS_PAUSE_TOT_CFG_PAUSE_TOT_START_M               GENMASK(17, 9)
+#define SYS_PAUSE_TOT_CFG_PAUSE_TOT_START_X(x)            (((x) & GENMASK(17, 9)) >> 9)
+#define SYS_PAUSE_TOT_CFG_PAUSE_TOT_STOP(x)               ((x) & GENMASK(8, 0))
+#define SYS_PAUSE_TOT_CFG_PAUSE_TOT_STOP_M                GENMASK(8, 0)
+
+#define SYS_ATOP_RSZ                                      0x4
+
+#define SYS_MAC_FC_CFG_RSZ                                0x4
+
+#define SYS_MAC_FC_CFG_FC_LINK_SPEED(x)                   (((x) << 26) & GENMASK(27, 26))
+#define SYS_MAC_FC_CFG_FC_LINK_SPEED_M                    GENMASK(27, 26)
+#define SYS_MAC_FC_CFG_FC_LINK_SPEED_X(x)                 (((x) & GENMASK(27, 26)) >> 26)
+#define SYS_MAC_FC_CFG_FC_LATENCY_CFG(x)                  (((x) << 20) & GENMASK(25, 20))
+#define SYS_MAC_FC_CFG_FC_LATENCY_CFG_M                   GENMASK(25, 20)
+#define SYS_MAC_FC_CFG_FC_LATENCY_CFG_X(x)                (((x) & GENMASK(25, 20)) >> 20)
+#define SYS_MAC_FC_CFG_ZERO_PAUSE_ENA                     BIT(18)
+#define SYS_MAC_FC_CFG_TX_FC_ENA                          BIT(17)
+#define SYS_MAC_FC_CFG_RX_FC_ENA                          BIT(16)
+#define SYS_MAC_FC_CFG_PAUSE_VAL_CFG(x)                   ((x) & GENMASK(15, 0))
+#define SYS_MAC_FC_CFG_PAUSE_VAL_CFG_M                    GENMASK(15, 0)
+
+#define SYS_MMGT_RELCNT(x)                                (((x) << 16) & GENMASK(31, 16))
+#define SYS_MMGT_RELCNT_M                                 GENMASK(31, 16)
+#define SYS_MMGT_RELCNT_X(x)                              (((x) & GENMASK(31, 16)) >> 16)
+#define SYS_MMGT_FREECNT(x)                               ((x) & GENMASK(15, 0))
+#define SYS_MMGT_FREECNT_M                                GENMASK(15, 0)
+
+#define SYS_MMGT_FAST_FREEVLD(x)                          (((x) << 4) & GENMASK(7, 4))
+#define SYS_MMGT_FAST_FREEVLD_M                           GENMASK(7, 4)
+#define SYS_MMGT_FAST_FREEVLD_X(x)                        (((x) & GENMASK(7, 4)) >> 4)
+#define SYS_MMGT_FAST_RELVLD(x)                           ((x) & GENMASK(3, 0))
+#define SYS_MMGT_FAST_RELVLD_M                            GENMASK(3, 0)
+
+#define SYS_EVENTS_DIF_RSZ                                0x4
+
+#define SYS_EVENTS_DIF_EV_DRX(x)                          (((x) << 6) & GENMASK(8, 6))
+#define SYS_EVENTS_DIF_EV_DRX_M                           GENMASK(8, 6)
+#define SYS_EVENTS_DIF_EV_DRX_X(x)                        (((x) & GENMASK(8, 6)) >> 6)
+#define SYS_EVENTS_DIF_EV_DTX(x)                          ((x) & GENMASK(5, 0))
+#define SYS_EVENTS_DIF_EV_DTX_M                           GENMASK(5, 0)
+
+#define SYS_EVENTS_CORE_EV_FWR                            BIT(2)
+#define SYS_EVENTS_CORE_EV_ANA(x)                         ((x) & GENMASK(1, 0))
+#define SYS_EVENTS_CORE_EV_ANA_M                          GENMASK(1, 0)
+
+#define SYS_CNT_GSZ                                       0x4
+
+#define SYS_PTP_STATUS_PTP_TXSTAMP_OAM                    BIT(29)
+#define SYS_PTP_STATUS_PTP_OVFL                           BIT(28)
+#define SYS_PTP_STATUS_PTP_MESS_VLD                       BIT(27)
+#define SYS_PTP_STATUS_PTP_MESS_ID(x)                     (((x) << 21) & GENMASK(26, 21))
+#define SYS_PTP_STATUS_PTP_MESS_ID_M                      GENMASK(26, 21)
+#define SYS_PTP_STATUS_PTP_MESS_ID_X(x)                   (((x) & GENMASK(26, 21)) >> 21)
+#define SYS_PTP_STATUS_PTP_MESS_TXPORT(x)                 (((x) << 16) & GENMASK(20, 16))
+#define SYS_PTP_STATUS_PTP_MESS_TXPORT_M                  GENMASK(20, 16)
+#define SYS_PTP_STATUS_PTP_MESS_TXPORT_X(x)               (((x) & GENMASK(20, 16)) >> 16)
+#define SYS_PTP_STATUS_PTP_MESS_SEQ_ID(x)                 ((x) & GENMASK(15, 0))
+#define SYS_PTP_STATUS_PTP_MESS_SEQ_ID_M                  GENMASK(15, 0)
+
+#define SYS_PTP_TXSTAMP_PTP_TXSTAMP(x)                    ((x) & GENMASK(29, 0))
+#define SYS_PTP_TXSTAMP_PTP_TXSTAMP_M                     GENMASK(29, 0)
+#define SYS_PTP_TXSTAMP_PTP_TXSTAMP_SEC                   BIT(31)
+
+#define SYS_PTP_NXT_PTP_NXT                               BIT(0)
+
+#define SYS_PTP_CFG_PTP_STAMP_WID(x)                      (((x) << 2) & GENMASK(7, 2))
+#define SYS_PTP_CFG_PTP_STAMP_WID_M                       GENMASK(7, 2)
+#define SYS_PTP_CFG_PTP_STAMP_WID_X(x)                    (((x) & GENMASK(7, 2)) >> 2)
+#define SYS_PTP_CFG_PTP_CF_ROLL_MODE(x)                   ((x) & GENMASK(1, 0))
+#define SYS_PTP_CFG_PTP_CF_ROLL_MODE_M                    GENMASK(1, 0)
+
+#define SYS_RAM_INIT_RAM_INIT                             BIT(1)
+#define SYS_RAM_INIT_RAM_CFG_HOOK                         BIT(0)
+
+#endif
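
[Editor's note] Decoding a packed status word goes through the _X extractors, which mask the field in place and shift it back down. A small sketch for SYS_PTP_STATUS, assuming val was just read from the register:

    static void sys_ptp_status_decode(u32 val, u8 *mess_id, u16 *seq_id)
    {
            *mess_id = SYS_PTP_STATUS_PTP_MESS_ID_X(val);     /* bits 26:21 */
            *seq_id = val & SYS_PTP_STATUS_PTP_MESS_SEQ_ID_M; /* bits 15:0 */
    }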
index 7e298148ca26b4899b6f52b4405f4400838ad124..cb87fccb9f6a2b369e3832bde74a536a65b432e1 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
        return nfp_bpf_cmsg_alloc(bpf, size);
 }
 
+static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
+{
+       struct cmsg_hdr *hdr;
+
+       hdr = (struct cmsg_hdr *)skb->data;
+
+       return hdr->type;
+}
+
 static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
 {
        struct cmsg_hdr *hdr;
@@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
                goto err_free;
        }
 
+       if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
+               nfp_bpf_event_output(bpf, skb);
+               return;
+       }
+
        nfp_ctrl_lock(bpf->app->ctrl);
 
        tag = nfp_bpf_cmsg_get_tag(skb);
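
[Editor's note] The early return added above dispatches on the first byte of the control message before the tag-based request/reply path takes the lock. For orientation, a reconstruction of the header layout that nfp_bpf_cmsg_get_type() peeks at (the authoritative definition lives in the driver's fw.h):

    struct cmsg_hdr {
            u8 type;        /* enum nfp_bpf_cmsg_type */
            u8 ver;
            __be16 tag;     /* request/reply matching tag, unused for events */
    };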
index 39639ac28b01785c64c6333d5e3bdde1ec132b32..4c7972e3db63e32f515bdd7fd9e287fa104fd484 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
 #include <linux/bitops.h>
 #include <linux/types.h>
 
+/* The kernel's enum bpf_reg_type is not uABI, so it may change and break
+ * our FW ABI.  In that case we will do the translation in the driver.
+ */
+#define NFP_BPF_SCALAR_VALUE           1
+#define NFP_BPF_MAP_VALUE              4
+#define NFP_BPF_STACK                  6
+#define NFP_BPF_PACKET_DATA            8
+
 enum bpf_cap_tlv_type {
        NFP_BPF_CAP_TYPE_FUNC           = 1,
        NFP_BPF_CAP_TYPE_ADJUST_HEAD    = 2,
        NFP_BPF_CAP_TYPE_MAPS           = 3,
        NFP_BPF_CAP_TYPE_RANDOM         = 4,
+       NFP_BPF_CAP_TYPE_QUEUE_SELECT   = 5,
 };
 
 struct nfp_bpf_cap_tlv_func {
@@ -81,6 +90,7 @@ enum nfp_bpf_cmsg_type {
        CMSG_TYPE_MAP_DELETE    = 5,
        CMSG_TYPE_MAP_GETNEXT   = 6,
        CMSG_TYPE_MAP_GETFIRST  = 7,
+       CMSG_TYPE_BPF_EVENT     = 8,
        __CMSG_TYPE_MAP_MAX,
 };
 
@@ -155,4 +165,13 @@ struct cmsg_reply_map_op {
        __be32 resv;
        struct cmsg_key_value_pair elem[0];
 };
+
+struct cmsg_bpf_event {
+       struct cmsg_hdr hdr;
+       __be32 cpu_id;
+       __be64 map_ptr;
+       __be32 data_size;
+       __be32 pkt_size;
+       u8 data[0];
+};
 #endif
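
[Editor's note] A minimal sketch of how a CMSG_TYPE_BPF_EVENT payload might be validated on the host before delivery; the layout of data[] (packet bytes followed by map data) is an assumption here, and the real work happens in nfp_bpf_event_output():

    static int nfp_bpf_event_check(const struct cmsg_bpf_event *ev,
                                   unsigned int len)
    {
            if (len < sizeof(*ev))
                    return -EINVAL;

            /* data[] is assumed to carry pkt_size packet bytes followed by
             * data_size bytes of event data, both sizes big-endian above
             */
            if (len < sizeof(*ev) + be32_to_cpu(ev->pkt_size) +
                      be32_to_cpu(ev->data_size))
                    return -EINVAL;

            return 0;
    }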
index 29b4e5f8c102da14917112b2d7b91b27ab30cce9..a4d3da21586330eaf4c33b2a00a308270678c3f6 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -42,6 +42,7 @@
 
 #include "main.h"
 #include "../nfp_asm.h"
+#include "../nfp_net_ctrl.h"
 
 /* --- NFP prog --- */
 /* For each "multiple" entry, macros provide pos and next<n> pointers.
@@ -1214,45 +1215,83 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
        return 0;
 }
 
-static int
-wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-           enum br_mask br_mask, bool swap)
+static const struct jmp_code_map {
+       enum br_mask br_mask;
+       bool swap;
+} jmp_code_map[] = {
+       [BPF_JGT >> 4]  = { BR_BLO, true },
+       [BPF_JGE >> 4]  = { BR_BHS, false },
+       [BPF_JLT >> 4]  = { BR_BLO, false },
+       [BPF_JLE >> 4]  = { BR_BHS, true },
+       [BPF_JSGT >> 4] = { BR_BLT, true },
+       [BPF_JSGE >> 4] = { BR_BGE, false },
+       [BPF_JSLT >> 4] = { BR_BLT, false },
+       [BPF_JSLE >> 4] = { BR_BGE, true },
+};
+
+static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
+{
+       unsigned int op;
+
+       op = BPF_OP(meta->insn.code) >> 4;
+       /* br_mask of 0 is BR_BEQ, which we don't use in the jump code table */
+       if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
+                     !jmp_code_map[op].br_mask,
+                     "no code found for jump instruction"))
+               return NULL;
+
+       return &jmp_code_map[op];
+}
+
+static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        const struct bpf_insn *insn = &meta->insn;
        u64 imm = insn->imm; /* sign extend */
+       const struct jmp_code_map *code;
+       enum alu_op alu_op, carry_op;
        u8 reg = insn->dst_reg * 2;
        swreg tmp_reg;
 
+       code = nfp_jmp_code_get(meta);
+       if (!code)
+               return -EINVAL;
+
+       alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
+       carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
+
        tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
-       if (!swap)
-               emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+       if (!code->swap)
+               emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
        else
-               emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+               emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
 
        tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
-       if (!swap)
+       if (!code->swap)
                emit_alu(nfp_prog, reg_none(),
-                        reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+                        reg_a(reg + 1), carry_op, tmp_reg);
        else
                emit_alu(nfp_prog, reg_none(),
-                        tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+                        tmp_reg, carry_op, reg_a(reg + 1));
 
-       emit_br(nfp_prog, br_mask, insn->off, 0);
+       emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
        return 0;
 }
 
-static int
-wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-           enum br_mask br_mask, bool swap)
+static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        const struct bpf_insn *insn = &meta->insn;
+       const struct jmp_code_map *code;
        u8 areg, breg;
 
+       code = nfp_jmp_code_get(meta);
+       if (!code)
+               return -EINVAL;
+
        areg = insn->dst_reg * 2;
        breg = insn->src_reg * 2;
 
-       if (swap) {
+       if (code->swap) {
                areg ^= breg;
                breg ^= areg;
                areg ^= breg;
@@ -1261,7 +1300,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
        emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
        emit_alu(nfp_prog, reg_none(),
                 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
-       emit_br(nfp_prog, br_mask, insn->off, 0);
+       emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
        return 0;
 }
@@ -1357,15 +1396,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 static int
 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-       struct bpf_offloaded_map *offmap;
-       struct nfp_bpf_map *nfp_map;
        bool load_lm_ptr;
        u32 ret_tgt;
        s64 lm_off;
-       swreg tid;
-
-       offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
-       nfp_map = offmap->dev_priv;
 
        /* We only have to reload LM0 if the key is not at the start of the stack */
        lm_off = nfp_prog->stack_depth;
@@ -1378,17 +1411,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        if (meta->func_id == BPF_FUNC_map_update_elem)
                emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
 
-       /* Load map ID into a register, it should actually fit as an immediate
-        * but in case it doesn't deal with it here, not in the delay slots.
-        */
-       tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
-
        emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
                     2, RELO_BR_HELPER);
        ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
 
        /* Load map ID into A0 */
-       wrp_mov(nfp_prog, reg_a(0), tid);
+       wrp_mov(nfp_prog, reg_a(0), reg_a(2));
 
        /* Load the return address into B0 */
        wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
@@ -1400,7 +1428,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        if (!load_lm_ptr)
                return 0;
 
-       emit_csr_wr(nfp_prog, stack_reg(nfp_prog),  NFP_CSR_ACT_LM_ADDR0);
+       emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
        wrp_nops(nfp_prog, 3);
 
        return 0;
@@ -1418,6 +1446,63 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        return 0;
 }
 
+static int
+nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       swreg ptr_type;
+       u32 ret_tgt;
+
+       ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
+
+       ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
+
+       emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
+                    2, RELO_BR_HELPER);
+
+       /* Load ptr type into A1 */
+       wrp_mov(nfp_prog, reg_a(1), ptr_type);
+
+       /* Load the return address into B0 */
+       wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
+
+       if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int
+nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       u32 jmp_tgt;
+
+       jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;
+
+       /* Make sure the queue ID fits into the FW field */
+       emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
+                ALU_OP_AND_NOT_B, reg_imm(0xff));
+       emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);
+
+       /* Set the 'queue selected' bit and the queue value */
+       emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
+                pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
+                SHF_SC_L_SHF, PKT_VEC_QSEL_SET_BIT);
+       emit_ld_field(nfp_prog,
+                     pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
+                     SHF_SC_NONE, 0);
+       /* Delay slots end here; we will jump over the next instruction if
+        * the queue value fits into the field.
+        */
+       emit_ld_field(nfp_prog,
+                     pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
+                     SHF_SC_NONE, 0);
+
+       if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
+               return -EINVAL;
+
+       return 0;
+}
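
For reference, a minimal XDP program exercising this path; nfp_queue_select()
above is what JITs the 4-byte store to rx_queue_index, and the driver verifier
hunk later in this patch gates it on the FW queue-select capability.  The
libbpf-style section conventions here are illustrative only:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int steer(struct xdp_md *ctx)
	{
		ctx->rx_queue_index = 3;	/* must fit the 8-bit FW field */
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";
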
+
 /* --- Callbacks --- */
 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
@@ -2108,6 +2193,17 @@ mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
                            false, wrp_lmem_store);
 }
 
+static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       switch (meta->insn.off) {
+       case offsetof(struct xdp_md, rx_queue_index):
+               return nfp_queue_select(nfp_prog, meta);
+       }
+
+       WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
+       return -EOPNOTSUPP;
+}
+
 static int
 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
        unsigned int size)
@@ -2134,6 +2230,9 @@ static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
+       if (meta->ptr.type == PTR_TO_CTX)
+               if (nfp_prog->type == BPF_PROG_TYPE_XDP)
+                       return mem_stx_xdp(nfp_prog, meta);
        return mem_stx(nfp_prog, meta, 4);
 }
 
@@ -2283,46 +2382,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        return 0;
 }
 
-static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
-}
-
 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        const struct bpf_insn *insn = &meta->insn;
@@ -2392,46 +2451,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        return 0;
 }
 
-static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-       return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
-}
-
 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
@@ -2453,6 +2472,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
                return map_call_stack_common(nfp_prog, meta);
        case BPF_FUNC_get_prandom_u32:
                return nfp_get_prandom_u32(nfp_prog, meta);
+       case BPF_FUNC_perf_event_output:
+               return nfp_perf_event_output(nfp_prog, meta);
        default:
                WARN_ONCE(1, "verifier allowed unsupported function\n");
                return -EOPNOTSUPP;
@@ -2520,25 +2541,25 @@ static const instr_cb_t instr_cb[256] = {
        [BPF_ST | BPF_MEM | BPF_DW] =   mem_st8,
        [BPF_JMP | BPF_JA | BPF_K] =    jump,
        [BPF_JMP | BPF_JEQ | BPF_K] =   jeq_imm,
-       [BPF_JMP | BPF_JGT | BPF_K] =   jgt_imm,
-       [BPF_JMP | BPF_JGE | BPF_K] =   jge_imm,
-       [BPF_JMP | BPF_JLT | BPF_K] =   jlt_imm,
-       [BPF_JMP | BPF_JLE | BPF_K] =   jle_imm,
-       [BPF_JMP | BPF_JSGT | BPF_K] =  jsgt_imm,
-       [BPF_JMP | BPF_JSGE | BPF_K] =  jsge_imm,
-       [BPF_JMP | BPF_JSLT | BPF_K] =  jslt_imm,
-       [BPF_JMP | BPF_JSLE | BPF_K] =  jsle_imm,
+       [BPF_JMP | BPF_JGT | BPF_K] =   cmp_imm,
+       [BPF_JMP | BPF_JGE | BPF_K] =   cmp_imm,
+       [BPF_JMP | BPF_JLT | BPF_K] =   cmp_imm,
+       [BPF_JMP | BPF_JLE | BPF_K] =   cmp_imm,
+       [BPF_JMP | BPF_JSGT | BPF_K] =  cmp_imm,
+       [BPF_JMP | BPF_JSGE | BPF_K] =  cmp_imm,
+       [BPF_JMP | BPF_JSLT | BPF_K] =  cmp_imm,
+       [BPF_JMP | BPF_JSLE | BPF_K] =  cmp_imm,
        [BPF_JMP | BPF_JSET | BPF_K] =  jset_imm,
        [BPF_JMP | BPF_JNE | BPF_K] =   jne_imm,
        [BPF_JMP | BPF_JEQ | BPF_X] =   jeq_reg,
-       [BPF_JMP | BPF_JGT | BPF_X] =   jgt_reg,
-       [BPF_JMP | BPF_JGE | BPF_X] =   jge_reg,
-       [BPF_JMP | BPF_JLT | BPF_X] =   jlt_reg,
-       [BPF_JMP | BPF_JLE | BPF_X] =   jle_reg,
-       [BPF_JMP | BPF_JSGT | BPF_X] =  jsgt_reg,
-       [BPF_JMP | BPF_JSGE | BPF_X] =  jsge_reg,
-       [BPF_JMP | BPF_JSLT | BPF_X] =  jslt_reg,
-       [BPF_JMP | BPF_JSLE | BPF_X] =  jsle_reg,
+       [BPF_JMP | BPF_JGT | BPF_X] =   cmp_reg,
+       [BPF_JMP | BPF_JGE | BPF_X] =   cmp_reg,
+       [BPF_JMP | BPF_JLT | BPF_X] =   cmp_reg,
+       [BPF_JMP | BPF_JLE | BPF_X] =   cmp_reg,
+       [BPF_JMP | BPF_JSGT | BPF_X] =  cmp_reg,
+       [BPF_JMP | BPF_JSGE | BPF_X] =  cmp_reg,
+       [BPF_JMP | BPF_JSLT | BPF_X] =  cmp_reg,
+       [BPF_JMP | BPF_JSLE | BPF_X] =  cmp_reg,
        [BPF_JMP | BPF_JSET | BPF_X] =  jset_reg,
        [BPF_JMP | BPF_JNE | BPF_X] =   jne_reg,
        [BPF_JMP | BPF_CALL] =          call,
@@ -2777,6 +2798,54 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
        }
 }
 
+/* abs(insn.imm) will fit better into an unrestricted reg immediate -
+ * convert an add/sub of a negative number into a sub/add of a positive one.
+ */
+static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
+{
+       struct nfp_insn_meta *meta;
+
+       list_for_each_entry(meta, &nfp_prog->insns, l) {
+               struct bpf_insn insn = meta->insn;
+
+               if (meta->skip)
+                       continue;
+
+               if (BPF_CLASS(insn.code) != BPF_ALU &&
+                   BPF_CLASS(insn.code) != BPF_ALU64 &&
+                   BPF_CLASS(insn.code) != BPF_JMP)
+                       continue;
+               if (BPF_SRC(insn.code) != BPF_K)
+                       continue;
+               if (insn.imm >= 0)
+                       continue;
+
+               if (BPF_CLASS(insn.code) == BPF_JMP) {
+                       switch (BPF_OP(insn.code)) {
+                       case BPF_JGE:
+                       case BPF_JSGE:
+                       case BPF_JLT:
+                       case BPF_JSLT:
+                               meta->jump_neg_op = true;
+                               break;
+                       default:
+                               continue;
+                       }
+               } else {
+                       if (BPF_OP(insn.code) == BPF_ADD)
+                               insn.code = BPF_CLASS(insn.code) | BPF_SUB;
+                       else if (BPF_OP(insn.code) == BPF_SUB)
+                               insn.code = BPF_CLASS(insn.code) | BPF_ADD;
+                       else
+                               continue;
+
+                       meta->insn.code = insn.code | BPF_K;
+               }
+
+               meta->insn.imm = -insn.imm;
+       }
+}
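
A worked example of the rewrite: for ALU instructions,

	BPF_ALU64 | BPF_ADD | BPF_K, imm = -16    becomes
	BPF_ALU64 | BPF_SUB | BPF_K, imm =  16

so the immediate loaded is the small positive 16 rather than 0xfffffff0.  For
the four jump opcodes the immediate is negated the same way and jump_neg_op is
set, which - judging by the alu_op/carry_op selection visible in cmp_imm() at
the top of this patch - flips the compare-by-subtraction to ADD/ADD_C instead
of SUB/SUB_C.
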
+
 /* Remove masking after load since our load guarantees this is not needed */
 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
 {
@@ -3212,6 +3281,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
        nfp_bpf_opt_reg_init(nfp_prog);
 
+       nfp_bpf_opt_neg_add_sub(nfp_prog);
        nfp_bpf_opt_ld_mask(nfp_prog);
        nfp_bpf_opt_ld_shift(nfp_prog);
        nfp_bpf_opt_ldst_gather(nfp_prog);
@@ -3220,6 +3290,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
        return 0;
 }
 
+static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
+{
+       struct nfp_insn_meta *meta1, *meta2;
+       struct nfp_bpf_map *nfp_map;
+       struct bpf_map *map;
+
+       nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+               if (meta1->skip || meta2->skip)
+                       continue;
+
+               if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
+                   meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
+                       continue;
+
+               map = (void *)(unsigned long)((u32)meta1->insn.imm |
+                                             (u64)meta2->insn.imm << 32);
+               if (bpf_map_offload_neutral(map))
+                       continue;
+               nfp_map = map_to_offmap(map)->dev_priv;
+
+               meta1->insn.imm = nfp_map->tid;
+               meta2->insn.imm = 0;
+       }
+
+       return 0;
+}
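
The reassembly above relies on BPF_LD | BPF_IMM | BPF_DW spanning two
instruction slots, each carrying 32 bits in its imm field.  A self-contained
sketch of the split/rebuild arithmetic (the pointer value is a stand-in):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t ptr = 0x123456789abcdef0ull;	 /* stand-in map pointer */
		uint32_t imm_lo = (uint32_t)ptr;	 /* first insn's imm */
		uint32_t imm_hi = (uint32_t)(ptr >> 32); /* second insn's imm */

		assert(((uint64_t)imm_lo | (uint64_t)imm_hi << 32) == ptr);
		return 0;
	}
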
+
 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
 {
        __le64 *ustore = (__force __le64 *)prog;
@@ -3256,6 +3353,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog)
 {
        int ret;
 
+       ret = nfp_bpf_replace_map_ptrs(nfp_prog);
+       if (ret)
+               return ret;
+
        ret = nfp_bpf_optimize(nfp_prog);
        if (ret)
                return ret;
@@ -3346,6 +3447,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
                        case BPF_FUNC_map_delete_elem:
                                val = nfp_prog->bpf->helpers.map_delete;
                                break;
+                       case BPF_FUNC_perf_event_output:
+                               val = nfp_prog->bpf->helpers.perf_event_output;
+                               break;
                        default:
                                pr_err("relocation of unknown helper %d\n",
                                       val);
index 1dc424685f4e772966ea9aa0bfc757cc0f41b18b..fcdfb8e7fdeab0b9dcb353f4cd4a7d76370c9817 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
 #include "fw.h"
 #include "main.h"
 
+const struct rhashtable_params nfp_bpf_maps_neutral_params = {
+       .nelem_hint             = 4,
+       .key_len                = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
+       .key_offset             = offsetof(struct nfp_bpf_neutral_map, ptr),
+       .head_offset            = offsetof(struct nfp_bpf_neutral_map, l),
+       .automatic_shrinking    = true,
+};
+
 static bool nfp_net_ebpf_capable(struct nfp_net *nn)
 {
 #ifdef __LITTLE_ENDIAN
@@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
        case BPF_FUNC_map_delete_elem:
                bpf->helpers.map_delete = readl(&cap->func_addr);
                break;
+       case BPF_FUNC_perf_event_output:
+               bpf->helpers.perf_event_output = readl(&cap->func_addr);
+               break;
        }
 
        return 0;
@@ -323,6 +334,13 @@ nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value,
        return 0;
 }
 
+static int
+nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
+{
+       bpf->queue_select = true;
+       return 0;
+}
+
 static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 {
        struct nfp_cpp *cpp = app->pf->cpp;
@@ -335,7 +353,7 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
                return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem);
 
        start = mem;
-       while (mem - start + 8 < nfp_cpp_area_size(area)) {
+       while (mem - start + 8 <= nfp_cpp_area_size(area)) {
                u8 __iomem *value;
                u32 type, length;
 
@@ -365,6 +383,10 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
                        if (nfp_bpf_parse_cap_random(app->priv, value, length))
                                goto err_release_free;
                        break;
+               case NFP_BPF_CAP_TYPE_QUEUE_SELECT:
+                       if (nfp_bpf_parse_cap_qsel(app->priv, value, length))
+                               goto err_release_free;
+                       break;
                default:
                        nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
                        break;
@@ -401,17 +423,28 @@ static int nfp_bpf_init(struct nfp_app *app)
        init_waitqueue_head(&bpf->cmsg_wq);
        INIT_LIST_HEAD(&bpf->map_list);
 
-       err = nfp_bpf_parse_capabilities(app);
+       err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
        if (err)
                goto err_free_bpf;
 
+       err = nfp_bpf_parse_capabilities(app);
+       if (err)
+               goto err_free_neutral_maps;
+
        return 0;
 
+err_free_neutral_maps:
+       rhashtable_destroy(&bpf->maps_neutral);
 err_free_bpf:
        kfree(bpf);
        return err;
 }
 
+static void nfp_check_rhashtable_empty(void *ptr, void *arg)
+{
+       WARN_ON_ONCE(1);
+}
+
 static void nfp_bpf_clean(struct nfp_app *app)
 {
        struct nfp_app_bpf *bpf = app->priv;
@@ -419,6 +452,8 @@ static void nfp_bpf_clean(struct nfp_app *app)
        WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
        WARN_ON(!list_empty(&bpf->map_list));
        WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
+       rhashtable_free_and_destroy(&bpf->maps_neutral,
+                                   nfp_check_rhashtable_empty, NULL);
        kfree(bpf);
 }
 
index 4981c8944ca35accd3ddc4c34c77c8a56309f2d5..8b143546ae85ff8dfa3376b0498e1726c3c00cfb 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -39,6 +39,7 @@
 #include <linux/bpf_verifier.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/rhashtable.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
 #include <linux/wait.h>
@@ -81,10 +82,16 @@ enum static_regs {
 enum pkt_vec {
        PKT_VEC_PKT_LEN         = 0,
        PKT_VEC_PKT_PTR         = 2,
+       PKT_VEC_QSEL_SET        = 4,
+       PKT_VEC_QSEL_VAL        = 6,
 };
 
+#define PKT_VEC_QSEL_SET_BIT   4
+
 #define pv_len(np)     reg_lm(1, PKT_VEC_PKT_LEN)
 #define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR)
+#define pv_qsel_set(np)        reg_lm(1, PKT_VEC_QSEL_SET)
+#define pv_qsel_val(np)        reg_lm(1, PKT_VEC_QSEL_VAL)
 
 #define stack_reg(np)  reg_a(STATIC_REG_STACK)
 #define stack_imm(np)  imm_b(np)
@@ -114,6 +121,8 @@ enum pkt_vec {
  * @maps_in_use:       number of currently offloaded maps
  * @map_elems_in_use:  number of elements allocated to offloaded maps
  *
+ * @maps_neutral:      hash table of offload-neutral maps (keyed on the map pointer)
+ *
  * @adjust_head:       adjust head capability
  * @adjust_head.flags:         extra flags for adjust head
  * @adjust_head.off_min:       minimal packet offset within buffer required
@@ -133,8 +142,10 @@ enum pkt_vec {
  * @helpers.map_lookup:                map lookup helper address
  * @helpers.map_update:                map update helper address
  * @helpers.map_delete:                map delete helper address
+ * @helpers.perf_event_output: output perf event to a ring buffer
  *
  * @pseudo_random:     FW initialized the pseudo-random machinery (CSRs)
+ * @queue_select:      BPF can set the RX queue ID in the packet vector
  */
 struct nfp_app_bpf {
        struct nfp_app *app;
@@ -150,6 +161,8 @@ struct nfp_app_bpf {
        unsigned int maps_in_use;
        unsigned int map_elems_in_use;
 
+       struct rhashtable maps_neutral;
+
        struct nfp_bpf_cap_adjust_head {
                u32 flags;
                int off_min;
@@ -171,9 +184,11 @@ struct nfp_app_bpf {
                u32 map_lookup;
                u32 map_update;
                u32 map_delete;
+               u32 perf_event_output;
        } helpers;
 
        bool pseudo_random;
+       bool queue_select;
 };
 
 enum nfp_bpf_map_use {
@@ -199,6 +214,14 @@ struct nfp_bpf_map {
        enum nfp_bpf_map_use use_map[];
 };
 
+struct nfp_bpf_neutral_map {
+       struct rhash_head l;
+       struct bpf_map *ptr;
+       u32 count;
+};
+
+extern const struct rhashtable_params nfp_bpf_maps_neutral_params;
+
 struct nfp_prog;
 struct nfp_insn_meta;
 typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
@@ -236,6 +259,7 @@ struct nfp_bpf_reg_state {
  * @xadd_over_16bit: 16bit immediate is not guaranteed
  * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
+ * @jump_neg_op: the jump instruction's immediate has been negated; use ADD instead of SUB
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
@@ -264,7 +288,10 @@ struct nfp_insn_meta {
                        bool xadd_maybe_16bit;
                };
                /* jump */
-               struct nfp_insn_meta *jmp_dst;
+               struct {
+                       struct nfp_insn_meta *jmp_dst;
+                       bool jump_neg_op;
+               };
                /* function calls */
                struct {
                        u32 func_id;
@@ -363,6 +390,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
  * @error: error code if something went wrong
  * @stack_depth: max stack depth from the verifier
  * @adjust_head_location: if program has single adjust head call - the insn no.
+ * @map_records_cnt: the number of map pointers recorded for this prog
+ * @map_records: the map record pointers from bpf->maps_neutral
  * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
  */
 struct nfp_prog {
@@ -386,6 +415,9 @@ struct nfp_prog {
        unsigned int stack_depth;
        unsigned int adjust_head_location;
 
+       unsigned int map_records_cnt;
+       struct nfp_bpf_neutral_map **map_records;
+
        struct list_head insns;
 };
 
@@ -436,5 +468,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
 int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
                               void *key, void *next_key);
 
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
+
 void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
 #endif
index 42d98792bd25568be3ca840657e95dd6273a33c3..4db0ac1e42a8d58f4ac81e201db3af0d6273d9b8 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
 #include "../nfp_net_ctrl.h"
 #include "../nfp_net.h"
 
+static int
+nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+                  struct bpf_map *map)
+{
+       struct nfp_bpf_neutral_map *record;
+       int err;
+
+       /* Map record paths are entered via ndo, so the update side is protected. */
+       ASSERT_RTNL();
+
+       /* Reuse path - another offloaded program is already tracking this map. */
+       record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+                                       nfp_bpf_maps_neutral_params);
+       if (record) {
+               nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
+               record->count++;
+               return 0;
+       }
+
+       /* Grab a single ref to the map for our record.  The prog destroy ndo
+        * happens after free_used_maps().
+        */
+       map = bpf_map_inc(map, false);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       record = kmalloc(sizeof(*record), GFP_KERNEL);
+       if (!record) {
+               err = -ENOMEM;
+               goto err_map_put;
+       }
+
+       record->ptr = map;
+       record->count = 1;
+
+       err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
+                                    nfp_bpf_maps_neutral_params);
+       if (err)
+               goto err_free_rec;
+
+       nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
+
+       return 0;
+
+err_free_rec:
+       kfree(record);
+err_map_put:
+       bpf_map_put(map);
+       return err;
+}
+
+static void
+nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
+{
+       bool freed = false;
+       int i;
+
+       ASSERT_RTNL();
+
+       for (i = 0; i < nfp_prog->map_records_cnt; i++) {
+               if (--nfp_prog->map_records[i]->count) {
+                       nfp_prog->map_records[i] = NULL;
+                       continue;
+               }
+
+               WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral,
+                                              &nfp_prog->map_records[i]->l,
+                                              nfp_bpf_maps_neutral_params));
+               freed = true;
+       }
+
+       if (freed) {
+               synchronize_rcu();
+
+               for (i = 0; i < nfp_prog->map_records_cnt; i++)
+                       if (nfp_prog->map_records[i]) {
+                               bpf_map_put(nfp_prog->map_records[i]->ptr);
+                               kfree(nfp_prog->map_records[i]);
+                       }
+       }
+
+       kfree(nfp_prog->map_records);
+       nfp_prog->map_records = NULL;
+       nfp_prog->map_records_cnt = 0;
+}
+
+static int
+nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+                   struct bpf_prog *prog)
+{
+       int i, cnt, err;
+
+       /* Quickly count the maps we will have to remember */
+       cnt = 0;
+       for (i = 0; i < prog->aux->used_map_cnt; i++)
+               if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
+                       cnt++;
+       if (!cnt)
+               return 0;
+
+       nfp_prog->map_records = kmalloc_array(cnt,
+                                             sizeof(nfp_prog->map_records[0]),
+                                             GFP_KERNEL);
+       if (!nfp_prog->map_records)
+               return -ENOMEM;
+
+       for (i = 0; i < prog->aux->used_map_cnt; i++)
+               if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
+                       err = nfp_map_ptr_record(bpf, nfp_prog,
+                                                prog->aux->used_maps[i]);
+                       if (err) {
+                               nfp_map_ptrs_forget(bpf, nfp_prog);
+                               return err;
+                       }
+               }
+       WARN_ON(cnt != nfp_prog->map_records_cnt);
+
+       return 0;
+}
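
The record/forget pair above brackets the offloaded program's lifetime, as
wired up in the offload hunks below:

	nfp_bpf_translate() -> nfp_map_ptrs_record()   takes one bpf_map ref
	                                               per offload-neutral map
	nfp_bpf_destroy()   -> nfp_map_ptrs_forget()   drops the refs, freeing
	                                               records after an RCU
	                                               grace period

nfp_map_ptrs_record() deliberately makes two passes - count, allocate the
array, then record - so that a mid-loop failure can unwind through
nfp_map_ptrs_forget() without tracking partial state separately.
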
+
 static int
 nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
                 unsigned int cnt)
@@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
        prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
        prog->aux->offload->jited_image = nfp_prog->prog;
 
-       return 0;
+       return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
 }
 
 static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
@@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
        struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
 
        kvfree(nfp_prog->prog);
+       nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
        nfp_prog_free(nfp_prog);
 
        return 0;
@@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
        }
 }
 
+static unsigned long
+nfp_bpf_perf_event_copy(void *dst, const void *src,
+                       unsigned long off, unsigned long len)
+{
+       memcpy(dst, src + off, len);
+       return 0;
+}
+
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
+{
+       struct cmsg_bpf_event *cbe = (void *)skb->data;
+       u32 pkt_size, data_size;
+       struct bpf_map *map;
+
+       if (skb->len < sizeof(struct cmsg_bpf_event))
+               goto err_drop;
+
+       pkt_size = be32_to_cpu(cbe->pkt_size);
+       data_size = be32_to_cpu(cbe->data_size);
+       map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
+
+       if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
+               goto err_drop;
+       if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
+               goto err_drop;
+
+       rcu_read_lock();
+       if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+                                   nfp_bpf_maps_neutral_params)) {
+               rcu_read_unlock();
+               pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
+                       map);
+               goto err_drop;
+       }
+
+       bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
+                        &cbe->data[round_up(pkt_size, 4)], data_size,
+                        cbe->data, pkt_size, nfp_bpf_perf_event_copy);
+       rcu_read_unlock();
+
+       dev_consume_skb_any(skb);
+       return 0;
+err_drop:
+       dev_kfree_skb_any(skb);
+       return -EINVAL;
+}
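
The length checks above imply the following control-message layout.  This is
a sketch only: the field names are the ones dereferenced above, but the field
order and the hdr type are assumptions, not the fw.h definition:

	struct cmsg_bpf_event {
		struct cmsg_hdr hdr;	/* hdr.ver == CMSG_MAP_ABI_VERSION */
		__be32 cpu_id;
		__be64 map_ptr;		/* host map pointer echoed back by FW */
		__be32 data_size;	/* length of the event payload */
		__be32 pkt_size;	/* length of the captured packet */
		u8 data[];		/* pkt_size bytes of packet, padded to
					 * 4B, then data_size bytes of payload */
	};
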
+
 static int
 nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
                 struct netlink_ext_ack *extack)
index 06ad53ce4ad93d402a6f3888ce475c19e9f363d0..844a9be6e55a97a0ed4a94c28f698ba10abf7c8b 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -36,6 +36,8 @@
 #include <linux/kernel.h>
 #include <linux/pkt_cls.h>
 
+#include "../nfp_app.h"
+#include "../nfp_main.h"
 #include "fw.h"
 #include "main.h"
 
@@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
                return false;
        }
 
-       /* Rest of the checks is only if we re-parse the same insn */
-       if (!meta->func_id)
-               return true;
-
-       if (meta->arg1.map_ptr != reg1->map_ptr) {
-               pr_vlog(env, "%s: called for different map\n", fname);
-               return false;
-       }
-
        return true;
 }
 
@@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
                pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
                return -EOPNOTSUPP;
 
+       case BPF_FUNC_perf_event_output:
+               BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE ||
+                            NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE ||
+                            NFP_BPF_STACK != PTR_TO_STACK ||
+                            NFP_BPF_PACKET_DATA != PTR_TO_PACKET);
+
+               if (!bpf->helpers.perf_event_output) {
+                       pr_vlog(env, "event_output: not supported by FW\n");
+                       return -EOPNOTSUPP;
+               }
+
+               /* Force the current CPU so that we can report the event
+                * no matter where we receive the control message from the FW.
+                */
+               if (reg3->var_off.mask & BPF_F_INDEX_MASK ||
+                   (reg3->var_off.value & BPF_F_INDEX_MASK) !=
+                   BPF_F_CURRENT_CPU) {
+                       char tn_buf[48];
+
+                       tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off);
+                       pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n",
+                               tn_buf);
+                       return -EOPNOTSUPP;
+               }
+
+               /* Save space in meta: we only care about the 4th argument,
+                * so shove it into arg1.
+                */
+               reg1 = cur_regs(env) + BPF_REG_4;
+
+               if (reg1->type != SCALAR_VALUE /* NULL ptr */ &&
+                   reg1->type != PTR_TO_STACK &&
+                   reg1->type != PTR_TO_MAP_VALUE &&
+                   reg1->type != PTR_TO_PACKET) {
+                       pr_vlog(env, "event_output: unsupported ptr type: %d\n",
+                               reg1->type);
+                       return -EOPNOTSUPP;
+               }
+
+               if (reg1->type == PTR_TO_STACK &&
+                   !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL))
+                       return -EOPNOTSUPP;
+
+               /* Warn the user that on offload NFP may return success even if
+                * the map is not going to accept the event, since event output
+                * is fully async and the device won't know the state of the
+                * map.  There is also a FW limitation on the event length.
+                *
+                * Lost events will not show up on the perf ring; the driver
+                * won't see them at all.  Events may also get reordered.
+                */
+               dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev,
+                             "bpf: note: return codes and behavior of bpf_event_output() helper differ for offloaded programs!\n");
+               pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n");
+
+               if (!meta->func_id)
+                       break;
+
+               if (reg1->type != meta->arg1.type) {
+                       pr_vlog(env, "event_output: ptr type changed: %d %d\n",
+                               meta->arg1.type, reg1->type);
+                       return -EINVAL;
+               }
+               break;
+
        default:
                pr_vlog(env, "unsupported function id: %d\n", func_id);
                return -EOPNOTSUPP;
@@ -409,6 +467,30 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
        return 0;
 }
 
+static int
+nfp_bpf_check_store(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+                   struct bpf_verifier_env *env)
+{
+       const struct bpf_reg_state *reg = cur_regs(env) + meta->insn.dst_reg;
+
+       if (reg->type == PTR_TO_CTX) {
+               if (nfp_prog->type == BPF_PROG_TYPE_XDP) {
+                       /* XDP ctx accesses must be 4B in size */
+                       switch (meta->insn.off) {
+                       case offsetof(struct xdp_md, rx_queue_index):
+                               if (nfp_prog->bpf->queue_select)
+                                       goto exit_check_ptr;
+                               pr_vlog(env, "queue selection not supported by FW\n");
+                               return -EOPNOTSUPP;
+                       }
+               }
+               pr_vlog(env, "unsupported store to context field\n");
+               return -EOPNOTSUPP;
+       }
+exit_check_ptr:
+       return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
+}
+
 static int
 nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
                   struct bpf_verifier_env *env)
@@ -464,8 +546,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
                return nfp_bpf_check_ptr(nfp_prog, meta, env,
                                         meta->insn.src_reg);
        if (is_mbpf_store(meta))
-               return nfp_bpf_check_ptr(nfp_prog, meta, env,
-                                        meta->insn.dst_reg);
+               return nfp_bpf_check_store(nfp_prog, meta, env);
+
        if (is_mbpf_xadd(meta))
                return nfp_bpf_check_xadd(nfp_prog, meta, env);
 
index b3567a596fc1427c395ce18ea5f3541d65c0ce78..80df9a5d4217d05a2c0d3940543ea68547f06cc8 100644
@@ -183,17 +183,21 @@ static int
 nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
                        const struct tc_action *action,
                        struct nfp_fl_pre_tunnel *pre_tun,
-                       enum nfp_flower_tun_type tun_type)
+                       enum nfp_flower_tun_type tun_type,
+                       struct net_device *netdev)
 {
        size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
        struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
        u32 tmp_set_ip_tun_type_index = 0;
        /* Currently we support one pre-tunnel, so the index is always 0. */
        int pretun_idx = 0;
+       struct net *net;
 
        if (ip_tun->options_len)
                return -EOPNOTSUPP;
 
+       net = dev_net(netdev);
+
        set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
        set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 
@@ -204,6 +208,7 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
 
        set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
        set_tun->tun_id = ip_tun->key.tun_id;
+       set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
 
        /* Complete pre_tunnel action. */
        pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
@@ -511,7 +516,8 @@ nfp_flower_loop_action(const struct tc_action *a,
                *a_len += sizeof(struct nfp_fl_pre_tunnel);
 
                set_tun = (void *)&nfp_fl->action_data[*a_len];
-               err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type);
+               err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type,
+                                             netdev);
                if (err)
                        return err;
                *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);
index b6c0fd053a503d8f087ccca90e0d9655cecd0661..bee4367a2c388810b8fa13098f719af6c39819b2 100644
@@ -190,7 +190,10 @@ struct nfp_fl_set_ipv4_udp_tun {
        __be16 reserved;
        __be64 tun_id __packed;
        __be32 tun_type_index;
-       __be32 extra[3];
+       __be16 reserved2;
+       u8 ttl;
+       u8 reserved3;
+       __be32 extra[2];
 };
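
The replacement fields keep the action the same size, so the opcode's length
is unchanged and only previously reserved bytes gain meaning:

	old:  __be32 extra[3]                     = 12 bytes
	new:  __be16 + u8 + u8 + __be32 extra[2]  = 2 + 1 + 1 + 8 = 12 bytes

with the new ttl byte carrying the namespace's default IPv4 TTL set in
nfp_fl_set_ipv4_udp_tun() above.
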
 
 /* Metadata with L2 (1W/4B)
index ad02592a82b748de1d83bfd26224e3e8b452411d..4e67c0cbf9f08ce7c5c6e131130f382f57bb45c5 100644
@@ -52,8 +52,6 @@
 
 #define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
 
-#define NFP_FLOWER_FRAME_HEADROOM      158
-
 static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 {
        return "FLOWER";
@@ -249,12 +247,16 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
                        err = -ENOMEM;
                        goto err_reprs_clean;
                }
-               RCU_INIT_POINTER(reprs->reprs[i], repr);
 
                /* For now we only support 1 PF */
                WARN_ON(repr_type == NFP_REPR_TYPE_PF && i);
 
                port = nfp_port_alloc(app, port_type, repr);
+               if (IS_ERR(port)) {
+                       err = PTR_ERR(port);
+                       nfp_repr_free(repr);
+                       goto err_reprs_clean;
+               }
                if (repr_type == NFP_REPR_TYPE_PF) {
                        port->pf_id = i;
                        port->vnic = priv->nn->dp.ctrl_bar;
@@ -273,9 +275,11 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
                                    port_id, port, priv->nn->dp.netdev);
                if (err) {
                        nfp_port_free(port);
+                       nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
 
+               RCU_INIT_POINTER(reprs->reprs[i], repr);
                nfp_info(app->cpp, "%s%d Representor(%s) created\n",
                         repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i,
                         repr->name);
@@ -346,27 +350,29 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
                        err = -ENOMEM;
                        goto err_reprs_clean;
                }
-               RCU_INIT_POINTER(reprs->reprs[phys_port], repr);
 
                port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
                if (IS_ERR(port)) {
                        err = PTR_ERR(port);
+                       nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
                err = nfp_port_init_phy_port(app->pf, app, port, i);
                if (err) {
                        nfp_port_free(port);
+                       nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
 
                SET_NETDEV_DEV(repr, &priv->nn->pdev->dev);
-               nfp_net_get_mac_addr(app->pf, port);
+               nfp_net_get_mac_addr(app->pf, repr, port);
 
                cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port);
                err = nfp_repr_init(app, repr,
                                    cmsg_port_id, port, priv->nn->dp.netdev);
                if (err) {
                        nfp_port_free(port);
+                       nfp_repr_free(repr);
                        goto err_reprs_clean;
                }
 
@@ -375,6 +381,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
                                             eth_tbl->ports[i].base,
                                             phys_port);
 
+               RCU_INIT_POINTER(reprs->reprs[phys_port], repr);
                nfp_info(app->cpp, "Phys Port %d Representor(%s) created\n",
                         phys_port, repr->name);
        }
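
Both representor hunks above apply the same publish-after-init fix: the repr
is installed in the RCU-protected reprs array only once every init step has
succeeded, and each intermediate error path now frees it.  A generic sketch of
the pattern (names are placeholders):

	obj = alloc_and_init();
	if (failed) {
		plain_free(obj);	/* never published: no grace period needed */
		goto err;
	}
	RCU_INIT_POINTER(slot, obj);	/* readers only ever see a complete object */
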
@@ -559,22 +566,6 @@ static void nfp_flower_clean(struct nfp_app *app)
        app->priv = NULL;
 }
 
-static int
-nfp_flower_check_mtu(struct nfp_app *app, struct net_device *netdev,
-                    int new_mtu)
-{
-       /* The flower fw reserves NFP_FLOWER_FRAME_HEADROOM bytes of the
-        * supported max MTU to allow for appending tunnel headers. To prevent
-        * unexpected behaviour this needs to be accounted for.
-        */
-       if (new_mtu > netdev->max_mtu - NFP_FLOWER_FRAME_HEADROOM) {
-               nfp_err(app->cpp, "New MTU (%d) is not valid\n", new_mtu);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
 static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv)
 {
        bool ret;
@@ -656,7 +647,6 @@ const struct nfp_app_type app_flower = {
        .init           = nfp_flower_init,
        .clean          = nfp_flower_clean,
 
-       .check_mtu      = nfp_flower_check_mtu,
        .repr_change_mtu  = nfp_flower_repr_change_mtu,
 
        .vnic_alloc     = nfp_flower_vnic_alloc,
index c67e1b54c6141e494d9f30735755ce7b6f8e2b20..733ff53cc6014742b8b57e121930d01144033b75 100644
@@ -47,6 +47,7 @@
 struct net_device;
 struct nfp_app;
 
+#define NFP_FL_STATS_CTX_DONT_CARE     cpu_to_be32(0xffffffff)
 #define NFP_FL_STATS_ENTRY_RS          BIT(20)
 #define NFP_FL_STATS_ELEM_RS           4
 #define NFP_FL_REPEATED_HASH_MAX       BIT(17)
@@ -189,9 +190,11 @@ struct nfp_fl_payload {
        spinlock_t lock; /* lock stats */
        struct nfp_fl_stats stats;
        __be32 nfp_tun_ipv4_addr;
+       struct net_device *ingress_dev;
        char *unmasked_data;
        char *mask_data;
        char *action_data;
+       bool ingress_offload;
 };
 
 struct nfp_fl_stats_frame {
@@ -216,12 +219,14 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
                              struct nfp_fl_payload *nfp_flow);
 int nfp_compile_flow_metadata(struct nfp_app *app,
                              struct tc_cls_flower_offload *flow,
-                             struct nfp_fl_payload *nfp_flow);
+                             struct nfp_fl_payload *nfp_flow,
+                             struct net_device *netdev);
 int nfp_modify_flow_metadata(struct nfp_app *app,
                             struct nfp_fl_payload *nfp_flow);
 
 struct nfp_fl_payload *
-nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
+                          struct net_device *netdev, __be32 host_ctx);
 struct nfp_fl_payload *
 nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
 
index db977cf8e9332b27a63bd660c4f96b178e54cd14..21668aa435e815508d59b65620e6ecb7574dea46 100644
@@ -99,14 +99,18 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
 
 /* Must be called with either RTNL or rcu_read_lock */
 struct nfp_fl_payload *
-nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie)
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
+                          struct net_device *netdev, __be32 host_ctx)
 {
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_payload *flower_entry;
 
        hash_for_each_possible_rcu(priv->flow_table, flower_entry, link,
                                   tc_flower_cookie)
-               if (flower_entry->tc_flower_cookie == tc_flower_cookie)
+               if (flower_entry->tc_flower_cookie == tc_flower_cookie &&
+                   (!netdev || flower_entry->ingress_dev == netdev) &&
+                   (host_ctx == NFP_FL_STATS_CTX_DONT_CARE ||
+                    flower_entry->meta.host_ctx_id == host_ctx))
                        return flower_entry;
 
        return NULL;
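
Callers now disambiguate duplicate cookies by ingress device and FW stats
context, passing NULL or the NFP_FL_STATS_CTX_DONT_CARE sentinel for whichever
key they do not have; both uses appear in the hunks below:

	/* FW stats update: cookie + stats context, any netdev */
	nfp_flower_search_fl_table(app, cookie, NULL, stats->stats_con_id);

	/* tc add/del/stats: cookie + ingress netdev, any context */
	nfp_flower_search_fl_table(app, cookie, ingr_dev,
				   NFP_FL_STATS_CTX_DONT_CARE);
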
@@ -121,13 +125,11 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
        flower_cookie = be64_to_cpu(stats->stats_cookie);
 
        rcu_read_lock();
-       nfp_flow = nfp_flower_search_fl_table(app, flower_cookie);
+       nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL,
+                                             stats->stats_con_id);
        if (!nfp_flow)
                goto exit_rcu_unlock;
 
-       if (nfp_flow->meta.host_ctx_id != stats->stats_con_id)
-               goto exit_rcu_unlock;
-
        spin_lock(&nfp_flow->lock);
        nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count);
        nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count);
@@ -317,7 +319,8 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
 
 int nfp_compile_flow_metadata(struct nfp_app *app,
                              struct tc_cls_flower_offload *flow,
-                             struct nfp_fl_payload *nfp_flow)
+                             struct nfp_fl_payload *nfp_flow,
+                             struct net_device *netdev)
 {
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_payload *check_entry;
@@ -348,7 +351,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
        nfp_flow->stats.bytes = 0;
        nfp_flow->stats.used = jiffies;
 
-       check_entry = nfp_flower_search_fl_table(app, flow->cookie);
+       check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev,
+                                                NFP_FL_STATS_CTX_DONT_CARE);
        if (check_entry) {
                if (nfp_release_stats_entry(app, stats_cxt))
                        return -EINVAL;
index 114d2ab02a389d11645ad2cee3b27f6a0d5597a3..70ec9d821b910a1317e11b8528250d9fcdec8737 100644
@@ -345,7 +345,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 }
 
 static struct nfp_fl_payload *
-nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress)
 {
        struct nfp_fl_payload *flow_pay;
 
@@ -371,6 +371,8 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
        flow_pay->meta.flags = 0;
        spin_lock_init(&flow_pay->lock);
 
+       flow_pay->ingress_offload = !egress;
+
        return flow_pay;
 
 err_free_mask:
@@ -402,8 +404,20 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_payload *flow_pay;
        struct nfp_fl_key_ls *key_layer;
+       struct net_device *ingr_dev;
        int err;
 
+       ingr_dev = egress ? NULL : netdev;
+       flow_pay = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+                                             NFP_FL_STATS_CTX_DONT_CARE);
+       if (flow_pay) {
+               /* Ignore as duplicate if it has been added by a different cb. */
+               if (flow_pay->ingress_offload && egress)
+                       return 0;
+               else
+                       return -EOPNOTSUPP;
+       }
+
        key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
        if (!key_layer)
                return -ENOMEM;
@@ -413,12 +427,14 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
        if (err)
                goto err_free_key_ls;
 
-       flow_pay = nfp_flower_allocate_new(key_layer);
+       flow_pay = nfp_flower_allocate_new(key_layer, egress);
        if (!flow_pay) {
                err = -ENOMEM;
                goto err_free_key_ls;
        }
 
+       flow_pay->ingress_dev = egress ? NULL : netdev;
+
        err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay,
                                            tun_type);
        if (err)
@@ -428,7 +444,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
        if (err)
                goto err_destroy_flow;
 
-       err = nfp_compile_flow_metadata(app, flow, flow_pay);
+       err = nfp_compile_flow_metadata(app, flow, flow_pay,
+                                       flow_pay->ingress_dev);
        if (err)
                goto err_destroy_flow;
 
@@ -462,6 +479,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
  * @app:       Pointer to the APP handle
  * @netdev:    netdev structure.
  * @flow:      TC flower classifier offload structure
+ * @egress:    Netdev is the egress dev.
  *
  * Removes a flow from the repeated hash structure and clears the
  * action payload.
@@ -470,15 +488,18 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
  */
 static int
 nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
-                      struct tc_cls_flower_offload *flow)
+                      struct tc_cls_flower_offload *flow, bool egress)
 {
        struct nfp_port *port = nfp_port_from_netdev(netdev);
        struct nfp_fl_payload *nfp_flow;
+       struct net_device *ingr_dev;
        int err;
 
-       nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+       ingr_dev = egress ? NULL : netdev;
+       nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+                                             NFP_FL_STATS_CTX_DONT_CARE);
        if (!nfp_flow)
-               return -ENOENT;
+               return egress ? 0 : -ENOENT;
 
        err = nfp_modify_flow_metadata(app, nfp_flow);
        if (err)
@@ -505,7 +526,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 /**
  * nfp_flower_get_stats() - Populates flow stats obtained from hardware.
  * @app:       Pointer to the APP handle
+ * @netdev:    Netdev structure.
  * @flow:      TC flower classifier offload structure
+ * @egress:    Netdev is the egress dev.
  *
  * Populates a flow statistics structure which corresponds to a
  * specific flow.
@@ -513,14 +536,21 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
  * Return: negative value on error, 0 if stats populated successfully.
  */
 static int
-nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow)
+nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
+                    struct tc_cls_flower_offload *flow, bool egress)
 {
        struct nfp_fl_payload *nfp_flow;
+       struct net_device *ingr_dev;
 
-       nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+       ingr_dev = egress ? NULL : netdev;
+       nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+                                             NFP_FL_STATS_CTX_DONT_CARE);
        if (!nfp_flow)
                return -EINVAL;
 
+       if (nfp_flow->ingress_offload && egress)
+               return 0;
+
        spin_lock_bh(&nfp_flow->lock);
        tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes,
                              nfp_flow->stats.pkts, nfp_flow->stats.used);
@@ -543,9 +573,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
        case TC_CLSFLOWER_REPLACE:
                return nfp_flower_add_offload(app, netdev, flower, egress);
        case TC_CLSFLOWER_DESTROY:
-               return nfp_flower_del_offload(app, netdev, flower);
+               return nfp_flower_del_offload(app, netdev, flower, egress);
        case TC_CLSFLOWER_STATS:
-               return nfp_flower_get_stats(app, flower);
+               return nfp_flower_get_stats(app, netdev, flower, egress);
        }
 
        return -EOPNOTSUPP;
index 6aedef0ad433a11b833920177be2db4c396166e9..0e0253c7e17b5e6f1ccb9ce4bd7f9ac87f67463a 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
index 2a2f2fbc8850df6a04e620ae0db1a989eec30a88..b9618c37403f9abe70c2835b338e8ddc7a6a500c 100644
@@ -69,7 +69,7 @@ int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
        if (err)
                return err < 0 ? err : 0;
 
-       nfp_net_get_mac_addr(app->pf, nn->port);
+       nfp_net_get_mac_addr(app->pf, nn->dp.netdev, nn->port);
 
        return 0;
 }
index 5f2b2f24f4fae6b30f3e32120df076908ddd6904..faa4e131c136ec1d0cb21c253416acfda0c23ac6 100644
@@ -183,16 +183,18 @@ enum shf_sc {
 #define OP_ALU_DST_LMEXTN      0x80000000000ULL
 
 enum alu_op {
-       ALU_OP_NONE     = 0x00,
-       ALU_OP_ADD      = 0x01,
-       ALU_OP_NOT      = 0x04,
-       ALU_OP_ADD_2B   = 0x05,
-       ALU_OP_AND      = 0x08,
-       ALU_OP_SUB_C    = 0x0d,
-       ALU_OP_ADD_C    = 0x11,
-       ALU_OP_OR       = 0x14,
-       ALU_OP_SUB      = 0x15,
-       ALU_OP_XOR      = 0x18,
+       ALU_OP_NONE             = 0x00,
+       ALU_OP_ADD              = 0x01,
+       ALU_OP_NOT              = 0x04,
+       ALU_OP_ADD_2B           = 0x05,
+       ALU_OP_AND              = 0x08,
+       ALU_OP_AND_NOT_A        = 0x0c,
+       ALU_OP_SUB_C            = 0x0d,
+       ALU_OP_AND_NOT_B        = 0x10,
+       ALU_OP_ADD_C            = 0x11,
+       ALU_OP_OR               = 0x14,
+       ALU_OP_SUB              = 0x15,
+       ALU_OP_XOR              = 0x18,
 };
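
Assuming the usual NFP ALU semantics of A & ~B for AND_NOT_B (and ~A & B for
AND_NOT_A), the new opcode is what lets nfp_queue_select() range-check the
queue ID in a single instruction; in C terms:

	#include <stdbool.h>
	#include <stdint.h>

	/* mirrors: emit_alu(.., src, ALU_OP_AND_NOT_B, reg_imm(0xff)) + BR_BEQ */
	static bool fits_fw_queue_field(uint32_t qid)
	{
		return (qid & ~0xffu) == 0;	/* zero iff qid < 256 */
	}
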
 
 enum alu_dst_ab {
index eb0fc614673dd5c6f98c5d104db34f9142347829..b1e67cf4257a5d3b3d046c1fe2330983b4c8cdcf 100644
@@ -175,8 +175,9 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
                return ret;
 
        devlink_port_type_eth_set(&port->dl_port, port->netdev);
-       if (eth_port.is_split)
-               devlink_port_split_set(&port->dl_port, eth_port.label_port);
+       devlink_port_attrs_set(&port->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+                              eth_port.label_port, eth_port.is_split,
+                              eth_port.label_subport);
 
        devlink = priv_to_devlink(app->pf);
 
index c4b1f344b4da28cf47f7a3f4eb011fa8b084c799..0ade122805ad1deb467035294a2f8939ae8a4139 100644
@@ -486,6 +486,10 @@ static int nfp_pci_probe(struct pci_dev *pdev,
                goto err_disable_msix;
        }
 
+       err = nfp_resource_table_init(pf->cpp);
+       if (err)
+               goto err_cpp_free;
+
        pf->hwinfo = nfp_hwinfo_read(pf->cpp);
 
        dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n",
@@ -548,6 +552,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
        vfree(pf->dumpspec);
 err_hwinfo_free:
        kfree(pf->hwinfo);
+err_cpp_free:
        nfp_cpp_free(pf->cpp);
 err_disable_msix:
        destroy_workqueue(pf->wq);
index add46e28212be87fbb4832277e84e882ffe53381..42211083b51f40a63c4957b2c5ab5b9e567ddba9 100644
@@ -171,7 +171,9 @@ void nfp_net_pci_remove(struct nfp_pf *pf);
 int nfp_hwmon_register(struct nfp_pf *pf);
 void nfp_hwmon_unregister(struct nfp_pf *pf);
 
-void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port);
+void
+nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
+                    struct nfp_port *port);
 
 bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
 
index 15fa47f622aa5e160027c7f188c6641ecbead4d9..45cd2092e498a3a07d94c8103455abffc7041993 100644
 /**
  * nfp_net_get_mac_addr() - Get the MAC address.
  * @pf:       NFP PF handle
+ * @netdev:   net_device to set MAC address on
  * @port:     NFP port structure
  *
  * First try to get the MAC address from NSP ETH table. If that
  * fails generate a random address.
  */
-void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port)
+void
+nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
+                    struct nfp_port *port)
 {
        struct nfp_eth_table_port *eth_port;
 
        eth_port = __nfp_port_get_eth_port(port);
        if (!eth_port) {
-               eth_hw_addr_random(port->netdev);
+               eth_hw_addr_random(netdev);
                return;
        }
 
-       ether_addr_copy(port->netdev->dev_addr, eth_port->mac_addr);
-       ether_addr_copy(port->netdev->perm_addr, eth_port->mac_addr);
+       ether_addr_copy(netdev->dev_addr, eth_port->mac_addr);
+       ether_addr_copy(netdev->perm_addr, eth_port->mac_addr);
 }
 
 static struct nfp_eth_table_port *
@@ -511,16 +514,18 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
                return PTR_ERR(mem);
        }
 
-       min_size =  NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
-       pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
-                                         "net.macstats", min_size,
-                                         &pf->mac_stats_bar);
-       if (IS_ERR(pf->mac_stats_mem)) {
-               if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
-                       err = PTR_ERR(pf->mac_stats_mem);
-                       goto err_unmap_ctrl;
+       if (pf->eth_tbl) {
+               min_size =  NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
+               pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
+                                                 "net.macstats", min_size,
+                                                 &pf->mac_stats_bar);
+               if (IS_ERR(pf->mac_stats_mem)) {
+                       if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
+                               err = PTR_ERR(pf->mac_stats_mem);
+                               goto err_unmap_ctrl;
+                       }
+                       pf->mac_stats_mem = NULL;
                }
-               pf->mac_stats_mem = NULL;
        }
 
        pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg",
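nfp_net_get_mac_addr() now takes the destination net_device explicitly rather than assuming port->netdev, so the same helper can serve netdevs that are not attached to the port structure. A minimal sketch of the assignment logic with a hypothetical example_set_mac(); the is_valid_ether_addr() check is an extra precaution, not something the driver itself does here:

#include <linux/etherdevice.h>

/* Sketch: use the hardware-provided MAC when valid, otherwise fall
 * back to a random locally-administered address.
 */
static void example_set_mac(struct net_device *netdev, const u8 *hw_mac)
{
        if (!hw_mac || !is_valid_ether_addr(hw_mac)) {
                eth_hw_addr_random(netdev);
                return;
        }

        ether_addr_copy(netdev->dev_addr, hw_mac);
        ether_addr_copy(netdev->perm_addr, hw_mac);
}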
index 0cd077addb26a5bd666a15d3d1c361186d8f8678..6e79da91e475d3fccf99989ece63a865cfbea160 100644 (file)
@@ -348,12 +348,17 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
        return err;
 }
 
-static void nfp_repr_free(struct nfp_repr *repr)
+static void __nfp_repr_free(struct nfp_repr *repr)
 {
        free_percpu(repr->stats);
        free_netdev(repr->netdev);
 }
 
+void nfp_repr_free(struct net_device *netdev)
+{
+       __nfp_repr_free(netdev_priv(netdev));
+}
+
 struct net_device *nfp_repr_alloc(struct nfp_app *app)
 {
        struct net_device *netdev;
@@ -385,7 +390,7 @@ static void nfp_repr_clean_and_free(struct nfp_repr *repr)
        nfp_info(repr->app->cpp, "Destroying Representor(%s)\n",
                 repr->netdev->name);
        nfp_repr_clean(repr);
-       nfp_repr_free(repr);
+       __nfp_repr_free(repr);
 }
 
 void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs)
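Splitting nfp_repr_free() into an internal __nfp_repr_free(struct nfp_repr *) plus an exported wrapper keyed on the net_device is the common kernel idiom for exposing a destructor to callers that only hold the netdev pointer. A minimal sketch of the idiom with hypothetical example_* names:

#include <linux/netdevice.h>

struct example_repr {
        struct net_device *netdev;
        /* per-representor state (stats, app back-pointer, ...) */
};

/* Internal teardown operating on the private struct. */
static void __example_repr_free(struct example_repr *repr)
{
        free_netdev(repr->netdev);
}

/* Exported entry point for callers that only hold the netdev. */
void example_repr_free(struct net_device *netdev)
{
        __example_repr_free(netdev_priv(netdev));
}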
index a621e8ff528e322f59ceef2aa4252c4ac6325297..cd756a15445f29e777c43de80b1879bb596f150e 100644 (file)
@@ -123,6 +123,7 @@ void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
                  u32 cmsg_port_id, struct nfp_port *port,
                  struct net_device *pf_netdev);
+void nfp_repr_free(struct net_device *netdev);
 struct net_device *nfp_repr_alloc(struct nfp_app *app);
 void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs);
 void nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
index ced62d112aa24287c786d950aed9c168fdc91726..f44d0a85731492e8075838833106d83bcd4c1b3c 100644 (file)
@@ -94,6 +94,8 @@ int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask,
 /* MAC Statistics Accumulator */
 #define NFP_RESOURCE_MAC_STATISTICS    "mac.stat"
 
+int nfp_resource_table_init(struct nfp_cpp *cpp);
+
 struct nfp_resource *
 nfp_resource_acquire(struct nfp_cpp *cpp, const char *name);
 
index cd678323bacb49faf4437c9e8cd312bcf09149c1..a0e336bd1d85798bb295bdc1be538bd4efb67e38 100644 (file)
@@ -1330,6 +1330,7 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev)
        /*  Finished with card initialization. */
        dev_info(&pdev->dev,
                 "Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n");
+       pcie_print_link_status(pdev);
 
        nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
        if (!nfp) {
index c8f2c064cce37cf96a13444927d0959314fe6afd..4e19add1c53982b75fc012cc11a3fdf1bf353e6c 100644 (file)
@@ -295,6 +295,8 @@ void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex);
+int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target,
+                         unsigned long long address);
 
 /**
  * nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle
index cb28ac03e4cacf093207c84de9c490e6ee91f513..c88bf673cb76777fb3b2ab252695ab6b6f147fa3 100644 (file)
@@ -59,6 +59,11 @@ static u32 nfp_mutex_unlocked(u16 interface)
        return (u32)interface << 16 | 0x0000;
 }
 
+static u32 nfp_mutex_owner(u32 val)
+{
+       return val >> 16;
+}
+
 static bool nfp_mutex_is_locked(u32 val)
 {
        return (val & 0xffff) == 0x000f;
@@ -351,3 +356,43 @@ int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex)
 
        return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL;
 }
+
+/**
+ * nfp_cpp_mutex_reclaim() - Unlock mutex if held by local endpoint
+ * @cpp:       NFP CPP handle
+ * @target:    NFP CPP target ID (i.e. NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
+ * @address:   Offset into the address space of the NFP CPP target ID
+ *
+ * Release lock if held by local system.  Extreme care is advised; call only
+ * when no local lock users can exist.
+ *
+ * Return:      0 if the lock was not held locally, 1 if it was ours and was busted, -errno on invalid mutex
+ */
+int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target,
+                         unsigned long long address)
+{
+       const u32 mur = NFP_CPP_ID(target, 3, 0);       /* atomic_read */
+       const u32 muw = NFP_CPP_ID(target, 4, 0);       /* atomic_write */
+       u16 interface = nfp_cpp_interface(cpp);
+       int err;
+       u32 tmp;
+
+       err = nfp_cpp_mutex_validate(interface, &target, address);
+       if (err)
+               return err;
+
+       /* Check lock */
+       err = nfp_cpp_readl(cpp, mur, address, &tmp);
+       if (err < 0)
+               return err;
+
+       if (nfp_mutex_is_unlocked(tmp) || nfp_mutex_owner(tmp) != interface)
+               return 0;
+
+       /* Bust the lock */
+       err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_unlocked(interface));
+       if (err < 0)
+               return err;
+
+       return 1;
+}
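nfp_cpp_mutex_reclaim() only busts a lock whose owner field matches the local CPP interface, so locks held by another host or by the device's ARM island are left alone. A minimal sketch of how a caller distinguishes the three documented return values; example_reclaim() is hypothetical:

#include <linux/kernel.h>

static int example_reclaim(struct nfp_cpp *cpp, int target,
                           unsigned long long address)
{
        int err;

        err = nfp_cpp_mutex_reclaim(cpp, target, address);
        if (err < 0)            /* invalid mutex or CPP I/O error */
                return err;
        if (err == 1)           /* lock was ours; it has been busted */
                pr_warn("reclaimed a stale CPP mutex\n");

        /* err == 0: unlocked, or legitimately held by someone else */
        return 0;
}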
index 7e14725055c7bc622be1dd4b8d8731f0b27946dd..2dd89dba9311ae6ec97377874f01d439729a2a67 100644 (file)
@@ -338,3 +338,62 @@ u64 nfp_resource_size(struct nfp_resource *res)
 {
        return res->size;
 }
+
+/**
+ * nfp_resource_table_init() - Run initial checks on the resource table
+ * @cpp:       NFP CPP handle
+ *
+ * Start-of-day init procedure for resource table.  Must be called before
+ * any local resource table users may exist.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int nfp_resource_table_init(struct nfp_cpp *cpp)
+{
+       struct nfp_cpp_mutex *dev_mutex;
+       int i, err;
+
+       err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET,
+                                   NFP_RESOURCE_TBL_BASE);
+       if (err < 0) {
+               nfp_err(cpp, "Error: failed to reclaim resource table mutex\n");
+               return err;
+       }
+       if (err)
+               nfp_warn(cpp, "Warning: busted main resource table mutex\n");
+
+       dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET,
+                                       NFP_RESOURCE_TBL_BASE,
+                                       NFP_RESOURCE_TBL_KEY);
+       if (!dev_mutex)
+               return -ENOMEM;
+
+       if (nfp_cpp_mutex_lock(dev_mutex)) {
+               nfp_err(cpp, "Error: failed to claim resource table mutex\n");
+               nfp_cpp_mutex_free(dev_mutex);
+               return -EINVAL;
+       }
+
+       /* Resource 0 is the dev_mutex, start from 1 */
+       for (i = 1; i < NFP_RESOURCE_TBL_ENTRIES; i++) {
+               u64 addr = NFP_RESOURCE_TBL_BASE +
+                       sizeof(struct nfp_resource_entry) * i;
+
+               err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, addr);
+               if (err < 0) {
+                       nfp_err(cpp,
+                               "Error: failed to reclaim resource %d mutex\n",
+                               i);
+                       goto err_unlock;
+               }
+               if (err)
+                       nfp_warn(cpp, "Warning: busted resource %d mutex\n", i);
+       }
+
+       err = 0;
+err_unlock:
+       nfp_cpp_mutex_unlock(dev_mutex);
+       nfp_cpp_mutex_free(dev_mutex);
+
+       return err;
+}
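Because busting a mutex races with any concurrent local holder, nfp_resource_table_init() has to run before the first nfp_resource_acquire(), which is exactly where the nfp_pci_probe() hunk earlier in this merge places it (right after CPP setup, unwinding through the new err_cpp_free label). A minimal sketch of that ordering with a hypothetical example_probe():

#include <linux/err.h>

static int example_probe(struct nfp_cpp *cpp)
{
        struct nfp_resource *res;
        int err;

        /* Start-of-day: must precede any local resource users. */
        err = nfp_resource_table_init(cpp);
        if (err)
                return err;

        res = nfp_resource_acquire(cpp, NFP_RESOURCE_MAC_STATISTICS);
        if (IS_ERR(res))
                return PTR_ERR(res);

        /* ... use the resource ... */
        nfp_resource_release(res);
        return 0;
}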
index 27364b7572fc157dad85ddddc905510b1c96701c..b092894dd1287bc7fe00a438ecdd7041f2bed2fe 100644 (file)
@@ -1170,7 +1170,7 @@ static void *nixge_get_nvmem_address(struct device *dev)
 
        cell = nvmem_cell_get(dev, "address");
        if (IS_ERR(cell))
-               return cell;
+               return NULL;
 
        mac = nvmem_cell_read(cell, &cell_size);
        nvmem_cell_put(cell);
@@ -1183,7 +1183,7 @@ static int nixge_probe(struct platform_device *pdev)
        struct nixge_priv *priv;
        struct net_device *ndev;
        struct resource *dmares;
-       const char *mac_addr;
+       const u8 *mac_addr;
        int err;
 
        ndev = alloc_etherdev(sizeof(*priv));
@@ -1202,10 +1202,12 @@ static int nixge_probe(struct platform_device *pdev)
        ndev->max_mtu = NIXGE_JUMBO_MTU;
 
        mac_addr = nixge_get_nvmem_address(&pdev->dev);
-       if (mac_addr && is_valid_ether_addr(mac_addr))
+       if (mac_addr && is_valid_ether_addr(mac_addr)) {
                ether_addr_copy(ndev->dev_addr, mac_addr);
-       else
+               kfree(mac_addr);
+       } else {
                eth_hw_addr_random(ndev);
+       }
 
        priv = netdev_priv(ndev);
        priv->ndev = ndev;
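The nixge fix is twofold: nixge_get_nvmem_address() now returns NULL instead of leaking an ERR_PTR into is_valid_ether_addr(), and probe frees the buffer that nvmem_cell_read() kmalloc'd once the address has been copied. A hedged sketch of the corrected pattern; example_mac_from_nvmem() is hypothetical:

#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>

static void example_mac_from_nvmem(struct device *dev,
                                   struct net_device *ndev)
{
        struct nvmem_cell *cell;
        size_t len;
        u8 *mac;

        cell = nvmem_cell_get(dev, "address");
        if (IS_ERR(cell))
                goto fallback;

        /* Returns a kmalloc'd buffer which the caller must free. */
        mac = nvmem_cell_read(cell, &len);
        nvmem_cell_put(cell);
        if (IS_ERR(mac))
                goto fallback;

        if (len == ETH_ALEN && is_valid_ether_addr(mac))
                ether_addr_copy(ndev->dev_addr, mac);
        else
                eth_hw_addr_random(ndev);
        kfree(mac);
        return;

fallback:
        eth_hw_addr_random(ndev);
}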
index e07460a68d303267e9f45ab48f875855cee4bdcd..adcff495466eb0e0f0e2c53a366137aaa8631196 100644 (file)
@@ -439,6 +439,59 @@ struct qed_fw_data {
        u32                     init_ops_size;
 };
 
+enum qed_mf_mode_bit {
+       /* Supports PF-classification based on tag */
+       QED_MF_OVLAN_CLSS,
+
+       /* Supports PF-classification based on MAC */
+       QED_MF_LLH_MAC_CLSS,
+
+       /* Supports PF-classification based on protocol type */
+       QED_MF_LLH_PROTO_CLSS,
+
+       /* Requires a default PF to be set */
+       QED_MF_NEED_DEF_PF,
+
+       /* Allow LL2 to multicast/broadcast */
+       QED_MF_LL2_NON_UNICAST,
+
+       /* Allow Cross-PF [& child VFs] Tx-switching */
+       QED_MF_INTER_PF_SWITCH,
+
+       /* Unified Fabric Port (UFP) support enabled */
+       QED_MF_UFP_SPECIFIC,
+
+       /* Disable Accelerated Receive Flow Steering (aRFS) */
+       QED_MF_DISABLE_ARFS,
+
+       /* Use vlan for steering */
+       QED_MF_8021Q_TAGGING,
+
+       /* Use stag for steering */
+       QED_MF_8021AD_TAGGING,
+
+       /* Allow DSCP to TC mapping */
+       QED_MF_DSCP_TO_TC_MAP,
+};
+
+enum qed_ufp_mode {
+       QED_UFP_MODE_ETS,
+       QED_UFP_MODE_VNIC_BW,
+       QED_UFP_MODE_UNKNOWN
+};
+
+enum qed_ufp_pri_type {
+       QED_UFP_PRI_OS,
+       QED_UFP_PRI_VNIC,
+       QED_UFP_PRI_UNKNOWN
+};
+
+struct qed_ufp_info {
+       enum qed_ufp_pri_type pri_type;
+       enum qed_ufp_mode mode;
+       u8 tc;
+};
+
 enum BAR_ID {
        BAR_ID_0,               /* used for GRC */
        BAR_ID_1                /* Used for doorbells */
@@ -547,6 +600,8 @@ struct qed_hwfn {
 
        struct qed_dcbx_info            *p_dcbx_info;
 
+       struct qed_ufp_info             ufp_info;
+
        struct qed_dmae_info            dmae_info;
 
        /* QM init */
@@ -669,10 +724,8 @@ struct qed_dev {
        u8                              num_funcs_in_port;
 
        u8                              path_id;
-       enum qed_mf_mode                mf_mode;
-#define IS_MF_DEFAULT(_p_hwfn)  (((_p_hwfn)->cdev)->mf_mode == QED_MF_DEFAULT)
-#define IS_MF_SI(_p_hwfn)       (((_p_hwfn)->cdev)->mf_mode == QED_MF_NPAR)
-#define IS_MF_SD(_p_hwfn)       (((_p_hwfn)->cdev)->mf_mode == QED_MF_OVLAN)
+
+       unsigned long                   mf_bits;
 
        int                             pcie_width;
        int                             pcie_speed;
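Replacing enum qed_mf_mode with a bitmap of capabilities means each multi-function personality can advertise any combination of features, and call sites test the one capability they care about instead of enumerating modes. A minimal sketch of the idiom with the bit names introduced above, assuming the qed.h definitions are in scope:

#include <linux/bitops.h>

/* Sketch: compose per-mode capabilities, then gate features on bits. */
static void example_set_npar_bits(unsigned long *mf_bits)
{
        *mf_bits = BIT(QED_MF_LLH_MAC_CLSS) |
                   BIT(QED_MF_LLH_PROTO_CLSS) |
                   BIT(QED_MF_INTER_PF_SWITCH);
}

static bool example_mac_clss_allowed(unsigned long *mf_bits)
{
        return test_bit(QED_MF_LLH_MAC_CLSS, mf_bits);
}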
index 449777f212378d29ca4a43b7e2243a1d378ca2dc..8f31406ec89407713b2ad32c81a30185b2c05727 100644 (file)
@@ -274,8 +274,8 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
                     u32 pri_tc_tbl, int count, u8 dcbx_version)
 {
        enum dcbx_protocol_type type;
+       bool enable, ieee, eth_tlv;
        u8 tc, priority_map;
-       bool enable, ieee;
        u16 protocol_id;
        int priority;
        int i;
@@ -283,6 +283,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
        DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count);
 
        ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
+       eth_tlv = false;
        /* Parse APP TLV */
        for (i = 0; i < count; i++) {
                protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -304,13 +305,22 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
                         * indication, but we only got here if there was an
                         * app tlv for the protocol, so dcbx must be enabled.
                         */
-                       enable = !(type == DCBX_PROTOCOL_ETH);
+                       if (type == DCBX_PROTOCOL_ETH) {
+                               enable = false;
+                               eth_tlv = true;
+                       } else {
+                               enable = true;
+                       }
 
                        qed_dcbx_update_app_info(p_data, p_hwfn, enable,
                                                 priority, tc, type);
                }
        }
 
+       /* If Eth TLV is not detected, use UFP TC as default TC */
+       if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits) && !eth_tlv)
+               p_data->arr[DCBX_PROTOCOL_ETH].tc = p_hwfn->ufp_info.tc;
+
        /* Update ramrod protocol data and hw_info fields
         * with default info when corresponding APP TLV's are not detected.
         * The enabled field has a different logic for ethernet as only for
index b3211c7d38c212a4cf5881b9e72591bc28ee45d7..39124b594a36248ce63df753dc935d21919ee9cc 100644 (file)
@@ -419,6 +419,7 @@ struct phy_defs {
 #define NUM_RSS_MEM_TYPES              5
 
 #define NUM_BIG_RAM_TYPES              3
+#define BIG_RAM_NAME_LEN               3
 
 #define NUM_PHY_TBUS_ADDRESSES         2048
 #define PHY_DUMP_SIZE_DWORDS           (NUM_PHY_TBUS_ADDRESSES / 2)
@@ -3650,8 +3651,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
                     BIT(big_ram->is_256b_bit_offset[dev_data->chip_id]) ? 256
                                                                         : 128;
 
-       strscpy(type_name, big_ram->instance_name, sizeof(type_name));
-       strscpy(mem_name, big_ram->instance_name, sizeof(mem_name));
+       strncpy(type_name, big_ram->instance_name, BIG_RAM_NAME_LEN);
+       strncpy(mem_name, big_ram->instance_name, BIG_RAM_NAME_LEN);
 
        /* Dump memory header */
        offset += qed_grc_dump_mem_hdr(p_hwfn,
index d2ad5e92c74f57a7d1fcd2f50ff296ba849071cd..5605289626585476478e946f89ec1f7d5e1866f4 100644 (file)
@@ -1149,18 +1149,10 @@ static int qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
                return -EINVAL;
        }
 
-       switch (p_hwfn->cdev->mf_mode) {
-       case QED_MF_DEFAULT:
-       case QED_MF_NPAR:
-               hw_mode |= 1 << MODE_MF_SI;
-               break;
-       case QED_MF_OVLAN:
+       if (test_bit(QED_MF_OVLAN_CLSS, &p_hwfn->cdev->mf_bits))
                hw_mode |= 1 << MODE_MF_SD;
-               break;
-       default:
-               DP_NOTICE(p_hwfn, "Unsupported MF mode, init as DEFAULT\n");
+       else
                hw_mode |= 1 << MODE_MF_SI;
-       }
 
        hw_mode |= 1 << MODE_ASIC;
 
@@ -1507,6 +1499,11 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
                STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1);
                STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET,
                             p_hwfn->hw_info.ovlan);
+
+               DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
+                          "Configuring LLH_FUNC_FILTER_HDR_SEL\n");
+               STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET,
+                            1);
        }
 
        /* Enable classification by MAC if needed */
@@ -1557,7 +1554,6 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 
                /* send function start command */
                rc = qed_sp_pf_start(p_hwfn, p_ptt, p_tunn,
-                                    p_hwfn->cdev->mf_mode,
                                     allow_npar_tx_switch);
                if (rc) {
                        DP_NOTICE(p_hwfn, "Function start ramrod failed\n");
@@ -1644,6 +1640,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
        bool b_default_mtu = true;
        struct qed_hwfn *p_hwfn;
        int rc = 0, mfw_rc, i;
+       u16 ether_type;
 
        if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
                DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n");
@@ -1677,6 +1674,24 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
                if (rc)
                        return rc;
 
+               if (IS_PF(cdev) && (test_bit(QED_MF_8021Q_TAGGING,
+                                            &cdev->mf_bits) ||
+                                   test_bit(QED_MF_8021AD_TAGGING,
+                                            &cdev->mf_bits))) {
+                       if (test_bit(QED_MF_8021Q_TAGGING, &cdev->mf_bits))
+                               ether_type = ETH_P_8021Q;
+                       else
+                               ether_type = ETH_P_8021AD;
+                       STORE_RT_REG(p_hwfn, PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET,
+                                    ether_type);
+                       STORE_RT_REG(p_hwfn, NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET,
+                                    ether_type);
+                       STORE_RT_REG(p_hwfn, PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET,
+                                    ether_type);
+                       STORE_RT_REG(p_hwfn, DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET,
+                                    ether_type);
+               }
+
                qed_fill_load_req_params(&load_req_params,
                                         p_params->p_drv_load_params);
                rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt,
@@ -2639,31 +2654,57 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                   link->pause.autoneg,
                   p_caps->default_eee, p_caps->eee_lpi_timer);
 
-       /* Read Multi-function information from shmem */
-       addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
-              offsetof(struct nvm_cfg1, glob) +
-              offsetof(struct nvm_cfg1_glob, generic_cont0);
+       if (IS_LEAD_HWFN(p_hwfn)) {
+               struct qed_dev *cdev = p_hwfn->cdev;
 
-       generic_cont0 = qed_rd(p_hwfn, p_ptt, addr);
+               /* Read Multi-function information from shmem */
+               addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+                      offsetof(struct nvm_cfg1, glob) +
+                      offsetof(struct nvm_cfg1_glob, generic_cont0);
 
-       mf_mode = (generic_cont0 & NVM_CFG1_GLOB_MF_MODE_MASK) >>
-                 NVM_CFG1_GLOB_MF_MODE_OFFSET;
+               generic_cont0 = qed_rd(p_hwfn, p_ptt, addr);
 
-       switch (mf_mode) {
-       case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED:
-               p_hwfn->cdev->mf_mode = QED_MF_OVLAN;
-               break;
-       case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
-               p_hwfn->cdev->mf_mode = QED_MF_NPAR;
-               break;
-       case NVM_CFG1_GLOB_MF_MODE_DEFAULT:
-               p_hwfn->cdev->mf_mode = QED_MF_DEFAULT;
-               break;
+               mf_mode = (generic_cont0 & NVM_CFG1_GLOB_MF_MODE_MASK) >>
+                         NVM_CFG1_GLOB_MF_MODE_OFFSET;
+
+               switch (mf_mode) {
+               case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED:
+                       cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS);
+                       break;
+               case NVM_CFG1_GLOB_MF_MODE_UFP:
+                       cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS) |
+                                       BIT(QED_MF_LLH_PROTO_CLSS) |
+                                       BIT(QED_MF_UFP_SPECIFIC) |
+                                       BIT(QED_MF_8021Q_TAGGING);
+                       break;
+               case NVM_CFG1_GLOB_MF_MODE_BD:
+                       cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS) |
+                                       BIT(QED_MF_LLH_PROTO_CLSS) |
+                                       BIT(QED_MF_8021AD_TAGGING);
+                       break;
+               case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
+                       cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) |
+                                       BIT(QED_MF_LLH_PROTO_CLSS) |
+                                       BIT(QED_MF_LL2_NON_UNICAST) |
+                                       BIT(QED_MF_INTER_PF_SWITCH);
+                       break;
+               case NVM_CFG1_GLOB_MF_MODE_DEFAULT:
+                       cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) |
+                                       BIT(QED_MF_LLH_PROTO_CLSS) |
+                                       BIT(QED_MF_LL2_NON_UNICAST);
+                       if (QED_IS_BB(p_hwfn->cdev))
+                               cdev->mf_bits |= BIT(QED_MF_NEED_DEF_PF);
+                       break;
+               }
+
+               DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n",
+                       cdev->mf_bits);
        }
-       DP_INFO(p_hwfn, "Multi function mode is %08x\n",
-               p_hwfn->cdev->mf_mode);
 
-       /* Read Multi-function information from shmem */
+       DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n",
+               p_hwfn->cdev->mf_bits);
+
+       /* Read device capabilities information from shmem */
        addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
                offsetof(struct nvm_cfg1, glob) +
                offsetof(struct nvm_cfg1_glob, device_capabilities);
@@ -2856,6 +2897,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
                qed_mcp_cmd_port_init(p_hwfn, p_ptt);
 
                qed_get_eee_caps(p_hwfn, p_ptt);
+
+               qed_mcp_read_ufp_config(p_hwfn, p_ptt);
        }
 
        if (qed_mcp_is_init(p_hwfn)) {
@@ -3462,7 +3505,7 @@ int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
        u32 high = 0, low = 0, en;
        int i;
 
-       if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+       if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
                return 0;
 
        qed_llh_mac_to_filter(&high, &low, p_filter);
@@ -3507,7 +3550,7 @@ void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
        u32 high = 0, low = 0;
        int i;
 
-       if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+       if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
                return;
 
        qed_llh_mac_to_filter(&high, &low, p_filter);
@@ -3549,7 +3592,7 @@ qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
        u32 high = 0, low = 0, en;
        int i;
 
-       if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+       if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
                return 0;
 
        switch (type) {
@@ -3647,7 +3690,7 @@ qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
        u32 high = 0, low = 0;
        int i;
 
-       if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+       if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
                return;
 
        switch (type) {
index 2dc9b312a795e75191d2603496dbb42d7e00945b..cc1b373c0ace56e08564d3527de9f5da3f87b4e4 100644 (file)
@@ -313,6 +313,9 @@ qed_sp_fcoe_conn_offload(struct qed_hwfn *p_hwfn,
        p_data->d_id.addr_mid = p_conn->d_id.addr_mid;
        p_data->d_id.addr_lo = p_conn->d_id.addr_lo;
        p_data->flags = p_conn->flags;
+       if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
+               SET_FIELD(p_data->flags,
+                         FCOE_CONN_OFFLOAD_RAMROD_DATA_B_SINGLE_VLAN, 1);
        p_data->def_q_idx = p_conn->def_q_idx;
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
index 7f5ec42dde484f3c7f99d2cac5600cf286292eb9..b5f70eff0182df2e1958e49a63b1fc91b5f0e6f2 100644 (file)
@@ -11993,6 +11993,16 @@ struct public_port {
 #define EEE_REMOTE_TW_TX_OFFSET 0
 #define EEE_REMOTE_TW_RX_MASK   0xffff0000
 #define EEE_REMOTE_TW_RX_OFFSET 16
+
+       u32 oem_cfg_port;
+#define OEM_CFG_CHANNEL_TYPE_MASK                       0x00000003
+#define OEM_CFG_CHANNEL_TYPE_OFFSET                     0
+#define OEM_CFG_CHANNEL_TYPE_VLAN_PARTITION             0x1
+#define OEM_CFG_CHANNEL_TYPE_STAGGED                    0x2
+#define OEM_CFG_SCHED_TYPE_MASK                         0x0000000C
+#define OEM_CFG_SCHED_TYPE_OFFSET                       2
+#define OEM_CFG_SCHED_TYPE_ETS                          0x1
+#define OEM_CFG_SCHED_TYPE_VNIC_BW                      0x2
 };
 
 struct public_func {
@@ -12069,6 +12079,23 @@ struct public_func {
 #define DRV_ID_DRV_INIT_HW_MASK                0x80000000
 #define DRV_ID_DRV_INIT_HW_SHIFT       31
 #define DRV_ID_DRV_INIT_HW_FLAG                (1 << DRV_ID_DRV_INIT_HW_SHIFT)
+
+       u32 oem_cfg_func;
+#define OEM_CFG_FUNC_TC_MASK                    0x0000000F
+#define OEM_CFG_FUNC_TC_OFFSET                  0
+#define OEM_CFG_FUNC_TC_0                       0x0
+#define OEM_CFG_FUNC_TC_1                       0x1
+#define OEM_CFG_FUNC_TC_2                       0x2
+#define OEM_CFG_FUNC_TC_3                       0x3
+#define OEM_CFG_FUNC_TC_4                       0x4
+#define OEM_CFG_FUNC_TC_5                       0x5
+#define OEM_CFG_FUNC_TC_6                       0x6
+#define OEM_CFG_FUNC_TC_7                       0x7
+
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_MASK         0x00000030
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_OFFSET       4
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_VNIC         0x1
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_OS           0x2
 };
 
 struct mcp_mac {
@@ -12495,6 +12522,7 @@ enum MFW_DRV_MSG_TYPE {
        MFW_DRV_MSG_BW_UPDATE10,
        MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
        MFW_DRV_MSG_BW_UPDATE11,
+       MFW_DRV_MSG_OEM_CFG_UPDATE,
        MFW_DRV_MSG_MAX
 };
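The new oem_cfg_port and oem_cfg_func words pack several fields into single 32-bit registers, each described by a _MASK/_OFFSET pair; qed_mcp_read_ufp_config() later in this merge decodes them with the usual shift-and-mask idiom. A minimal sketch of that idiom, assuming the definitions above are in scope:

#include <linux/kernel.h>

/* Sketch: extract packed OEM config fields from the shmem words. */
static void example_decode_oem_cfg(u32 port_cfg, u32 func_cfg)
{
        u32 chan, sched, tc;

        chan = (port_cfg & OEM_CFG_CHANNEL_TYPE_MASK) >>
               OEM_CFG_CHANNEL_TYPE_OFFSET;
        sched = (port_cfg & OEM_CFG_SCHED_TYPE_MASK) >>
                OEM_CFG_SCHED_TYPE_OFFSET;
        tc = (func_cfg & OEM_CFG_FUNC_TC_MASK) >> OEM_CFG_FUNC_TC_OFFSET;

        pr_debug("channel=%u sched=%u tc=%u\n", chan, sched, tc);
}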
 
index 8b1b7e8ca56c3b6e41f80e6644a7d0605c1a542f..5e655c3601cf7c6fb1ea0dd53a1d642163b2e89f 100644 (file)
@@ -115,8 +115,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn)
 
 void qed_l2_setup(struct qed_hwfn *p_hwfn)
 {
-       if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
-           p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+       if (!QED_IS_L2_PERSONALITY(p_hwfn))
                return;
 
        mutex_init(&p_hwfn->p_l2_info->lock);
@@ -126,8 +125,7 @@ void qed_l2_free(struct qed_hwfn *p_hwfn)
 {
        u32 i;
 
-       if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
-           p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+       if (!QED_IS_L2_PERSONALITY(p_hwfn))
                return;
 
        if (!p_hwfn->p_l2_info)
index 74fc626b1ec1695818da49cbcd2d495d432edceb..c97ebd681c471196cb4135deafbf8e07efc9d615 100644 (file)
@@ -292,6 +292,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
        struct qed_ll2_tx_packet *p_pkt = NULL;
        struct qed_ll2_info *p_ll2_conn;
        struct qed_ll2_tx_queue *p_tx;
+       unsigned long flags = 0;
        dma_addr_t tx_frag;
 
        p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
@@ -300,6 +301,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
        p_tx = &p_ll2_conn->tx_queue;
 
+       spin_lock_irqsave(&p_tx->lock, flags);
        while (!list_empty(&p_tx->active_descq)) {
                p_pkt = list_first_entry(&p_tx->active_descq,
                                         struct qed_ll2_tx_packet, list_entry);
@@ -309,6 +311,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                list_del(&p_pkt->list_entry);
                b_last_packet = list_empty(&p_tx->active_descq);
                list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+               spin_unlock_irqrestore(&p_tx->lock, flags);
                if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
                        struct qed_ooo_buffer *p_buffer;
 
@@ -328,7 +331,9 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                                                      b_last_frag,
                                                      b_last_packet);
                }
+               spin_lock_irqsave(&p_tx->lock, flags);
        }
+       spin_unlock_irqrestore(&p_tx->lock, flags);
 }
 
 static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
@@ -556,6 +561,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
        struct qed_ll2_info *p_ll2_conn = NULL;
        struct qed_ll2_rx_packet *p_pkt = NULL;
        struct qed_ll2_rx_queue *p_rx;
+       unsigned long flags = 0;
 
        p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
        if (!p_ll2_conn)
@@ -563,13 +569,14 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
        p_rx = &p_ll2_conn->rx_queue;
 
+       spin_lock_irqsave(&p_rx->lock, flags);
        while (!list_empty(&p_rx->active_descq)) {
                p_pkt = list_first_entry(&p_rx->active_descq,
                                         struct qed_ll2_rx_packet, list_entry);
                if (!p_pkt)
                        break;
-
                list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
+               spin_unlock_irqrestore(&p_rx->lock, flags);
 
                if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
                        struct qed_ooo_buffer *p_buffer;
@@ -588,7 +595,30 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                                                      cookie,
                                                      rx_buf_addr, b_last);
                }
+               spin_lock_irqsave(&p_rx->lock, flags);
        }
+       spin_unlock_irqrestore(&p_rx->lock, flags);
+}
+
+static bool
+qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn,
+                               struct core_rx_slow_path_cqe *p_cqe)
+{
+       struct ooo_opaque *iscsi_ooo;
+       u32 cid;
+
+       if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH)
+               return false;
+
+       iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data;
+       if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES)
+               return false;
+
+       /* Need to make a flush */
+       cid = le32_to_cpu(iscsi_ooo->cid);
+       qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid);
+
+       return true;
 }
 
 static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
@@ -617,6 +647,11 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
                cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
                cqe_type = cqe->rx_cqe_sp.type;
 
+               if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH)
+                       if (qed_ll2_lb_rxq_handler_slowpath(p_hwfn,
+                                                           &cqe->rx_cqe_sp))
+                               continue;
+
                if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) {
                        DP_NOTICE(p_hwfn,
                                  "Got a non-regular LB LL2 completion [type 0x%02x]\n",
@@ -794,6 +829,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
        struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
        int rc;
 
+       if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
+               return 0;
+
        rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn);
        if (rc)
                return rc;
@@ -814,6 +852,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
        u16 new_idx = 0, num_bds = 0;
        int rc;
 
+       if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
+               return 0;
+
        new_idx = le16_to_cpu(*p_tx->p_fw_cons);
        num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
 
@@ -919,12 +960,16 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
        p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
        p_ramrod->inner_vlan_stripping_en =
                p_ll2_conn->input.rx_vlan_removal_en;
+
+       if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits) &&
+           p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE)
+               p_ramrod->report_outer_vlan = 1;
        p_ramrod->queue_id = p_ll2_conn->queue_id;
        p_ramrod->main_func_queue = p_ll2_conn->main_func_queue ? 1 : 0;
 
-       if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
-           p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
-           (conn_type != QED_LL2_TYPE_IWARP)) {
+       if (test_bit(QED_MF_LL2_NON_UNICAST, &p_hwfn->cdev->mf_bits) &&
+           p_ramrod->main_func_queue && conn_type != QED_LL2_TYPE_ROCE &&
+           conn_type != QED_LL2_TYPE_IWARP) {
                p_ramrod->mf_si_bcast_accept_all = 1;
                p_ramrod->mf_si_mcast_accept_all = 1;
        } else {
@@ -1493,11 +1538,12 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
        qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
 
        if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
+               if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
+                       qed_llh_add_protocol_filter(p_hwfn, p_ptt,
+                                                   ETH_P_FCOE, 0,
+                                                   QED_LLH_FILTER_ETHERTYPE);
                qed_llh_add_protocol_filter(p_hwfn, p_ptt,
-                                           0x8906, 0,
-                                           QED_LLH_FILTER_ETHERTYPE);
-               qed_llh_add_protocol_filter(p_hwfn, p_ptt,
-                                           0x8914, 0,
+                                           ETH_P_FIP, 0,
                                            QED_LLH_FILTER_ETHERTYPE);
        }
 
@@ -1653,11 +1699,16 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 
        start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
        if (QED_IS_IWARP_PERSONALITY(p_hwfn) &&
-           p_ll2->input.conn_type == QED_LL2_TYPE_OOO)
+           p_ll2->input.conn_type == QED_LL2_TYPE_OOO) {
                start_bd->nw_vlan_or_lb_echo =
                    cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE);
-       else
+       } else {
                start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
+               if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits) &&
+                   p_ll2->input.conn_type == QED_LL2_TYPE_FCOE)
+                       pkt->remove_stag = true;
+       }
+
        SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
                  cpu_to_le16(pkt->l4_hdr_offset_w));
        SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
@@ -1668,6 +1719,9 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
        SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum));
        SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum));
        SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len));
+       SET_FIELD(bd_data, CORE_TX_BD_DATA_DISABLE_STAG_INSERTION,
+                 !!(pkt->remove_stag));
+
        start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
        DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag);
        start_bd->nbytes = cpu_to_le16(pkt->first_frag_len);
@@ -1867,28 +1921,37 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
 
        /* Stop Tx & Rx of connection, if needed */
        if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
+               p_ll2_conn->tx_queue.b_cb_registred = false;
+               smp_wmb(); /* Make sure this is seen by ll2_lb_txq_completion */
                rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn);
                if (rc)
                        goto out;
+
                qed_ll2_txq_flush(p_hwfn, connection_handle);
+               qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
        }
 
        if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+               p_ll2_conn->rx_queue.b_cb_registred = false;
+               smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
                rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn);
                if (rc)
                        goto out;
+
                qed_ll2_rxq_flush(p_hwfn, connection_handle);
+               qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
        }
 
        if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
                qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
 
        if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
+               if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
+                       qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
+                                                      ETH_P_FCOE, 0,
+                                                     QED_LLH_FILTER_ETHERTYPE);
                qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
-                                              0x8906, 0,
-                                              QED_LLH_FILTER_ETHERTYPE);
-               qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
-                                              0x8914, 0,
+                                              ETH_P_FIP, 0,
                                               QED_LLH_FILTER_ETHERTYPE);
        }
 
@@ -1925,16 +1988,6 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle)
        if (!p_ll2_conn)
                return;
 
-       if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
-               p_ll2_conn->rx_queue.b_cb_registred = false;
-               qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
-       }
-
-       if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
-               p_ll2_conn->tx_queue.b_cb_registred = false;
-               qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
-       }
-
        kfree(p_ll2_conn->tx_queue.descq_mem);
        qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
 
@@ -2360,7 +2413,8 @@ static int qed_ll2_stop(struct qed_dev *cdev)
        return -EINVAL;
 }
 
-static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
+static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
+                             unsigned long xmit_flags)
 {
        struct qed_ll2_tx_pkt_info pkt;
        const skb_frag_t *frag;
@@ -2370,7 +2424,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
        u8 flags = 0;
 
        if (unlikely(skb->ip_summed != CHECKSUM_NONE)) {
-               DP_INFO(cdev, "Cannot transmit a checksumed packet\n");
+               DP_INFO(cdev, "Cannot transmit a checksummed packet\n");
                return -EINVAL;
        }
 
@@ -2405,6 +2459,9 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
        pkt.first_frag = mapping;
        pkt.first_frag_len = skb->len;
        pkt.cookie = skb;
+       if (test_bit(QED_MF_UFP_SPECIFIC, &cdev->mf_bits) &&
+           test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags))
+               pkt.remove_stag = true;
 
        rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
                                       &pkt, 1);
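The rxq/txq flush hunks above hold the queue spinlock while walking active_descq but release it around each completion callback, which may itself take other locks or re-enter LL2; because the entry is moved to free_descq before the lock is dropped, the walk can safely restart after reacquiring. A hedged sketch of the pattern with hypothetical example_* types:

#include <linux/list.h>
#include <linux/spinlock.h>

struct example_pkt {
        struct list_head list_entry;
};

static void example_flush(spinlock_t *lock, struct list_head *active,
                          struct list_head *free_list,
                          void (*complete)(struct example_pkt *))
{
        struct example_pkt *pkt;
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        while (!list_empty(active)) {
                pkt = list_first_entry(active, struct example_pkt,
                                       list_entry);
                /* Detach under the lock so the walk can restart. */
                list_move_tail(&pkt->list_entry, free_list);

                spin_unlock_irqrestore(lock, flags);
                complete(pkt);  /* runs without the queue lock held */
                spin_lock_irqsave(lock, flags);
        }
        spin_unlock_irqrestore(lock, flags);
}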
index d1d3787affe86d6216c6f64e34cdf7d4d477075f..9feed3b79cd47585609b1e6d89b29fbd66358bcb 100644 (file)
@@ -264,7 +264,6 @@ int qed_fill_dev_info(struct qed_dev *cdev,
        dev_info->pci_mem_end = cdev->pci_params.mem_end;
        dev_info->pci_irq = cdev->pci_params.irq;
        dev_info->rdma_supported = QED_IS_RDMA_PERSONALITY(p_hwfn);
-       dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
        dev_info->dev_type = cdev->type;
        ether_addr_copy(dev_info->hw_mac, hw_info->hw_mac_addr);
 
@@ -273,7 +272,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
                dev_info->fw_minor = FW_MINOR_VERSION;
                dev_info->fw_rev = FW_REVISION_VERSION;
                dev_info->fw_eng = FW_ENGINEERING_VERSION;
-               dev_info->mf_mode = cdev->mf_mode;
+               dev_info->b_inter_pf_switch = test_bit(QED_MF_INTER_PF_SWITCH,
+                                                      &cdev->mf_bits);
                dev_info->tx_switching = true;
 
                if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
@@ -680,7 +680,7 @@ static int qed_nic_stop(struct qed_dev *cdev)
                        tasklet_disable(p_hwfn->sp_dpc);
                        p_hwfn->b_sp_dpc_enabled = false;
                        DP_VERBOSE(cdev, NETIF_MSG_IFDOWN,
-                                  "Disabled sp taskelt [hwfn %d] at %p\n",
+                                  "Disabled sp tasklet [hwfn %d] at %p\n",
                                   i, p_hwfn->sp_dpc);
                }
        }
index 0550f0ee11b3a817e336073274b6c8ca891604f9..e80f5e7c7992041b8e8a98e00575ca2ac1430460 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/string.h>
 #include <linux/etherdevice.h>
 #include "qed.h"
+#include "qed_cxt.h"
 #include "qed_dcbx.h"
 #include "qed_hsi.h"
 #include "qed_hw.h"
@@ -1486,6 +1487,80 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                    &resp, &param);
 }
 
+void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       struct public_func shmem_info;
+       u32 port_cfg, val;
+
+       if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
+               return;
+
+       memset(&p_hwfn->ufp_info, 0, sizeof(p_hwfn->ufp_info));
+       port_cfg = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
+                         offsetof(struct public_port, oem_cfg_port));
+       val = (port_cfg & OEM_CFG_CHANNEL_TYPE_MASK) >>
+               OEM_CFG_CHANNEL_TYPE_OFFSET;
+       if (val != OEM_CFG_CHANNEL_TYPE_STAGGED)
+               DP_NOTICE(p_hwfn, "Incorrect UFP Channel type %d\n", val);
+
+       val = (port_cfg & OEM_CFG_SCHED_TYPE_MASK) >> OEM_CFG_SCHED_TYPE_OFFSET;
+       if (val == OEM_CFG_SCHED_TYPE_ETS) {
+               p_hwfn->ufp_info.mode = QED_UFP_MODE_ETS;
+       } else if (val == OEM_CFG_SCHED_TYPE_VNIC_BW) {
+               p_hwfn->ufp_info.mode = QED_UFP_MODE_VNIC_BW;
+       } else {
+               p_hwfn->ufp_info.mode = QED_UFP_MODE_UNKNOWN;
+               DP_NOTICE(p_hwfn, "Unknown UFP scheduling mode %d\n", val);
+       }
+
+       qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
+       val = (shmem_info.oem_cfg_func & OEM_CFG_FUNC_TC_MASK) >>
+               OEM_CFG_FUNC_TC_OFFSET;
+       p_hwfn->ufp_info.tc = (u8)val;
+       val = (shmem_info.oem_cfg_func & OEM_CFG_FUNC_HOST_PRI_CTRL_MASK) >>
+               OEM_CFG_FUNC_HOST_PRI_CTRL_OFFSET;
+       if (val == OEM_CFG_FUNC_HOST_PRI_CTRL_VNIC) {
+               p_hwfn->ufp_info.pri_type = QED_UFP_PRI_VNIC;
+       } else if (val == OEM_CFG_FUNC_HOST_PRI_CTRL_OS) {
+               p_hwfn->ufp_info.pri_type = QED_UFP_PRI_OS;
+       } else {
+               p_hwfn->ufp_info.pri_type = QED_UFP_PRI_UNKNOWN;
+               DP_NOTICE(p_hwfn, "Unknown Host priority control %d\n", val);
+       }
+
+       DP_NOTICE(p_hwfn,
+                 "UFP shmem config: mode = %d tc = %d pri_type = %d\n",
+                 p_hwfn->ufp_info.mode,
+                 p_hwfn->ufp_info.tc, p_hwfn->ufp_info.pri_type);
+}
+
+static int
+qed_mcp_handle_ufp_event(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       qed_mcp_read_ufp_config(p_hwfn, p_ptt);
+
+       if (p_hwfn->ufp_info.mode == QED_UFP_MODE_VNIC_BW) {
+               p_hwfn->qm_info.ooo_tc = p_hwfn->ufp_info.tc;
+               p_hwfn->hw_info.offload_tc = p_hwfn->ufp_info.tc;
+
+               qed_qm_reconf(p_hwfn, p_ptt);
+       } else if (p_hwfn->ufp_info.mode == QED_UFP_MODE_ETS) {
+               /* Merge UFP TC with the dcbx TC data */
+               qed_dcbx_mib_update_event(p_hwfn, p_ptt,
+                                         QED_DCBX_OPERATIONAL_MIB);
+       } else {
+               DP_ERR(p_hwfn, "Invalid sched type, discard the UFP config\n");
+               return -EINVAL;
+       }
+
+       /* update storm FW with negotiation results */
+       qed_sp_pf_update_ufp(p_hwfn);
+
+       /* update stag pcp value */
+       qed_sp_pf_update_stag(p_hwfn);
+
+       return 0;
+}
+
 int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt)
 {
@@ -1529,6 +1604,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                        qed_dcbx_mib_update_event(p_hwfn, p_ptt,
                                                  QED_DCBX_OPERATIONAL_MIB);
                        break;
+               case MFW_DRV_MSG_OEM_CFG_UPDATE:
+                       qed_mcp_handle_ufp_event(p_hwfn, p_ptt);
+                       break;
                case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
                        qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
                        break;
index 3af3896420b97c5dcebd403dc7a4cdba65f82f10..250579ba632bde6bebeb90eaca8a55f7e97bbd6a 100644 (file)
@@ -1004,6 +1004,14 @@ int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
  */
 int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
+/**
+ * @brief Read ufp config from the shared memory.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
 /**
  * @brief Populate the nvm info shadow in the given hardware function
  *
index fb7c2d1562ae7a1fca7a5345e2aecf4a8dc2da7d..6acfd43c1a4fdc3ad02ac0320dcf0a4049ceae1a 100644 (file)
@@ -848,7 +848,7 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
 
        if (!(qp->resp_offloaded)) {
                DP_NOTICE(p_hwfn,
-                         "The responder's qp should be offloded before requester's\n");
+                         "The responder's qp should be offloaded before requester's\n");
                return -EINVAL;
        }
 
index ab4ad8a1e2a5e3a9e1a846d82341468719073ea1..e95431f6acd46fb6ace4c20cfe227388c890cdea 100644 (file)
@@ -416,7 +416,6 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
  * @param p_hwfn
  * @param p_ptt
  * @param p_tunn
- * @param mode
  * @param allow_npar_tx_switch
  *
  * @return int
@@ -425,7 +424,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt,
                    struct qed_tunnel_info *p_tunn,
-                   enum qed_mf_mode mode, bool allow_npar_tx_switch);
+                   bool allow_npar_tx_switch);
 
 /**
  * @brief qed_sp_pf_update - PF Function Update Ramrod
@@ -463,6 +462,15 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn);
  * @return int
  */
 
+/**
+ * @brief qed_sp_pf_update_ufp - PF ufp update Ramrod
+ *
+ * @param p_hwfn
+ *
+ * @return int
+ */
+int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn);
+
 int qed_sp_pf_stop(struct qed_hwfn *p_hwfn);
 
 int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
index 5e927b6cac221c187ad37f77d4a5563c83e9bc90..8de644b4721efd63a7d3efa410139228d0b2f739 100644 (file)
@@ -306,7 +306,7 @@ qed_tunn_set_pf_start_params(struct qed_hwfn *p_hwfn,
 int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt,
                    struct qed_tunnel_info *p_tunn,
-                   enum qed_mf_mode mode, bool allow_npar_tx_switch)
+                   bool allow_npar_tx_switch)
 {
        struct pf_start_ramrod_data *p_ramrod = NULL;
        u16 sb = qed_int_get_sp_sb_id(p_hwfn);
@@ -314,7 +314,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
        int rc = -EINVAL;
-       u8 page_cnt;
+       u8 page_cnt, i;
 
        /* update initial eq producer */
        qed_eq_prod_update(p_hwfn,
@@ -339,21 +339,36 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
        p_ramrod->dont_log_ramrods      = 0;
        p_ramrod->log_type_mask         = cpu_to_le16(0xf);
 
-       switch (mode) {
-       case QED_MF_DEFAULT:
-       case QED_MF_NPAR:
-               p_ramrod->mf_mode = MF_NPAR;
-               break;
-       case QED_MF_OVLAN:
+       if (test_bit(QED_MF_OVLAN_CLSS, &p_hwfn->cdev->mf_bits))
                p_ramrod->mf_mode = MF_OVLAN;
-               break;
-       default:
-               DP_NOTICE(p_hwfn, "Unsupported MF mode, init as DEFAULT\n");
+       else
                p_ramrod->mf_mode = MF_NPAR;
-       }
 
        p_ramrod->outer_tag_config.outer_tag.tci =
-               cpu_to_le16(p_hwfn->hw_info.ovlan);
+                               cpu_to_le16(p_hwfn->hw_info.ovlan);
+       if (test_bit(QED_MF_8021Q_TAGGING, &p_hwfn->cdev->mf_bits)) {
+               p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021Q;
+       } else if (test_bit(QED_MF_8021AD_TAGGING, &p_hwfn->cdev->mf_bits)) {
+               p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021AD;
+               p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
+       }
+
+       p_ramrod->outer_tag_config.pri_map_valid = 1;
+       for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++)
+               p_ramrod->outer_tag_config.inner_to_outer_pri_map[i] = i;
+
+       /* enable_stag_pri_change should be set if the port is in BD mode
+        * or in UFP with Host Control mode.
+        */
+       if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) {
+               if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_OS)
+                       p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
+               else
+                       p_ramrod->outer_tag_config.enable_stag_pri_change = 0;
+
+               p_ramrod->outer_tag_config.outer_tag.tci |=
+                   cpu_to_le16(((u16)p_hwfn->ufp_info.tc << 13));
+       }
 
        /* Place EQ address in RAMROD */
        DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
@@ -365,7 +380,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
        qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
-       if (IS_MF_SI(p_hwfn))
+       if (test_bit(QED_MF_INTER_PF_SWITCH, &p_hwfn->cdev->mf_bits))
                p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
 
        switch (p_hwfn->hw_info.personality) {
@@ -434,6 +449,39 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn)
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
+int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn)
+{
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       int rc = -EOPNOTSUPP;
+
+       if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_UNKNOWN) {
+               DP_INFO(p_hwfn, "Invalid priority type %d\n",
+                       p_hwfn->ufp_info.pri_type);
+               return -EINVAL;
+       }
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = qed_spq_get_cid(p_hwfn);
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = QED_SPQ_MODE_CB;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                COMMON_RAMROD_PF_UPDATE, PROTOCOLID_COMMON,
+                                &init_data);
+       if (rc)
+               return rc;
+
+       p_ent->ramrod.pf_update.update_enable_stag_pri_change = true;
+       if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_OS)
+               p_ent->ramrod.pf_update.enable_stag_pri_change = 1;
+       else
+               p_ent->ramrod.pf_update.enable_stag_pri_change = 0;
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
 /* Set pf update ramrod command params */
 int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
                              struct qed_ptt *p_ptt,
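For UFP, qed_sp_pf_start() ORs (tc << 13) into the outer tag's TCI: in an 802.1Q/802.1ad tag the PCP (priority) field occupies TCI bits 15:13, so this places the function's traffic class in the S-tag priority while the low 12 bits keep carrying the outer VLAN ID. A one-line sketch of that layout using the standard VLAN constants:

#include <linux/if_vlan.h>

/* Sketch: compose a TCI word; VLAN_PRIO_SHIFT is 13, matching the
 * (tc << 13) in the pf_start hunk above.
 */
static u16 example_build_tci(u16 vid, u8 pcp)
{
        return (vid & VLAN_VID_MASK) | ((u16)pcp << VLAN_PRIO_SHIFT);
}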
index 9935978c55424f37a0e038b0c4c2b7ac1944ef6b..2d3f09ed413b489b87275126c6cdef676815b4d9 100644 (file)
@@ -290,15 +290,12 @@ struct qede_agg_info {
         * aggregation.
         */
        struct sw_rx_data buffer;
-       dma_addr_t buffer_mapping;
-
        struct sk_buff *skb;
 
        /* We need some structs from the start cookie until termination */
        u16 vlan_tag;
-       u16 start_cqe_bd_len;
-       u8 start_cqe_placement_offset;
 
+       bool tpa_start_fail;
        u8 state;
        u8 frag_id;
 
index ecbf1ded7a399c179f16d997800d70e49d6b9697..8c6fdad91986f7577338c5336d805848517951e5 100644 (file)
@@ -1508,7 +1508,8 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev)
                len =  le16_to_cpu(fp_cqe->len_on_first_bd);
                data_ptr = (u8 *)(page_address(sw_rx_data->data) +
                                  fp_cqe->placement_offset +
-                                 sw_rx_data->page_offset);
+                                 sw_rx_data->page_offset +
+                                 rxq->rx_headroom);
                if (ether_addr_equal(data_ptr,  edev->ndev->dev_addr) &&
                    ether_addr_equal(data_ptr + ETH_ALEN,
                                     edev->ndev->dev_addr)) {
index 14941303189dcffcbb1d302d925389a785c257d0..6c702399b801db1cf88f247ebcdedea93a0d24f9 100644 (file)
@@ -660,7 +660,8 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
 
        /* Add one frag and update the appropriate fields in the skb */
        skb_fill_page_desc(skb, tpa_info->frag_id++,
-                          current_bd->data, current_bd->page_offset,
+                          current_bd->data,
+                          current_bd->page_offset + rxq->rx_headroom,
                           len_on_bd);
 
        if (unlikely(qede_realloc_rx_buffer(rxq, current_bd))) {
@@ -671,8 +672,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
                goto out;
        }
 
-       qed_chain_consume(&rxq->rx_bd_ring);
-       rxq->sw_rx_cons++;
+       qede_rx_bd_ring_consume(rxq);
 
        skb->data_len += len_on_bd;
        skb->truesize += rxq->rx_buf_seg_size;
@@ -721,64 +721,129 @@ static u8 qede_check_tunn_csum(u16 flag)
        return QEDE_CSUM_UNNECESSARY | tcsum;
 }
 
+static inline struct sk_buff *
+qede_build_skb(struct qede_rx_queue *rxq,
+              struct sw_rx_data *bd, u16 len, u16 pad)
+{
+       struct sk_buff *skb;
+       void *buf;
+
+       buf = page_address(bd->data) + bd->page_offset;
+       skb = build_skb(buf, rxq->rx_buf_seg_size);
+
+       skb_reserve(skb, pad);
+       skb_put(skb, len);
+
+       return skb;
+}
+
+static struct sk_buff *
+qede_tpa_rx_build_skb(struct qede_dev *edev,
+                     struct qede_rx_queue *rxq,
+                     struct sw_rx_data *bd, u16 len, u16 pad,
+                     bool alloc_skb)
+{
+       struct sk_buff *skb;
+
+       skb = qede_build_skb(rxq, bd, len, pad);
+       bd->page_offset += rxq->rx_buf_seg_size;
+
+       if (bd->page_offset == PAGE_SIZE) {
+               if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
+                       DP_NOTICE(edev,
+                                 "Failed to allocate RX buffer for tpa start\n");
+                       bd->page_offset -= rxq->rx_buf_seg_size;
+                       page_ref_inc(bd->data);
+                       dev_kfree_skb_any(skb);
+                       return NULL;
+               }
+       } else {
+               page_ref_inc(bd->data);
+               qede_reuse_page(rxq, bd);
+       }
+
+       /* We've consumed the first BD and prepared an SKB */
+       qede_rx_bd_ring_consume(rxq);
+
+       return skb;
+}
+
+static struct sk_buff *
+qede_rx_build_skb(struct qede_dev *edev,
+                 struct qede_rx_queue *rxq,
+                 struct sw_rx_data *bd, u16 len, u16 pad)
+{
+       struct sk_buff *skb = NULL;
+
+       /* For smaller frames we still allocate an skb and memcpy the
+        * data, so the page segment can be reused instead of being
+        * unmapped.
+        */
+       if ((len + pad <= edev->rx_copybreak)) {
+               unsigned int offset = bd->page_offset + pad;
+
+               skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
+               if (unlikely(!skb))
+                       return NULL;
+
+               skb_reserve(skb, pad);
+               memcpy(skb_put(skb, len),
+                      page_address(bd->data) + offset, len);
+               qede_reuse_page(rxq, bd);
+               goto out;
+       }
+
+       skb = qede_build_skb(rxq, bd, len, pad);
+
+       if (unlikely(qede_realloc_rx_buffer(rxq, bd))) {
+               /* Incr page ref count to reuse on allocation failure so
+                * that it doesn't get freed while freeing SKB [as its
+                * already mapped there].
+                */
+               page_ref_inc(bd->data);
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+out:
+       /* We've consumed the first BD and prepared an SKB */
+       qede_rx_bd_ring_consume(rxq);
+
+       return skb;
+}
+
 static void qede_tpa_start(struct qede_dev *edev,
                           struct qede_rx_queue *rxq,
                           struct eth_fast_path_rx_tpa_start_cqe *cqe)
 {
        struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
-       struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring);
-       struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
-       struct sw_rx_data *replace_buf = &tpa_info->buffer;
-       dma_addr_t mapping = tpa_info->buffer_mapping;
        struct sw_rx_data *sw_rx_data_cons;
-       struct sw_rx_data *sw_rx_data_prod;
+       u16 pad;
 
        sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
-       sw_rx_data_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
+       pad = cqe->placement_offset + rxq->rx_headroom;
 
-       /* Use pre-allocated replacement buffer - we can't release the agg.
-        * start until its over and we don't want to risk allocation failing
-        * here, so re-allocate when aggregation will be over.
-        */
-       sw_rx_data_prod->mapping = replace_buf->mapping;
-
-       sw_rx_data_prod->data = replace_buf->data;
-       rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(mapping));
-       rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(mapping));
-       sw_rx_data_prod->page_offset = replace_buf->page_offset;
-
-       rxq->sw_rx_prod++;
+       tpa_info->skb = qede_tpa_rx_build_skb(edev, rxq, sw_rx_data_cons,
+                                             le16_to_cpu(cqe->len_on_first_bd),
+                                             pad, false);
+       tpa_info->buffer.page_offset = sw_rx_data_cons->page_offset;
+       tpa_info->buffer.mapping = sw_rx_data_cons->mapping;
 
-       /* move partial skb from cons to pool (don't unmap yet)
-        * save mapping, incase we drop the packet later on.
-        */
-       tpa_info->buffer = *sw_rx_data_cons;
-       mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi),
-                          le32_to_cpu(rx_bd_cons->addr.lo));
-
-       tpa_info->buffer_mapping = mapping;
-       rxq->sw_rx_cons++;
-
-       /* set tpa state to start only if we are able to allocate skb
-        * for this aggregation, otherwise mark as error and aggregation will
-        * be dropped
-        */
-       tpa_info->skb = netdev_alloc_skb(edev->ndev,
-                                        le16_to_cpu(cqe->len_on_first_bd));
        if (unlikely(!tpa_info->skb)) {
                DP_NOTICE(edev, "Failed to allocate SKB for gro\n");
+
+               /* Consume from the ring but do not produce, since the
+                * buffer might still be in use by the FW; it will be
+                * re-used at TPA end.
+                */
+               tpa_info->tpa_start_fail = true;
+               qede_rx_bd_ring_consume(rxq);
                tpa_info->state = QEDE_AGG_STATE_ERROR;
                goto cons_buf;
        }
 
-       /* Start filling in the aggregation info */
-       skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
        tpa_info->frag_id = 0;
        tpa_info->state = QEDE_AGG_STATE_START;
 
-       /* Store some information from first CQE */
-       tpa_info->start_cqe_placement_offset = cqe->placement_offset;
-       tpa_info->start_cqe_bd_len = le16_to_cpu(cqe->len_on_first_bd);
        if ((le16_to_cpu(cqe->pars_flags.flags) >>
             PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) &
            PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
@@ -899,6 +964,10 @@ static int qede_tpa_end(struct qede_dev *edev,
        tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
        skb = tpa_info->skb;
 
+       if (tpa_info->buffer.page_offset == PAGE_SIZE)
+               dma_unmap_page(rxq->dev, tpa_info->buffer.mapping,
+                              PAGE_SIZE, rxq->data_direction);
+
        for (i = 0; cqe->len_list[i]; i++)
                qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
                                   le16_to_cpu(cqe->len_list[i]));
@@ -919,11 +988,6 @@ static int qede_tpa_end(struct qede_dev *edev,
                       "Strange - total packet len [cqe] is %4x but SKB has len %04x\n",
                       le16_to_cpu(cqe->total_packet_len), skb->len);
 
-       memcpy(skb->data,
-              page_address(tpa_info->buffer.data) +
-              tpa_info->start_cqe_placement_offset +
-              tpa_info->buffer.page_offset, tpa_info->start_cqe_bd_len);
-
        /* Finalize the SKB */
        skb->protocol = eth_type_trans(skb, edev->ndev);
        skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -940,6 +1004,12 @@ static int qede_tpa_end(struct qede_dev *edev,
        return 1;
 err:
        tpa_info->state = QEDE_AGG_STATE_NONE;
+
+       if (tpa_info->tpa_start_fail) {
+               qede_reuse_page(rxq, &tpa_info->buffer);
+               tpa_info->tpa_start_fail = false;
+       }
+
        dev_kfree_skb_any(tpa_info->skb);
        tpa_info->skb = NULL;
        return 0;
@@ -1058,65 +1128,6 @@ static bool qede_rx_xdp(struct qede_dev *edev,
        return false;
 }
 
-static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
-                                           struct qede_rx_queue *rxq,
-                                           struct sw_rx_data *bd, u16 len,
-                                           u16 pad)
-{
-       unsigned int offset = bd->page_offset + pad;
-       struct skb_frag_struct *frag;
-       struct page *page = bd->data;
-       unsigned int pull_len;
-       struct sk_buff *skb;
-       unsigned char *va;
-
-       /* Allocate a new SKB with a sufficient large header len */
-       skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
-       if (unlikely(!skb))
-               return NULL;
-
-       /* Copy data into SKB - if it's small, we can simply copy it and
-        * re-use the already allcoated & mapped memory.
-        */
-       if (len + pad <= edev->rx_copybreak) {
-               skb_put_data(skb, page_address(page) + offset, len);
-               qede_reuse_page(rxq, bd);
-               goto out;
-       }
-
-       frag = &skb_shinfo(skb)->frags[0];
-
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                       page, offset, len, rxq->rx_buf_seg_size);
-
-       va = skb_frag_address(frag);
-       pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
-
-       /* Align the pull_len to optimize memcpy */
-       memcpy(skb->data, va, ALIGN(pull_len, sizeof(long)));
-
-       /* Correct the skb & frag sizes offset after the pull */
-       skb_frag_size_sub(frag, pull_len);
-       frag->page_offset += pull_len;
-       skb->data_len -= pull_len;
-       skb->tail += pull_len;
-
-       if (unlikely(qede_realloc_rx_buffer(rxq, bd))) {
-               /* Incr page ref count to reuse on allocation failure so
-                * that it doesn't get freed while freeing SKB [as its
-                * already mapped there].
-                */
-               page_ref_inc(page);
-               dev_kfree_skb_any(skb);
-               return NULL;
-       }
-
-out:
-       /* We've consumed the first BD and prepared an SKB */
-       qede_rx_bd_ring_consume(rxq);
-       return skb;
-}
-
 static int qede_rx_build_jumbo(struct qede_dev *edev,
                               struct qede_rx_queue *rxq,
                               struct sk_buff *skb,
@@ -1157,7 +1168,7 @@ static int qede_rx_build_jumbo(struct qede_dev *edev,
                               PAGE_SIZE, DMA_FROM_DEVICE);
 
                skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
-                                  bd->data, 0, cur_size);
+                                  bd->data, rxq->rx_headroom, cur_size);
 
                skb->truesize += PAGE_SIZE;
                skb->data_len += cur_size;
@@ -1256,7 +1267,7 @@ static int qede_rx_process_cqe(struct qede_dev *edev,
        /* Basic validation passed; Need to prepare an SKB. This would also
         * guarantee to finally consume the first BD upon success.
         */
-       skb = qede_rx_allocate_skb(edev, rxq, bd, len, pad);
+       skb = qede_rx_build_skb(edev, rxq, bd, len, pad);
        if (!skb) {
                rxq->rx_alloc_errors++;
                qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
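
The copybreak split in qede_rx_build_skb() above (memcpy small frames so the RX page is recycled in place, wrap the page itself for larger ones) can be sketched in plain C. A minimal userspace sketch; the names and the 256-byte threshold are illustrative, not the driver's actual values:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define RX_COPYBREAK 256            /* illustrative threshold */

    struct rx_buf {
        unsigned char *page;            /* stand-in for the mapped RX page */
        size_t offset;
    };

    /* Small frame: copy out and keep the page; large frame: return NULL
     * to signal "wrap the page zero-copy" (build_skb() in the driver).
     */
    static unsigned char *rx_copy_small(const struct rx_buf *bd,
                                        size_t len, size_t pad)
    {
        if (len + pad > RX_COPYBREAK)
            return NULL;

        unsigned char *copy = malloc(len);
        if (copy)
            memcpy(copy, bd->page + bd->offset + pad, len);
        return copy;
    }

    int main(void)
    {
        unsigned char page[4096] = "hi";
        struct rx_buf bd = { .page = page, .offset = 0 };

        unsigned char *skb = rx_copy_small(&bd, 2, 0);
        printf(skb ? "copied, page reused\n" : "zero-copy path\n");
        free(skb);
        return 0;
    }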
index a01e7d6e5442f079e9006811b82b4feb02dc23bc..7abaf27405309a251c48acb0c5e3eb7ffd8384f4 100644 (file)
@@ -199,7 +199,7 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
 
        /* Enable/Disable Tx switching for PF */
        if ((rc == num_vfs_param) && netif_running(edev->ndev) &&
-           qed_info->mf_mode != QED_MF_NPAR && qed_info->tx_switching) {
+           !qed_info->b_inter_pf_switch && qed_info->tx_switching) {
                vport_params->vport_id = 0;
                vport_params->update_tx_switching_flg = 1;
                vport_params->tx_switching_flg = num_vfs_param ? 1 : 0;
@@ -1066,13 +1066,12 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
        DP_INFO(edev, "Starting qede_remove\n");
 
+       qede_rdma_dev_remove(edev);
        unregister_netdev(ndev);
        cancel_delayed_work_sync(&edev->sp_task);
 
        qede_ptp_disable(edev);
 
-       qede_rdma_dev_remove(edev);
-
        edev->ops->common->set_power_state(cdev, PCI_D0);
 
        pci_set_drvdata(pdev, NULL);
@@ -1197,30 +1196,8 @@ static void qede_free_rx_buffers(struct qede_dev *edev,
        }
 }
 
-static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
-{
-       int i;
-
-       if (edev->gro_disable)
-               return;
-
-       for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
-               struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
-               struct sw_rx_data *replace_buf = &tpa_info->buffer;
-
-               if (replace_buf->data) {
-                       dma_unmap_page(&edev->pdev->dev,
-                                      replace_buf->mapping,
-                                      PAGE_SIZE, DMA_FROM_DEVICE);
-                       __free_page(replace_buf->data);
-               }
-       }
-}
-
 static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
-       qede_free_sge_mem(edev, rxq);
-
        /* Free rx buffers */
        qede_free_rx_buffers(edev, rxq);
 
@@ -1232,45 +1209,15 @@ static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
        edev->ops->common->chain_free(edev->cdev, &rxq->rx_comp_ring);
 }
 
-static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
+static void qede_set_tpa_param(struct qede_rx_queue *rxq)
 {
-       dma_addr_t mapping;
        int i;
 
-       if (edev->gro_disable)
-               return 0;
-
        for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
                struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
-               struct sw_rx_data *replace_buf = &tpa_info->buffer;
 
-               replace_buf->data = alloc_pages(GFP_ATOMIC, 0);
-               if (unlikely(!replace_buf->data)) {
-                       DP_NOTICE(edev,
-                                 "Failed to allocate TPA skb pool [replacement buffer]\n");
-                       goto err;
-               }
-
-               mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0,
-                                      PAGE_SIZE, DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
-                       DP_NOTICE(edev,
-                                 "Failed to map TPA replacement buffer\n");
-                       goto err;
-               }
-
-               replace_buf->mapping = mapping;
-               tpa_info->buffer.page_offset = 0;
-               tpa_info->buffer_mapping = mapping;
                tpa_info->state = QEDE_AGG_STATE_NONE;
        }
-
-       return 0;
-err:
-       qede_free_sge_mem(edev, rxq);
-       edev->gro_disable = 1;
-       edev->ndev->features &= ~NETIF_F_GRO_HW;
-       return -ENOMEM;
 }
 
 /* This function allocates all memory needed per Rx queue */
@@ -1281,19 +1228,24 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
        rxq->num_rx_buffers = edev->q_num_rx_buffers;
 
        rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
-       rxq->rx_headroom = edev->xdp_prog ? XDP_PACKET_HEADROOM : 0;
+
+       rxq->rx_headroom = edev->xdp_prog ? XDP_PACKET_HEADROOM : NET_SKB_PAD;
+       size = rxq->rx_headroom +
+              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
        /* Make sure that the headroom and payload fit in a single page */
-       if (rxq->rx_buf_size + rxq->rx_headroom > PAGE_SIZE)
-               rxq->rx_buf_size = PAGE_SIZE - rxq->rx_headroom;
+       if (rxq->rx_buf_size + size > PAGE_SIZE)
+               rxq->rx_buf_size = PAGE_SIZE - size;
 
-       /* Segment size to spilt a page in multiple equal parts,
+       /* Segment size to split a page into multiple equal parts,
         * unless XDP is used in which case we'd use the entire page.
         */
-       if (!edev->xdp_prog)
-               rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size);
-       else
+       if (!edev->xdp_prog) {
+               size = size + rxq->rx_buf_size;
+               rxq->rx_buf_seg_size = roundup_pow_of_two(size);
+       } else {
                rxq->rx_buf_seg_size = PAGE_SIZE;
+       }
 
        /* Allocate the parallel driver ring for Rx buffers */
        size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE;
@@ -1337,7 +1289,8 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
                }
        }
 
-       rc = qede_alloc_sge_mem(edev, rxq);
+       if (!edev->gro_disable)
+               qede_set_tpa_param(rxq);
 err:
        return rc;
 }
@@ -1928,7 +1881,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
        vport_update_params->update_vport_active_flg = 1;
        vport_update_params->vport_active_flg = 1;
 
-       if ((qed_info->mf_mode == QED_MF_NPAR || pci_num_vf(edev->pdev)) &&
+       if ((qed_info->b_inter_pf_switch || pci_num_vf(edev->pdev)) &&
            qed_info->tx_switching) {
                vport_update_params->update_tx_switching_flg = 1;
                vport_update_params->tx_switching_flg = 1;
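
The sizing change in qede_alloc_mem_rxq() above reserves headroom plus skb_shared_info tailroom in every RX segment so build_skb() can wrap a segment directly. A userspace sketch of that arithmetic, using stand-in constants for the kernel macros:

    #include <stdio.h>

    #define PAGE_SZ   4096u
    #define HEADROOM  64u    /* stand-in for NET_SKB_PAD */
    #define SHINFO    320u   /* stand-in for SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */

    static unsigned int roundup_pow2(unsigned int v)
    {
        unsigned int p = 1;
        while (p < v)
            p <<= 1;
        return p;
    }

    int main(void)
    {
        unsigned int buf = 1536;                 /* MTU-sized payload */
        unsigned int overhead = HEADROOM + SHINFO;

        if (buf + overhead > PAGE_SZ)            /* keep it in one page */
            buf = PAGE_SZ - overhead;

        unsigned int seg = roundup_pow2(buf + overhead);
        printf("segment %u -> %u buffers per page\n", seg, PAGE_SZ / seg);
        return 0;
    }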
index 50b142fad6b8206c806a91248a0b7ac9f5b196d7..1900bf7e67d1297dc9b24648e99d0ec50f779305 100644 (file)
@@ -238,7 +238,7 @@ qede_rdma_get_free_event_node(struct qede_dev *edev)
        }
 
        if (!found) {
-               event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
+               event_node = kzalloc(sizeof(*event_node), GFP_ATOMIC);
                if (!event_node) {
                        DP_NOTICE(edev,
                                  "qedr: Could not allocate memory for rdma work\n");
index d5a32b7c7dc5a4d97c89ba9d33ca769e51c00daf..031f6e6ee9c17af99c91cc62f12af3e5e3e097be 100644 (file)
@@ -683,10 +683,11 @@ static int emac_tx_q_desc_alloc(struct emac_adapter *adpt,
                                struct emac_tx_queue *tx_q)
 {
        struct emac_ring_header *ring_header = &adpt->ring_header;
+       int node = dev_to_node(adpt->netdev->dev.parent);
        size_t size;
 
        size = sizeof(struct emac_buffer) * tx_q->tpd.count;
-       tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL);
+       tx_q->tpd.tpbuff = kzalloc_node(size, GFP_KERNEL, node);
        if (!tx_q->tpd.tpbuff)
                return -ENOMEM;
 
@@ -723,11 +724,12 @@ static void emac_rx_q_bufs_free(struct emac_adapter *adpt)
 static int emac_rx_descs_alloc(struct emac_adapter *adpt)
 {
        struct emac_ring_header *ring_header = &adpt->ring_header;
+       int node = dev_to_node(adpt->netdev->dev.parent);
        struct emac_rx_queue *rx_q = &adpt->rx_q;
        size_t size;
 
        size = sizeof(struct emac_buffer) * rx_q->rfd.count;
-       rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL);
+       rx_q->rfd.rfbuff = kzalloc_node(size, GFP_KERNEL, node);
        if (!rx_q->rfd.rfbuff)
                return -ENOMEM;
 
@@ -920,14 +922,13 @@ static void emac_mac_rx_descs_refill(struct emac_adapter *adpt,
 static void emac_adjust_link(struct net_device *netdev)
 {
        struct emac_adapter *adpt = netdev_priv(netdev);
-       struct emac_sgmii *sgmii = &adpt->phy;
        struct phy_device *phydev = netdev->phydev;
 
        if (phydev->link) {
                emac_mac_start(adpt);
-               sgmii->link_up(adpt);
+               emac_sgmii_link_change(adpt, true);
        } else {
-               sgmii->link_down(adpt);
+               emac_sgmii_link_change(adpt, false);
                emac_mac_stop(adpt);
        }
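
The kzalloc_node() conversions above allocate the queue bookkeeping on the NUMA node closest to the NIC, avoiding cross-node memory traffic on the hot path. A hedged kernel-style sketch of the pattern (illustrative helper; assumes <linux/slab.h> and <linux/device.h>):

    /* Allocate per-queue bookkeeping on the device's NUMA node; when the
     * device has no affinity, dev_to_node() returns NUMA_NO_NODE and
     * kzalloc_node() falls back to "no preference".
     */
    static void *alloc_ring_local(struct device *dev, size_t count, size_t elem)
    {
            return kzalloc_node(count * elem, GFP_KERNEL, dev_to_node(dev));
    }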
 
index e8ab512ee7e3450babda0c2ff1062fc253861d93..562420b834dfa5c5e9a9a74a019d2382fe0f53d9 100644 (file)
 
 #define SERDES_START_WAIT_TIMES                        100
 
+int emac_sgmii_init(struct emac_adapter *adpt)
+{
+       if (!(adpt->phy.sgmii_ops && adpt->phy.sgmii_ops->init))
+               return 0;
+
+       return adpt->phy.sgmii_ops->init(adpt);
+}
+
+int emac_sgmii_open(struct emac_adapter *adpt)
+{
+       if (!(adpt->phy.sgmii_ops && adpt->phy.sgmii_ops->open))
+               return 0;
+
+       return adpt->phy.sgmii_ops->open(adpt);
+}
+
+void emac_sgmii_close(struct emac_adapter *adpt)
+{
+       if (!(adpt->phy.sgmii_ops && adpt->phy.sgmii_ops->close))
+               return;
+
+       adpt->phy.sgmii_ops->close(adpt);
+}
+
+int emac_sgmii_link_change(struct emac_adapter *adpt, bool link_state)
+{
+       if (!(adpt->phy.sgmii_ops && adpt->phy.sgmii_ops->link_change))
+               return 0;
+
+       return adpt->phy.sgmii_ops->link_change(adpt, link_state);
+}
+
+void emac_sgmii_reset(struct emac_adapter *adpt)
+{
+       if (!(adpt->phy.sgmii_ops && adpt->phy.sgmii_ops->reset))
+               return;
+
+       adpt->phy.sgmii_ops->reset(adpt);
+}
+
 /* Initialize the SGMII link between the internal and external PHYs. */
 static void emac_sgmii_link_init(struct emac_adapter *adpt)
 {
@@ -163,21 +203,21 @@ static void emac_sgmii_reset_prepare(struct emac_adapter *adpt)
        msleep(50);
 }
 
-void emac_sgmii_reset(struct emac_adapter *adpt)
+static void emac_sgmii_common_reset(struct emac_adapter *adpt)
 {
        int ret;
 
        emac_sgmii_reset_prepare(adpt);
        emac_sgmii_link_init(adpt);
 
-       ret = adpt->phy.initialize(adpt);
+       ret = emac_sgmii_init(adpt);
        if (ret)
                netdev_err(adpt->netdev,
                           "could not reinitialize internal PHY (error=%i)\n",
                           ret);
 }
 
-static int emac_sgmii_open(struct emac_adapter *adpt)
+static int emac_sgmii_common_open(struct emac_adapter *adpt)
 {
        struct emac_sgmii *sgmii = &adpt->phy;
        int ret;
@@ -201,43 +241,53 @@ static int emac_sgmii_open(struct emac_adapter *adpt)
        return 0;
 }
 
-static int emac_sgmii_close(struct emac_adapter *adpt)
+static void emac_sgmii_common_close(struct emac_adapter *adpt)
 {
        struct emac_sgmii *sgmii = &adpt->phy;
 
        /* Make sure interrupts are disabled */
        writel(0, sgmii->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
        free_irq(sgmii->irq, adpt);
-
-       return 0;
 }
 
 /* The error interrupts are only valid after the link is up */
-static int emac_sgmii_link_up(struct emac_adapter *adpt)
+static int emac_sgmii_common_link_change(struct emac_adapter *adpt, bool linkup)
 {
        struct emac_sgmii *sgmii = &adpt->phy;
        int ret;
 
-       /* Clear and enable interrupts */
-       ret = emac_sgmii_irq_clear(adpt, 0xff);
-       if (ret)
-               return ret;
+       if (linkup) {
+               /* Clear and enable interrupts */
+               ret = emac_sgmii_irq_clear(adpt, 0xff);
+               if (ret)
+                       return ret;
 
-       writel(SGMII_ISR_MASK, sgmii->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+               writel(SGMII_ISR_MASK,
+                      sgmii->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+       } else {
+               /* Disable interrupts */
+               writel(0, sgmii->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+               synchronize_irq(sgmii->irq);
+       }
 
        return 0;
 }
 
-static int emac_sgmii_link_down(struct emac_adapter *adpt)
-{
-       struct emac_sgmii *sgmii = &adpt->phy;
-
-       /* Disable interrupts */
-       writel(0, sgmii->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
-       synchronize_irq(sgmii->irq);
+static struct sgmii_ops qdf2432_ops = {
+       .init = emac_sgmii_init_qdf2432,
+       .open = emac_sgmii_common_open,
+       .close = emac_sgmii_common_close,
+       .link_change = emac_sgmii_common_link_change,
+       .reset = emac_sgmii_common_reset,
+};
 
-       return 0;
-}
+static struct sgmii_ops qdf2400_ops = {
+       .init = emac_sgmii_init_qdf2400,
+       .open = emac_sgmii_common_open,
+       .close = emac_sgmii_common_close,
+       .link_change = emac_sgmii_common_link_change,
+       .reset = emac_sgmii_common_reset,
+};
 
 static int emac_sgmii_acpi_match(struct device *dev, void *data)
 {
@@ -249,7 +299,7 @@ static int emac_sgmii_acpi_match(struct device *dev, void *data)
                {}
        };
        const struct acpi_device_id *id = acpi_match_device(match_table, dev);
-       emac_sgmii_function *initialize = data;
+       struct sgmii_ops **ops = data;
 
        if (id) {
                acpi_handle handle = ACPI_HANDLE(dev);
@@ -270,10 +320,10 @@ static int emac_sgmii_acpi_match(struct device *dev, void *data)
 
                switch (hrv) {
                case 1:
-                       *initialize = emac_sgmii_init_qdf2432;
+                       *ops = &qdf2432_ops;
                        return 1;
                case 2:
-                       *initialize = emac_sgmii_init_qdf2400;
+                       *ops = &qdf2400_ops;
                        return 1;
                }
        }
@@ -294,14 +344,6 @@ static const struct of_device_id emac_sgmii_dt_match[] = {
        {}
 };
 
-/* Dummy function for systems without an internal PHY. This avoids having
- * to check for NULL pointers before calling the functions.
- */
-static int emac_sgmii_dummy(struct emac_adapter *adpt)
-{
-       return 0;
-}
-
 int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
 {
        struct platform_device *sgmii_pdev = NULL;
@@ -312,22 +354,11 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
        if (has_acpi_companion(&pdev->dev)) {
                struct device *dev;
 
-               dev = device_find_child(&pdev->dev, &phy->initialize,
+               dev = device_find_child(&pdev->dev, &phy->sgmii_ops,
                                        emac_sgmii_acpi_match);
 
                if (!dev) {
                        dev_warn(&pdev->dev, "cannot find internal phy node\n");
-                       /* There is typically no internal PHY on emulation
-                        * systems, so if we can't find the node, assume
-                        * we are on an emulation system and stub-out
-                        * support for the internal PHY.  These systems only
-                        * use ACPI.
-                        */
-                       phy->open = emac_sgmii_dummy;
-                       phy->close = emac_sgmii_dummy;
-                       phy->link_up = emac_sgmii_dummy;
-                       phy->link_down = emac_sgmii_dummy;
-
                        return 0;
                }
 
@@ -355,14 +386,9 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
                        goto error_put_device;
                }
 
-               phy->initialize = (emac_sgmii_function)match->data;
+               phy->sgmii_ops->init = match->data;
        }
 
-       phy->open = emac_sgmii_open;
-       phy->close = emac_sgmii_close;
-       phy->link_up = emac_sgmii_link_up;
-       phy->link_down = emac_sgmii_link_down;
-
        /* Base address is the first address */
        res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0);
        if (!res) {
@@ -386,7 +412,7 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
                }
        }
 
-       ret = phy->initialize(adpt);
+       ret = emac_sgmii_init(adpt);
        if (ret)
                goto error;
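
The refactoring above replaces individual function pointers with a NULL-tolerant ops table, so callers never have to check which hooks a hardware flavour implements. A runnable userspace sketch of that dispatch pattern; all names are illustrative:

    #include <stdio.h>

    struct phy_ops {
        int  (*init)(void);
        void (*reset)(void);
    };

    static int qdf_init(void) { puts("qdf init"); return 0; }

    static const struct phy_ops qdf_ops = { .init = qdf_init };  /* no reset hook */

    static int phy_init(const struct phy_ops *ops)
    {
        return (ops && ops->init) ? ops->init() : 0;   /* absent hook == success */
    }

    static void phy_reset(const struct phy_ops *ops)
    {
        if (ops && ops->reset)
            ops->reset();                              /* silently skipped here */
    }

    int main(void)
    {
        phy_init(&qdf_ops);
        phy_reset(&qdf_ops);   /* no-op: hook not provided */
        return 0;
    }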
 
index e7c0c3b2baa444a64823f2fec608c6566498c090..31ba21eb61d2b36dd0b670062a0509bec56e7530 100644 (file)
 struct emac_adapter;
 struct platform_device;
 
-typedef int (*emac_sgmii_function)(struct emac_adapter *adpt);
+/** struct sgmii_ops - internal emac PHY operations
+ * @init: initialization function
+ * @open: called when the driver is opened
+ * @close: called when the driver is closed
+ * @link_change: called when the link state changes
+ * @reset: called to reset the internal PHY
+ */
+struct sgmii_ops {
+       int (*init)(struct emac_adapter *adpt);
+       int (*open)(struct emac_adapter *adpt);
+       void (*close)(struct emac_adapter *adpt);
+       int (*link_change)(struct emac_adapter *adpt, bool link_state);
+       void (*reset)(struct emac_adapter *adpt);
+};
 
 /** emac_sgmii - internal emac phy
  * @base base address
  * @digital per-lane digital block
  * @irq the interrupt number
  * @decode_error_count reference count of consecutive decode errors
- * @initialize initialization function
- * @open called when the driver is opened
- * @close called when the driver is closed
- * @link_up called when the link comes up
- * @link_down called when the link comes down
+ * @sgmii_ops: SGMII operations table
  */
 struct emac_sgmii {
        void __iomem            *base;
        void __iomem            *digital;
        unsigned int            irq;
        atomic_t                decode_error_count;
-       emac_sgmii_function     initialize;
-       emac_sgmii_function     open;
-       emac_sgmii_function     close;
-       emac_sgmii_function     link_up;
-       emac_sgmii_function     link_down;
+       struct  sgmii_ops       *sgmii_ops;
 };
 
 int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt);
-void emac_sgmii_reset(struct emac_adapter *adpt);
 
 int emac_sgmii_init_fsm9900(struct emac_adapter *adpt);
 int emac_sgmii_init_qdf2432(struct emac_adapter *adpt);
 int emac_sgmii_init_qdf2400(struct emac_adapter *adpt);
 
+int emac_sgmii_init(struct emac_adapter *adpt);
+int emac_sgmii_open(struct emac_adapter *adpt);
+void emac_sgmii_close(struct emac_adapter *adpt);
+int emac_sgmii_link_change(struct emac_adapter *adpt, bool link_state);
+void emac_sgmii_reset(struct emac_adapter *adpt);
 #endif
index 13235baf476633df5b3ecfd5b4aa90c1519328b9..2a0cbc535a2ed5d527f6a622093d653766c1082d 100644 (file)
@@ -253,7 +253,7 @@ static int emac_open(struct net_device *netdev)
                return ret;
        }
 
-       ret = adpt->phy.open(adpt);
+       ret = emac_sgmii_open(adpt);
        if (ret) {
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
@@ -264,7 +264,7 @@ static int emac_open(struct net_device *netdev)
        if (ret) {
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
-               adpt->phy.close(adpt);
+               emac_sgmii_close(adpt);
                return ret;
        }
 
@@ -278,7 +278,7 @@ static int emac_close(struct net_device *netdev)
 
        mutex_lock(&adpt->reset_lock);
 
-       adpt->phy.close(adpt);
+       emac_sgmii_close(adpt);
        emac_mac_down(adpt);
        emac_mac_rx_tx_rings_free_all(adpt);
 
@@ -761,11 +761,10 @@ static void emac_shutdown(struct platform_device *pdev)
 {
        struct net_device *netdev = dev_get_drvdata(&pdev->dev);
        struct emac_adapter *adpt = netdev_priv(netdev);
-       struct emac_sgmii *sgmii = &adpt->phy;
 
        if (netdev->flags & IFF_UP) {
                /* Closing the SGMII turns off its interrupts */
-               sgmii->close(adpt);
+               emac_sgmii_close(adpt);
 
                /* Resetting the MAC turns off all DMA and its interrupts */
                emac_mac_reset(adpt);
index 0b5b5da801988324d687752b7fabb14e5487da0c..34ac45a774e72e3f28c8e888ec9c4b8c24bc3086 100644 (file)
@@ -54,11 +54,24 @@ struct rmnet_pcpu_stats {
        struct u64_stats_sync syncp;
 };
 
+struct rmnet_priv_stats {
+       u64 csum_ok;
+       u64 csum_valid_unset;
+       u64 csum_validation_failed;
+       u64 csum_err_bad_buffer;
+       u64 csum_err_invalid_ip_version;
+       u64 csum_err_invalid_transport;
+       u64 csum_fragmented_pkt;
+       u64 csum_skipped;
+       u64 csum_sw;
+};
+
 struct rmnet_priv {
        u8 mux_id;
        struct net_device *real_dev;
        struct rmnet_pcpu_stats __percpu *pcpu_stats;
        struct gro_cells gro_cells;
+       struct rmnet_priv_stats stats;
 };
 
 struct rmnet_port *rmnet_get_port(struct net_device *real_dev);
index 6fcd586e980483ef8480dea17eeba7219495a5a7..7fd86d40a3374df1fba991ece10e6ec48bc197e1 100644 (file)
@@ -148,7 +148,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 
        if (skb_headroom(skb) < required_headroom) {
                if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL))
-                       goto fail;
+                       return -ENOMEM;
        }
 
        if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4)
@@ -156,17 +156,13 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 
        map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
        if (!map_header)
-               goto fail;
+               return -ENOMEM;
 
        map_header->mux_id = mux_id;
 
        skb->protocol = htons(ETH_P_MAP);
 
        return 0;
-
-fail:
-       kfree_skb(skb);
-       return -ENOMEM;
 }
 
 static void
@@ -228,15 +224,18 @@ void rmnet_egress_handler(struct sk_buff *skb)
        mux_id = priv->mux_id;
 
        port = rmnet_get_port(skb->dev);
-       if (!port) {
-               kfree_skb(skb);
-               return;
-       }
+       if (!port)
+               goto drop;
 
        if (rmnet_map_egress_handler(skb, port, mux_id, orig_dev))
-               return;
+               goto drop;
 
        rmnet_vnd_tx_fixup(skb, orig_dev);
 
        dev_queue_xmit(skb);
+       return;
+
+drop:
+       this_cpu_inc(priv->pcpu_stats->stats.tx_drops);
+       kfree_skb(skb);
 }
index 78fdad0c6f76b1358906f99f095cd9abb2a27562..56a93df962e6a66c83653e720b4c4571679fdb73 100644 (file)
@@ -69,17 +69,9 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
        struct rmnet_map_control_command *cmd;
        int xmit_status;
 
-       if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
-               if (skb->len < sizeof(struct rmnet_map_header) +
-                   RMNET_MAP_GET_LENGTH(skb) +
-                   sizeof(struct rmnet_map_dl_csum_trailer)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
-               skb_trim(skb, skb->len -
-                        sizeof(struct rmnet_map_dl_csum_trailer));
-       }
+       if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
+               skb_trim(skb,
+                        skb->len - sizeof(struct rmnet_map_dl_csum_trailer));
 
        skb->protocol = htons(ETH_P_MAP);
 
index a6ea09416f8ddac418da84aea6dc3ca2ce2c4de5..57a9c314a665fc8aae9ce94073b9a256fa4973ee 100644 (file)
@@ -48,7 +48,8 @@ static __sum16 *rmnet_map_get_csum_field(unsigned char protocol,
 
 static int
 rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
-                              struct rmnet_map_dl_csum_trailer *csum_trailer)
+                              struct rmnet_map_dl_csum_trailer *csum_trailer,
+                              struct rmnet_priv *priv)
 {
        __sum16 *csum_field, csum_temp, pseudo_csum, hdr_csum, ip_payload_csum;
        u16 csum_value, csum_value_final;
@@ -58,19 +59,25 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 
        ip4h = (struct iphdr *)(skb->data);
        if ((ntohs(ip4h->frag_off) & IP_MF) ||
-           ((ntohs(ip4h->frag_off) & IP_OFFSET) > 0))
+           ((ntohs(ip4h->frag_off) & IP_OFFSET) > 0)) {
+               priv->stats.csum_fragmented_pkt++;
                return -EOPNOTSUPP;
+       }
 
        txporthdr = skb->data + ip4h->ihl * 4;
 
        csum_field = rmnet_map_get_csum_field(ip4h->protocol, txporthdr);
 
-       if (!csum_field)
+       if (!csum_field) {
+               priv->stats.csum_err_invalid_transport++;
                return -EPROTONOSUPPORT;
+       }
 
        /* RFC 768 - Skip IPv4 UDP packets where sender checksum field is 0 */
-       if (*csum_field == 0 && ip4h->protocol == IPPROTO_UDP)
+       if (*csum_field == 0 && ip4h->protocol == IPPROTO_UDP) {
+               priv->stats.csum_skipped++;
                return 0;
+       }
 
        csum_value = ~ntohs(csum_trailer->csum_value);
        hdr_csum = ~ip_fast_csum(ip4h, (int)ip4h->ihl);
@@ -102,16 +109,20 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
                }
        }
 
-       if (csum_value_final == ntohs((__force __be16)*csum_field))
+       if (csum_value_final == ntohs((__force __be16)*csum_field)) {
+               priv->stats.csum_ok++;
                return 0;
-       else
+       } else {
+               priv->stats.csum_validation_failed++;
                return -EINVAL;
+       }
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static int
 rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
-                              struct rmnet_map_dl_csum_trailer *csum_trailer)
+                              struct rmnet_map_dl_csum_trailer *csum_trailer,
+                              struct rmnet_priv *priv)
 {
        __sum16 *csum_field, ip6_payload_csum, pseudo_csum, csum_temp;
        u16 csum_value, csum_value_final;
@@ -125,8 +136,10 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
        txporthdr = skb->data + sizeof(struct ipv6hdr);
        csum_field = rmnet_map_get_csum_field(ip6h->nexthdr, txporthdr);
 
-       if (!csum_field)
+       if (!csum_field) {
+               priv->stats.csum_err_invalid_transport++;
                return -EPROTONOSUPPORT;
+       }
 
        csum_value = ~ntohs(csum_trailer->csum_value);
        ip6_hdr_csum = (__force __be16)
@@ -164,10 +177,13 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
                }
        }
 
-       if (csum_value_final == ntohs((__force __be16)*csum_field))
+       if (csum_value_final == ntohs((__force __be16)*csum_field)) {
+               priv->stats.csum_ok++;
                return 0;
-       else
+       } else {
+               priv->stats.csum_validation_failed++;
                return -EINVAL;
+       }
 }
 #endif
 
@@ -339,24 +355,34 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
  */
 int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 {
+       struct rmnet_priv *priv = netdev_priv(skb->dev);
        struct rmnet_map_dl_csum_trailer *csum_trailer;
 
-       if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM)))
+       if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
+               priv->stats.csum_sw++;
                return -EOPNOTSUPP;
+       }
 
        csum_trailer = (struct rmnet_map_dl_csum_trailer *)(skb->data + len);
 
-       if (!csum_trailer->valid)
+       if (!csum_trailer->valid) {
+               priv->stats.csum_valid_unset++;
                return -EINVAL;
+       }
 
-       if (skb->protocol == htons(ETH_P_IP))
-               return rmnet_map_ipv4_dl_csum_trailer(skb, csum_trailer);
-       else if (skb->protocol == htons(ETH_P_IPV6))
+       if (skb->protocol == htons(ETH_P_IP)) {
+               return rmnet_map_ipv4_dl_csum_trailer(skb, csum_trailer, priv);
+       } else if (skb->protocol == htons(ETH_P_IPV6)) {
 #if IS_ENABLED(CONFIG_IPV6)
-               return rmnet_map_ipv6_dl_csum_trailer(skb, csum_trailer);
+               return rmnet_map_ipv6_dl_csum_trailer(skb, csum_trailer, priv);
 #else
+               priv->stats.csum_err_invalid_ip_version++;
                return -EPROTONOSUPPORT;
 #endif
+       } else {
+               priv->stats.csum_err_invalid_ip_version++;
+               return -EPROTONOSUPPORT;
+       }
 
        return 0;
 }
@@ -367,6 +393,7 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
                                      struct net_device *orig_dev)
 {
+       struct rmnet_priv *priv = netdev_priv(orig_dev);
        struct rmnet_map_ul_csum_header *ul_header;
        void *iphdr;
 
@@ -389,8 +416,11 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
                        rmnet_map_ipv6_ul_csum_header(iphdr, ul_header, skb);
                        return;
 #else
+                       priv->stats.csum_err_invalid_ip_version++;
                        goto sw_csum;
 #endif
+               } else {
+                       priv->stats.csum_err_invalid_ip_version++;
                }
        }
 
@@ -399,4 +429,6 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
        ul_header->csum_insert_offset = 0;
        ul_header->csum_enabled = 0;
        ul_header->udp_ip4_ind = 0;
+
+       priv->stats.csum_sw++;
 }
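
The checksum-trailer validation instrumented above is built on 16-bit one's-complement arithmetic: sums fold the carry back in, and a transport checksum verifies when the folded total of pseudo-header, payload and the stored checksum is 0xffff. A runnable userspace sketch of that folding:

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t csum16_add(uint16_t a, uint16_t b)
    {
        uint32_t s = (uint32_t)a + b;
        return (uint16_t)((s & 0xffff) + (s >> 16));   /* end-around carry */
    }

    int main(void)
    {
        /* Toy "packet": two data words plus the checksum the sender stored */
        uint16_t w0 = 0x4500, w1 = 0x003c;
        uint16_t stored = (uint16_t)~csum16_add(w0, w1);

        uint16_t total = csum16_add(csum16_add(w0, w1), stored);
        printf("verifies: %s\n", total == 0xffff ? "yes" : "no");
        return 0;
    }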
index 2ea16a088de8731cc96a2db88c844fa1ee0810a0..cb02e1a015c1a207db2dda91518562844d343a1f 100644 (file)
@@ -152,6 +152,56 @@ static const struct net_device_ops rmnet_vnd_ops = {
        .ndo_get_stats64 = rmnet_get_stats64,
 };
 
+static const char rmnet_gstrings_stats[][ETH_GSTRING_LEN] = {
+       "Checksum ok",
+       "Checksum valid bit not set",
+       "Checksum validation failed",
+       "Checksum error bad buffer",
+       "Checksum error bad ip version",
+       "Checksum error bad transport",
+       "Checksum skipped on ip fragment",
+       "Checksum skipped",
+       "Checksum computed in software",
+};
+
+static void rmnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+       switch (stringset) {
+       case ETH_SS_STATS:
+               memcpy(buf, &rmnet_gstrings_stats,
+                      sizeof(rmnet_gstrings_stats));
+               break;
+       }
+}
+
+static int rmnet_get_sset_count(struct net_device *dev, int sset)
+{
+       switch (sset) {
+       case ETH_SS_STATS:
+               return ARRAY_SIZE(rmnet_gstrings_stats);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static void rmnet_get_ethtool_stats(struct net_device *dev,
+                                   struct ethtool_stats *stats, u64 *data)
+{
+       struct rmnet_priv *priv = netdev_priv(dev);
+       struct rmnet_priv_stats *st = &priv->stats;
+
+       if (!data)
+               return;
+
+       memcpy(data, st, ARRAY_SIZE(rmnet_gstrings_stats) * sizeof(u64));
+}
+
+static const struct ethtool_ops rmnet_ethtool_ops = {
+       .get_ethtool_stats = rmnet_get_ethtool_stats,
+       .get_strings = rmnet_get_strings,
+       .get_sset_count = rmnet_get_sset_count,
+};
+
 /* Called by kernel whenever a new rmnet<n> device is created. Sets MTU,
  * flags, ARP type, needed headroom, etc...
  */
@@ -170,6 +220,7 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev)
        rmnet_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
 
        rmnet_dev->needs_free_netdev = true;
+       rmnet_dev->ethtool_ops = &rmnet_ethtool_ops;
 }
 
 /* Exposed API */
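
The ethtool stats glue above copies the stats struct out as a flat array of u64 counters, so the struct layout must stay in lockstep with the string table. A userspace sketch adding the compile-time check such code can use (names illustrative; ETH_GSTRING_LEN is 32 in the ethtool ABI):

    #include <stdint.h>
    #include <string.h>

    #define GSTRING_LEN 32

    struct priv_stats {
        uint64_t csum_ok;
        uint64_t csum_failed;
    };

    static const char stat_strings[][GSTRING_LEN] = {
        "Checksum ok",
        "Checksum validation failed",
    };

    #define N_STATS (sizeof(stat_strings) / sizeof(stat_strings[0]))
    _Static_assert(sizeof(struct priv_stats) == N_STATS * sizeof(uint64_t),
                   "string table and stats struct out of sync");

    static void get_stats(const struct priv_stats *st, uint64_t *data)
    {
        memcpy(data, st, N_STATS * sizeof(uint64_t));   /* flat u64 copy */
    }

    int main(void)
    {
        struct priv_stats st = { .csum_ok = 3, .csum_failed = 1 };
        uint64_t out[N_STATS];
        get_stats(&st, out);
        return out[0] == 3 ? 0 : 1;
    }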
index d24b47b8e0b27e0f44243f5a1011779c0ebd09f9..d118da5a10a2059df4255d46a2652714f5c44a93 100644 (file)
@@ -2224,7 +2224,7 @@ static void rtl8139_poll_controller(struct net_device *dev)
        struct rtl8139_private *tp = netdev_priv(dev);
        const int irq = tp->pci_dev->irq;
 
-       disable_irq(irq);
+       disable_irq_nosync(irq);
        rtl8139_interrupt(irq, dev);
        enable_irq(irq);
 }
index a5d00ee942450b10fa0dba0951955b1aba9de304..75dfac0248f45cb423fd9883e38349a456b1dc0d 100644 (file)
@@ -410,13 +410,8 @@ enum rtl8168_8101_registers {
        CSIAR                   = 0x68,
 #define        CSIAR_FLAG                      0x80000000
 #define        CSIAR_WRITE_CMD                 0x80000000
-#define        CSIAR_BYTE_ENABLE               0x0f
-#define        CSIAR_BYTE_ENABLE_SHIFT         12
-#define        CSIAR_ADDR_MASK                 0x0fff
-#define CSIAR_FUNC_CARD                        0x00000000
-#define CSIAR_FUNC_SDIO                        0x00010000
-#define CSIAR_FUNC_NIC                 0x00020000
-#define CSIAR_FUNC_NIC2                        0x00010000
+#define        CSIAR_BYTE_ENABLE               0x0000f000
+#define        CSIAR_ADDR_MASK                 0x00000fff
        PMCH                    = 0x6f,
        EPHYAR                  = 0x80,
 #define        EPHYAR_FLAG                     0x80000000
@@ -599,6 +594,7 @@ enum rtl_register_content {
        RxChkSum        = (1 << 5),
        PCIDAC          = (1 << 4),
        PCIMulRW        = (1 << 3),
+#define INTT_MASK      GENMASK(1, 0)
        INTT_0          = 0x0000,       // 8168
        INTT_1          = 0x0001,       // 8168
        INTT_2          = 0x0002,       // 8168
@@ -689,6 +685,7 @@ enum rtl_rx_desc_bit {
 };
 
 #define RsvdMask       0x3fffc000
+#define CPCMD_QUIRK_MASK       (Normal_mode | RxVlan | RxChkSum | INTT_MASK)
 
 struct TxDesc {
        __le32 opts1;
@@ -774,21 +771,11 @@ struct rtl8169_private {
                int (*read)(struct rtl8169_private *, int);
        } mdio_ops;
 
-       struct pll_power_ops {
-               void (*down)(struct rtl8169_private *);
-               void (*up)(struct rtl8169_private *);
-       } pll_power_ops;
-
        struct jumbo_ops {
                void (*enable)(struct rtl8169_private *);
                void (*disable)(struct rtl8169_private *);
        } jumbo_ops;
 
-       struct csi_ops {
-               void (*write)(struct rtl8169_private *, int, int);
-               u32 (*read)(struct rtl8169_private *, int);
-       } csi_ops;
-
        int (*set_speed)(struct net_device *, u8 aneg, u16 sp, u8 dpx, u32 adv);
        int (*get_link_ksettings)(struct net_device *,
                                  struct ethtool_link_ksettings *);
@@ -1614,23 +1601,8 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
        if (options & LinkUp)
                wolopts |= WAKE_PHY;
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_34:
-       case RTL_GIGA_MAC_VER_35:
-       case RTL_GIGA_MAC_VER_36:
-       case RTL_GIGA_MAC_VER_37:
-       case RTL_GIGA_MAC_VER_38:
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
                if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
                        wolopts |= WAKE_MAGIC;
                break;
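
The long runs of adjacent case labels collapsed throughout this patch rely on the GCC case-range extension ("case A ... B:"), which the kernel uses freely and Clang also accepts. A runnable sketch with made-up version numbers:

    #include <stdio.h>

    static const char *wol_class(int mac_ver)
    {
        switch (mac_ver) {
        case 34 ... 38:
        case 40 ... 51:
            return "magic-packet v2";
        default:
            return "legacy";
        }
    }

    int main(void)
    {
        printf("%s\n", wol_class(45));
        return 0;
    }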
@@ -1691,23 +1663,8 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
        RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_34:
-       case RTL_GIGA_MAC_VER_35:
-       case RTL_GIGA_MAC_VER_36:
-       case RTL_GIGA_MAC_VER_37:
-       case RTL_GIGA_MAC_VER_38:
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
                tmp = ARRAY_SIZE(cfg) - 1;
                if (wolopts & WAKE_MAGIC)
                        rtl_w0w1_eri(tp,
@@ -1935,12 +1892,14 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
        return features;
 }
 
-static void __rtl8169_set_features(struct net_device *dev,
-                                  netdev_features_t features)
+static int rtl8169_set_features(struct net_device *dev,
+                               netdev_features_t features)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
        u32 rx_config;
 
+       rtl_lock_work(tp);
+
        rx_config = RTL_R32(tp, RxConfig);
        if (features & NETIF_F_RXALL)
                rx_config |= (AcceptErr | AcceptRunt);
@@ -1959,28 +1918,14 @@ static void __rtl8169_set_features(struct net_device *dev,
        else
                tp->cp_cmd &= ~RxVlan;
 
-       tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
-
        RTL_W16(tp, CPlusCmd, tp->cp_cmd);
        RTL_R16(tp, CPlusCmd);
-}
-
-static int rtl8169_set_features(struct net_device *dev,
-                               netdev_features_t features)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
 
-       features &= NETIF_F_RXALL | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX;
-
-       rtl_lock_work(tp);
-       if (features ^ dev->features)
-               __rtl8169_set_features(dev, features);
        rtl_unlock_work(tp);
 
        return 0;
 }
 
-
 static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb)
 {
        return (skb_vlan_tag_present(skb)) ?
@@ -2354,7 +2299,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        if (IS_ERR(ci))
                return PTR_ERR(ci);
 
-       scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
+       scale = &ci->scalev[tp->cp_cmd & INTT_MASK];
 
        /* read IntrMitigate and adjust according to scale */
        for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
@@ -2453,7 +2398,7 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
        RTL_W16(tp, IntrMitigate, swab16(w));
 
-       tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
+       tp->cp_cmd = (tp->cp_cmd & ~INTT_MASK) | cp01;
        RTL_W16(tp, CPlusCmd, tp->cp_cmd);
        RTL_R16(tp, CPlusCmd);
 
@@ -4638,18 +4583,7 @@ static void rtl_init_mdio_ops(struct rtl8169_private *tp)
                ops->write      = r8168dp_2_mdio_write;
                ops->read       = r8168dp_2_mdio_read;
                break;
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
                ops->write      = r8168g_mdio_write;
                ops->read       = r8168g_mdio_read;
                break;
@@ -4694,21 +4628,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_32:
        case RTL_GIGA_MAC_VER_33:
        case RTL_GIGA_MAC_VER_34:
-       case RTL_GIGA_MAC_VER_37:
-       case RTL_GIGA_MAC_VER_38:
-       case RTL_GIGA_MAC_VER_39:
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_51:
                RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
                        AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
                break;
@@ -4728,79 +4648,13 @@ static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
        return true;
 }
 
-static void r810x_phy_power_down(struct rtl8169_private *tp)
-{
-       rtl_writephy(tp, 0x1f, 0x0000);
-       rtl_writephy(tp, MII_BMCR, BMCR_PDOWN);
-}
-
-static void r810x_phy_power_up(struct rtl8169_private *tp)
-{
-       rtl_writephy(tp, 0x1f, 0x0000);
-       rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE);
-}
-
-static void r810x_pll_power_down(struct rtl8169_private *tp)
-{
-       if (rtl_wol_pll_power_down(tp))
-               return;
-
-       r810x_phy_power_down(tp);
-
-       switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_07:
-       case RTL_GIGA_MAC_VER_08:
-       case RTL_GIGA_MAC_VER_09:
-       case RTL_GIGA_MAC_VER_10:
-       case RTL_GIGA_MAC_VER_13:
-       case RTL_GIGA_MAC_VER_16:
-               break;
-       default:
-               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
-               break;
-       }
-}
-
-static void r810x_pll_power_up(struct rtl8169_private *tp)
-{
-       r810x_phy_power_up(tp);
-
-       switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_07:
-       case RTL_GIGA_MAC_VER_08:
-       case RTL_GIGA_MAC_VER_09:
-       case RTL_GIGA_MAC_VER_10:
-       case RTL_GIGA_MAC_VER_13:
-       case RTL_GIGA_MAC_VER_16:
-               break;
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
-               break;
-       default:
-               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
-               break;
-       }
-}
-
 static void r8168_phy_power_up(struct rtl8169_private *tp)
 {
        rtl_writephy(tp, 0x1f, 0x0000);
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_11:
        case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_17:
-       case RTL_GIGA_MAC_VER_18:
-       case RTL_GIGA_MAC_VER_19:
-       case RTL_GIGA_MAC_VER_20:
-       case RTL_GIGA_MAC_VER_21:
-       case RTL_GIGA_MAC_VER_22:
-       case RTL_GIGA_MAC_VER_23:
-       case RTL_GIGA_MAC_VER_24:
-       case RTL_GIGA_MAC_VER_25:
-       case RTL_GIGA_MAC_VER_26:
-       case RTL_GIGA_MAC_VER_27:
-       case RTL_GIGA_MAC_VER_28:
+       case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28:
        case RTL_GIGA_MAC_VER_31:
                rtl_writephy(tp, 0x0e, 0x0000);
                break;
@@ -4808,6 +4662,9 @@ static void r8168_phy_power_up(struct rtl8169_private *tp)
                break;
        }
        rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE);
+
+       /* give MAC/PHY some time to resume */
+       msleep(20);
 }
 
 static void r8168_phy_power_down(struct rtl8169_private *tp)
@@ -4823,18 +4680,7 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
 
        case RTL_GIGA_MAC_VER_11:
        case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_17:
-       case RTL_GIGA_MAC_VER_18:
-       case RTL_GIGA_MAC_VER_19:
-       case RTL_GIGA_MAC_VER_20:
-       case RTL_GIGA_MAC_VER_21:
-       case RTL_GIGA_MAC_VER_22:
-       case RTL_GIGA_MAC_VER_23:
-       case RTL_GIGA_MAC_VER_24:
-       case RTL_GIGA_MAC_VER_25:
-       case RTL_GIGA_MAC_VER_26:
-       case RTL_GIGA_MAC_VER_27:
-       case RTL_GIGA_MAC_VER_28:
+       case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28:
        case RTL_GIGA_MAC_VER_31:
                rtl_writephy(tp, 0x0e, 0x0200);
        default:
@@ -4848,12 +4694,6 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
        if (r8168_check_dash(tp))
                return;
 
-       if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
-            tp->mac_version == RTL_GIGA_MAC_VER_24) &&
-           (RTL_R16(tp, CPlusCmd) & ASF)) {
-               return;
-       }
-
        if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
            tp->mac_version == RTL_GIGA_MAC_VER_33)
                rtl_ephy_write(tp, 0x19, 0xff64);
@@ -4864,16 +4704,15 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
        r8168_phy_power_down(tp);
 
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_25:
-       case RTL_GIGA_MAC_VER_26:
-       case RTL_GIGA_MAC_VER_27:
-       case RTL_GIGA_MAC_VER_28:
-       case RTL_GIGA_MAC_VER_31:
-       case RTL_GIGA_MAC_VER_32:
-       case RTL_GIGA_MAC_VER_33:
+       case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
+       case RTL_GIGA_MAC_VER_37:
+       case RTL_GIGA_MAC_VER_39:
+       case RTL_GIGA_MAC_VER_43:
        case RTL_GIGA_MAC_VER_44:
        case RTL_GIGA_MAC_VER_45:
        case RTL_GIGA_MAC_VER_46:
+       case RTL_GIGA_MAC_VER_47:
+       case RTL_GIGA_MAC_VER_48:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
                RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
@@ -4891,18 +4730,17 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
 static void r8168_pll_power_up(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_25:
-       case RTL_GIGA_MAC_VER_26:
-       case RTL_GIGA_MAC_VER_27:
-       case RTL_GIGA_MAC_VER_28:
-       case RTL_GIGA_MAC_VER_31:
-       case RTL_GIGA_MAC_VER_32:
-       case RTL_GIGA_MAC_VER_33:
+       case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
+       case RTL_GIGA_MAC_VER_37:
+       case RTL_GIGA_MAC_VER_39:
+       case RTL_GIGA_MAC_VER_43:
                RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
                break;
        case RTL_GIGA_MAC_VER_44:
        case RTL_GIGA_MAC_VER_45:
        case RTL_GIGA_MAC_VER_46:
+       case RTL_GIGA_MAC_VER_47:
+       case RTL_GIGA_MAC_VER_48:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
                RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
@@ -4919,127 +4757,41 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
        r8168_phy_power_up(tp);
 }
 
-static void rtl_generic_op(struct rtl8169_private *tp,
-                          void (*op)(struct rtl8169_private *))
-{
-       if (op)
-               op(tp);
-}
-
 static void rtl_pll_power_down(struct rtl8169_private *tp)
 {
-       rtl_generic_op(tp, tp->pll_power_ops.down);
+       switch (tp->mac_version) {
+       case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+       case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
+               break;
+       default:
+               r8168_pll_power_down(tp);
+       }
 }
 
 static void rtl_pll_power_up(struct rtl8169_private *tp)
 {
-       rtl_generic_op(tp, tp->pll_power_ops.up);
-}
-
-static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
-{
-       struct pll_power_ops *ops = &tp->pll_power_ops;
-
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_07:
-       case RTL_GIGA_MAC_VER_08:
-       case RTL_GIGA_MAC_VER_09:
-       case RTL_GIGA_MAC_VER_10:
-       case RTL_GIGA_MAC_VER_16:
-       case RTL_GIGA_MAC_VER_29:
-       case RTL_GIGA_MAC_VER_30:
-       case RTL_GIGA_MAC_VER_37:
-       case RTL_GIGA_MAC_VER_39:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-               ops->down       = r810x_pll_power_down;
-               ops->up         = r810x_pll_power_up;
-               break;
-
-       case RTL_GIGA_MAC_VER_11:
-       case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_17:
-       case RTL_GIGA_MAC_VER_18:
-       case RTL_GIGA_MAC_VER_19:
-       case RTL_GIGA_MAC_VER_20:
-       case RTL_GIGA_MAC_VER_21:
-       case RTL_GIGA_MAC_VER_22:
-       case RTL_GIGA_MAC_VER_23:
-       case RTL_GIGA_MAC_VER_24:
-       case RTL_GIGA_MAC_VER_25:
-       case RTL_GIGA_MAC_VER_26:
-       case RTL_GIGA_MAC_VER_27:
-       case RTL_GIGA_MAC_VER_28:
-       case RTL_GIGA_MAC_VER_31:
-       case RTL_GIGA_MAC_VER_32:
-       case RTL_GIGA_MAC_VER_33:
-       case RTL_GIGA_MAC_VER_34:
-       case RTL_GIGA_MAC_VER_35:
-       case RTL_GIGA_MAC_VER_36:
-       case RTL_GIGA_MAC_VER_38:
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
-               ops->down       = r8168_pll_power_down;
-               ops->up         = r8168_pll_power_up;
+       case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+       case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
                break;
-
        default:
-               ops->down       = NULL;
-               ops->up         = NULL;
-               break;
+               r8168_pll_power_up(tp);
        }
 }
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_01:
-       case RTL_GIGA_MAC_VER_02:
-       case RTL_GIGA_MAC_VER_03:
-       case RTL_GIGA_MAC_VER_04:
-       case RTL_GIGA_MAC_VER_05:
-       case RTL_GIGA_MAC_VER_06:
-       case RTL_GIGA_MAC_VER_10:
-       case RTL_GIGA_MAC_VER_11:
-       case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_13:
-       case RTL_GIGA_MAC_VER_14:
-       case RTL_GIGA_MAC_VER_15:
-       case RTL_GIGA_MAC_VER_16:
-       case RTL_GIGA_MAC_VER_17:
+       case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+       case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
                RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
                break;
-       case RTL_GIGA_MAC_VER_18:
-       case RTL_GIGA_MAC_VER_19:
-       case RTL_GIGA_MAC_VER_20:
-       case RTL_GIGA_MAC_VER_21:
-       case RTL_GIGA_MAC_VER_22:
-       case RTL_GIGA_MAC_VER_23:
-       case RTL_GIGA_MAC_VER_24:
+       case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24:
        case RTL_GIGA_MAC_VER_34:
        case RTL_GIGA_MAC_VER_35:
                RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
                break;
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
                RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
                break;
        default:
@@ -5055,16 +4807,20 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 
 static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-       rtl_generic_op(tp, tp->jumbo_ops.enable);
-       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+       if (tp->jumbo_ops.enable) {
+               RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+               tp->jumbo_ops.enable(tp);
+               RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+       }
 }
 
 static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-       rtl_generic_op(tp, tp->jumbo_ops.disable);
-       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+       if (tp->jumbo_ops.disable) {
+               RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+               tp->jumbo_ops.disable(tp);
+               RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+       }
 }
 
 static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
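With rtl_generic_op() gone, the Cfg9346 unlock/lock pair around the jumbo hooks is taken only when a handler is actually installed, instead of bracketing a possibly-NULL call. The guarded shape, reduced to a standalone sketch with illustrative names:

struct nic {
	void (*jumbo_enable)(struct nic *);	/* may be NULL, e.g. on 810x */
};

static void cfg9346_unlock(struct nic *n) { /* register write elided */ }
static void cfg9346_lock(struct nic *n)   { /* register write elided */ }

static void hw_jumbo_enable(struct nic *n)
{
	if (!n->jumbo_enable)
		return;			/* no handler: skip the unlock/lock dance */
	cfg9346_unlock(n);
	n->jumbo_enable(n);
	cfg9346_lock(n);
}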
@@ -5176,18 +4932,7 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
         * No action needed for jumbo frames with 8169.
         * No jumbo for 810x at all.
         */
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
        default:
                ops->disable    = NULL;
                ops->enable     = NULL;
@@ -5273,32 +5018,21 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 
        rtl_rx_close(tp);
 
-       if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_31) {
+       switch (tp->mac_version) {
+       case RTL_GIGA_MAC_VER_27:
+       case RTL_GIGA_MAC_VER_28:
+       case RTL_GIGA_MAC_VER_31:
                rtl_udelay_loop_wait_low(tp, &rtl_npq_cond, 20, 42*42);
-       } else if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_35 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_36 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_37 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_38 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_40 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_41 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_42 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_43 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_44 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_45 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_46 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_47 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_48 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_51) {
+               break;
+       case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
                RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
                rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
-       } else {
+               break;
+       default:
                RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
                udelay(100);
+               break;
        }
 
        rtl_hw_reset(tp);
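The reshuffled reset path still waits on chip state through the driver's bounded polling helpers; only the chip-class selection changed. For reference, the shape such a helper must have, judging from the call sites above (a condition, a per-iteration delay, and an iteration budget); this is a sketch, not the driver's implementation:

#include <stdbool.h>

extern void udelay(unsigned long usecs);	/* busy-wait delay */
extern bool txcfg_empty(void);			/* stand-in status-register check */

/* Poll until cond() reads true or the budget runs out, mirroring the
 * shape of rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666).
 */
static bool poll_wait_high(bool (*cond)(void), unsigned long delay_us, int n)
{
	while (n-- > 0) {
		if (cond())
			return true;
		udelay(delay_us);
	}
	return false;				/* timed out */
}
/* e.g.: poll_wait_high(txcfg_empty, 100, 666); */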
@@ -5311,10 +5045,10 @@ static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
                (InterFrameGap << TxInterFrameGapShift));
 }
 
-static void rtl_hw_start(struct  rtl8169_private *tp)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp)
 {
-       tp->hw_start(tp);
-       rtl_irq_enable_all(tp);
+       /* Low hurts. Let's disable the filtering. */
+       RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
 }
 
 static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
@@ -5330,21 +5064,6 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
        RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
 }
 
-static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
-{
-       u16 cmd;
-
-       cmd = RTL_R16(tp, CPlusCmd);
-       RTL_W16(tp, CPlusCmd, cmd);
-       return cmd;
-}
-
-static void rtl_set_rx_max_size(struct rtl8169_private *tp)
-{
-       /* Low hurts. Let's disable the filtering. */
-       RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
-}
-
 static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
 {
        static const struct rtl_cfg2_info {
@@ -5422,33 +5141,34 @@ static void rtl_set_rx_mode(struct net_device *dev)
        RTL_W32(tp, RxConfig, tmp);
 }
 
-static void rtl_hw_start_8169(struct rtl8169_private *tp)
+static void rtl_hw_start(struct rtl8169_private *tp)
 {
-       if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
-               RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
-               pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
-       }
-
        RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-       if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_02 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_03 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_04)
-               RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
-       rtl_init_rxcfg(tp);
-
-       RTL_W8(tp, EarlyTxThres, NoEarlyTx);
+       tp->hw_start(tp);
 
        rtl_set_rx_max_size(tp);
+       rtl_set_rx_tx_desc_registers(tp);
+       rtl_set_rx_tx_config_registers(tp);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-       if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_02 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_03 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_04)
-               rtl_set_rx_tx_config_registers(tp);
+       /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
+       RTL_R8(tp, IntrMask);
+       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
+       rtl_set_rx_mode(tp->dev);
+       /* no early-rx interrupts */
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
+       rtl_irq_enable_all(tp);
+}
 
-       tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
+static void rtl_hw_start_8169(struct rtl8169_private *tp)
+{
+       if (tp->mac_version == RTL_GIGA_MAC_VER_05)
+               pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
+
+       RTL_W8(tp, EarlyTxThres, NoEarlyTx);
+
+       tp->cp_cmd |= PCIMulRW;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
            tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5467,56 +5187,7 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp)
         */
        RTL_W16(tp, IntrMitigate, 0x0000);
 
-       rtl_set_rx_tx_desc_registers(tp);
-
-       if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
-           tp->mac_version != RTL_GIGA_MAC_VER_02 &&
-           tp->mac_version != RTL_GIGA_MAC_VER_03 &&
-           tp->mac_version != RTL_GIGA_MAC_VER_04) {
-               RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-               rtl_set_rx_tx_config_registers(tp);
-       }
-
-       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
-       /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
-       RTL_R8(tp, IntrMask);
-
        RTL_W32(tp, RxMissed, 0);
-
-       rtl_set_rx_mode(tp->dev);
-
-       /* no early-rx interrupts */
-       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
-}
-
-static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
-{
-       if (tp->csi_ops.write)
-               tp->csi_ops.write(tp, addr, value);
-}
-
-static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
-{
-       return tp->csi_ops.read ? tp->csi_ops.read(tp, addr) : ~0;
-}
-
-static void rtl_csi_access_enable(struct rtl8169_private *tp, u32 bits)
-{
-       u32 csi;
-
-       csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff;
-       rtl_csi_write(tp, 0x070c, csi | bits);
-}
-
-static void rtl_csi_access_enable_1(struct rtl8169_private *tp)
-{
-       rtl_csi_access_enable(tp, 0x17000000);
-}
-
-static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
-{
-       rtl_csi_access_enable(tp, 0x27000000);
 }
 
 DECLARE_RTL_COND(rtl_csiar_cond)
@@ -5524,101 +5195,55 @@ DECLARE_RTL_COND(rtl_csiar_cond)
        return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
 }
 
-static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
+static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
+       u32 func = PCI_FUNC(tp->pci_dev->devfn);
+
        RTL_W32(tp, CSIDR, value);
        RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
-               CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
+               CSIAR_BYTE_ENABLE | func << 16);
 
        rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
 }
 
-static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
+static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
 {
-       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
-               CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
+       u32 func = PCI_FUNC(tp->pci_dev->devfn);
+
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | func << 16 |
+               CSIAR_BYTE_ENABLE);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
                RTL_R32(tp, CSIDR) : ~0;
 }
 
-static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
+static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
 {
-       RTL_W32(tp, CSIDR, value);
-       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
-               CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
-               CSIAR_FUNC_NIC);
-
-       rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
-}
+       struct pci_dev *pdev = tp->pci_dev;
+       u32 csi;
 
-static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
-{
-       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
-               CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
+       /* According to Realtek the value at config space address 0x070f
+        * controls the L0s/L1 entrance latency. We try standard ECAM access
+        * first and if it fails fall back to CSI.
+        */
+       if (pdev->cfg_size > 0x070f &&
+           pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL)
+               return;
 
-       return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(tp, CSIDR) : ~0;
+       netdev_notice_once(tp->dev,
+               "No native access to PCI extended config space, falling back to CSI\n");
+       csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff;
+       rtl_csi_write(tp, 0x070c, csi | val << 24);
 }
 
-static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
+static void rtl_csi_access_enable_1(struct rtl8169_private *tp)
 {
-       RTL_W32(tp, CSIDR, value);
-       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
-               CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
-               CSIAR_FUNC_NIC2);
-
-       rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
+       rtl_csi_access_enable(tp, 0x17);
 }
 
-static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
+static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
 {
-       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
-               CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
-
-       return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(tp, CSIDR) : ~0;
-}
-
-static void rtl_init_csi_ops(struct rtl8169_private *tp)
-{
-       struct csi_ops *ops = &tp->csi_ops;
-
-       switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_01:
-       case RTL_GIGA_MAC_VER_02:
-       case RTL_GIGA_MAC_VER_03:
-       case RTL_GIGA_MAC_VER_04:
-       case RTL_GIGA_MAC_VER_05:
-       case RTL_GIGA_MAC_VER_06:
-       case RTL_GIGA_MAC_VER_10:
-       case RTL_GIGA_MAC_VER_11:
-       case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_13:
-       case RTL_GIGA_MAC_VER_14:
-       case RTL_GIGA_MAC_VER_15:
-       case RTL_GIGA_MAC_VER_16:
-       case RTL_GIGA_MAC_VER_17:
-               ops->write      = NULL;
-               ops->read       = NULL;
-               break;
-
-       case RTL_GIGA_MAC_VER_37:
-       case RTL_GIGA_MAC_VER_38:
-               ops->write      = r8402_csi_write;
-               ops->read       = r8402_csi_read;
-               break;
-
-       case RTL_GIGA_MAC_VER_44:
-               ops->write      = r8411_csi_write;
-               ops->read       = r8411_csi_read;
-               break;
-
-       default:
-               ops->write      = r8169_csi_write;
-               ops->read       = r8169_csi_read;
-               break;
-       }
+       rtl_csi_access_enable(tp, 0x27);
 }
 
 struct ephy_info {
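The rewritten accessors fold the three per-chip CSI variants into one pair by deriving the function field from PCI_FUNC(), and rtl_csi_access_enable() now prefers plain extended config space over the CSI mailbox. A standalone sketch of that fallback decision; the helper names are assumptions, while the addresses (0x070c/0x070f) come from the hunk above:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

extern bool pci_cfg_write_byte(uint16_t addr, uint8_t val);	/* true on success */
extern uint32_t csi_read(uint16_t addr);
extern void csi_write(uint16_t addr, uint32_t val);

/* Byte 0x070f (L0s/L1 entrance latency, per Realtek) is written through
 * normal extended config space when reachable, otherwise via the CSI
 * mailbox as the top byte of the dword at 0x070c.
 */
static void set_aspm_latency(uint8_t val, size_t cfg_size)
{
	if (cfg_size > 0x070f && pci_cfg_write_byte(0x070f, val))
		return;

	uint32_t csi = csi_read(0x070c) & 0x00ffffff;
	csi_write(0x070c, csi | (uint32_t)val << 24);
}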
@@ -5665,22 +5290,12 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
        RTL_W8(tp, Config3, data);
 }
 
-#define R8168_CPCMD_QUIRK_MASK (\
-       EnableBist | \
-       Mac_dbgo_oe | \
-       Force_half_dup | \
-       Force_rxflow_en | \
-       Force_txflow_en | \
-       Cxpl_dbg_sel | \
-       ASF | \
-       PktCntrDisable | \
-       Mac_dbgo_sel)
-
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       tp->cp_cmd &= CPCMD_QUIRK_MASK;
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
        if (tp->dev->mtu <= ETH_DATA_LEN) {
                rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
@@ -5708,7 +5323,8 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 
        rtl_disable_clock_request(tp);
 
-       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       tp->cp_cmd &= CPCMD_QUIRK_MASK;
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5737,7 +5353,8 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       tp->cp_cmd &= CPCMD_QUIRK_MASK;
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
@@ -5754,7 +5371,8 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       tp->cp_cmd &= CPCMD_QUIRK_MASK;
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
@@ -5811,7 +5429,8 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       tp->cp_cmd &= CPCMD_QUIRK_MASK;
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
@@ -6274,14 +5893,10 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168(struct rtl8169_private *tp)
 {
-       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-
        RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_set_rx_max_size(tp);
-
-       tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
-
+       tp->cp_cmd &= ~INTT_MASK;
+       tp->cp_cmd |= PktCntrDisable | INTT_1;
        RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
        RTL_W16(tp, IntrMitigate, 0x5151);
@@ -6292,12 +5907,6 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp)
                tp->event_slow &= ~RxOverflow;
        }
 
-       rtl_set_rx_tx_desc_registers(tp);
-
-       rtl_set_rx_tx_config_registers(tp);
-
-       RTL_R8(tp, IntrMask);
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_11:
                rtl_hw_start_8168bb(tp);
@@ -6401,27 +6010,8 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp)
                       tp->dev->name, tp->mac_version);
                break;
        }
-
-       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
-       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-
-       rtl_set_rx_mode(tp->dev);
-
-       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
-#define R810X_CPCMD_QUIRK_MASK (\
-       EnableBist | \
-       Mac_dbgo_oe | \
-       Force_half_dup | \
-       Force_rxflow_en | \
-       Force_txflow_en | \
-       Cxpl_dbg_sel | \
-       ASF | \
-       PktCntrDisable | \
-       Mac_dbgo_sel)
-
 static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 {
        static const struct ephy_info e_info_8102e_1[] = {
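rtl_hw_start_8168() (and rtl_hw_start_8101() below) now carries only chip-specific setup; the Cfg9346 bracket, the descriptor and RX/TX config writes, the RX-mode programming and the interrupt enabling all moved into the common rtl_hw_start() shown earlier. The resulting template-method shape, as an illustrative sketch:

struct nic {
	void (*hw_start)(struct nic *);	/* per-family hook: 8169/8168/8101 */
};

static void cfg9346_unlock(struct nic *n) { }
static void cfg9346_lock(struct nic *n) { }
static void rx_tx_common_setup(struct nic *n) { }	/* formerly duplicated */
static void irq_enable_all(struct nic *n) { }

static void hw_start(struct nic *n)
{
	cfg9346_unlock(n);
	n->hw_start(n);			/* only the chip-specific part */
	rx_tx_common_setup(n);
	cfg9346_lock(n);
	irq_enable_all(n);
}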
@@ -6555,19 +6145,11 @@ static void rtl_hw_start_8101(struct rtl8169_private *tp)
                pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
                                         PCI_EXP_DEVCTL_NOSNOOP_EN);
 
-       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-
        RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_set_rx_max_size(tp);
-
-       tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
+       tp->cp_cmd &= CPCMD_QUIRK_MASK;
        RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       rtl_set_rx_tx_desc_registers(tp);
-
-       rtl_set_rx_tx_config_registers(tp);
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_07:
                rtl_hw_start_8102e_1(tp);
@@ -6604,17 +6186,7 @@ static void rtl_hw_start_8101(struct rtl8169_private *tp)
                break;
        }
 
-       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
        RTL_W16(tp, IntrMitigate, 0x0000);
-
-       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-
-       rtl_set_rx_mode(tp->dev);
-
-       RTL_R8(tp, IntrMask);
-
-       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -6942,18 +6514,6 @@ static int msdn_giant_send_check(struct sk_buff *skb)
        return ret;
 }
 
-static inline __be16 get_protocol(struct sk_buff *skb)
-{
-       __be16 protocol;
-
-       if (skb->protocol == htons(ETH_P_8021Q))
-               protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
-       else
-               protocol = skb->protocol;
-
-       return protocol;
-}
-
 static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp,
                                struct sk_buff *skb, u32 *opts)
 {
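The deleted get_protocol() duplicated vlan_get_protocol() from <linux/if_vlan.h>, which resolves the L3 ethertype once VLAN encapsulation is peeled off. A simplified standalone rendering of that logic, restricted to single-tagged frames (the struct here is a stand-in, not struct sk_buff):

#include <arpa/inet.h>		/* htons() */
#include <stdint.h>

#define ETH_P_8021Q 0x8100

struct skb_lite {
	uint16_t protocol;	/* outer ethertype, network byte order */
	uint16_t encap_proto;	/* ethertype inside the VLAN tag, if tagged */
};

static uint16_t get_l3_protocol(const struct skb_lite *skb)
{
	if (skb->protocol == htons(ETH_P_8021Q))
		return skb->encap_proto;	/* look through the tag */
	return skb->protocol;
}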
@@ -6990,7 +6550,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
                        return false;
                }
 
-               switch (get_protocol(skb)) {
+               switch (vlan_get_protocol(skb)) {
                case htons(ETH_P_IP):
                        opts[0] |= TD1_GTSENV4;
                        break;
@@ -7022,7 +6582,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
                        return false;
                }
 
-               switch (get_protocol(skb)) {
+               switch (vlan_get_protocol(skb)) {
                case htons(ETH_P_IP):
                        opts[1] |= TD1_IPv4_CS;
                        ip_protocol = ip_hdr(skb)->protocol;
@@ -7637,8 +7197,6 @@ static int rtl_open(struct net_device *dev)
 
        rtl8169_init_phy(dev, tp);
 
-       __rtl8169_set_features(dev, dev->features);
-
        rtl_pll_power_up(tp);
 
        rtl_hw_start(tp);
@@ -8045,20 +7603,10 @@ static void rtl_hw_init_8168ep(struct rtl8169_private *tp)
 static void rtl_hw_initialize(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-       case RTL_GIGA_MAC_VER_42:
-       case RTL_GIGA_MAC_VER_43:
-       case RTL_GIGA_MAC_VER_44:
-       case RTL_GIGA_MAC_VER_45:
-       case RTL_GIGA_MAC_VER_46:
-       case RTL_GIGA_MAC_VER_47:
-       case RTL_GIGA_MAC_VER_48:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
                rtl_hw_init_8168g(tp);
                break;
-       case RTL_GIGA_MAC_VER_49:
-       case RTL_GIGA_MAC_VER_50:
-       case RTL_GIGA_MAC_VER_51:
+       case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
                rtl_hw_init_8168ep(tp);
                break;
        default:
@@ -8141,7 +7689,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* Identify chip attached to board */
        rtl8169_get_mac_version(tp, cfg->default_ver);
 
-       tp->cp_cmd = 0;
+       tp->cp_cmd = RTL_R16(tp, CPlusCmd);
 
        if ((sizeof(dma_addr_t) > 4) &&
            (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) &&
@@ -8174,9 +7722,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_master(pdev);
 
        rtl_init_mdio_ops(tp);
-       rtl_init_pll_power_ops(tp);
        rtl_init_jumbo_ops(tp);
-       rtl_init_csi_ops(tp);
 
        rtl8169_print_mac_version(tp);
 
@@ -8212,29 +7758,18 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        u64_stats_init(&tp->tx_stats.syncp);
 
        /* Get MAC address */
-       if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_36 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_37 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_38 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_40 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_41 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_42 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_43 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_44 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_45 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_46 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_47 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_48 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_51) {
-               u16 mac_addr[3];
-
+       switch (tp->mac_version) {
+               u8 mac_addr[ETH_ALEN] __aligned(4);
+       case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
                *(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC);
-               *(u16 *)&mac_addr[2] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
+               *(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
 
-               if (is_valid_ether_addr((u8 *)mac_addr))
-                       rtl_rar_set(tp, (u8 *)mac_addr);
+               if (is_valid_ether_addr(mac_addr))
+                       rtl_rar_set(tp, mac_addr);
+               break;
+       default:
+               break;
        }
        for (i = 0; i < ETH_ALEN; i++)
                dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
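The MAC-address block becomes a switch with case ranges, and the scratch buffer changes from u16[3] to a 4-byte-aligned u8[ETH_ALEN], so the result can be handed to is_valid_ether_addr()/rtl_rar_set() without casts. One portable way to express the same two-read composition (eri_read32() is a stand-in for rtl_eri_read(); endianness handling is elided, as the driver stores the raw register values):

#include <stdint.h>
#include <string.h>

#define ETH_ALEN 6

extern uint32_t eri_read32(int addr);

/* 4 MAC bytes live at ERI 0xe0, the remaining 2 at ERI 0xe4. */
static void read_mac(uint8_t mac[ETH_ALEN])
{
	uint32_t lo = eri_read32(0xe0);
	uint16_t hi = (uint16_t)eri_read32(0xe4);

	memcpy(&mac[0], &lo, 4);
	memcpy(&mac[4], &hi, 2);
}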
index b6b90a6314e31d2d59223c7b57ff8e47d49f66c4..d9cadfb1bc4a74715ccc8329cd6737419852f6dd 100644 (file)
@@ -442,12 +442,22 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear,
 static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
                             int enum_index)
 {
-       iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]);
+       u16 offset = mdp->reg_offset[enum_index];
+
+       if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
+               return;
+
+       iowrite32(data, mdp->tsu_addr + offset);
 }
 
 static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
 {
-       return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
+       u16 offset = mdp->reg_offset[enum_index];
+
+       if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
+               return ~0U;
+
+       return ioread32(mdp->tsu_addr + offset);
 }
 
 static void sh_eth_select_mii(struct net_device *ndev)
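Not every enum_index has a register on every SoC; missing entries carry a sentinel offset, and the accessors now WARN and bail out instead of poking base + 0xffff. A standalone sketch of the guarded-accessor pattern (the sentinel value is an assumption mirroring SH_ETH_OFFSET_INVALID; fprintf stands in for WARN_ON):

#include <stdint.h>
#include <stdio.h>

#define OFFSET_INVALID ((uint16_t)~0)	/* assumed sentinel */

extern volatile uint32_t *tsu_base;

static uint32_t tsu_read(const uint16_t *reg_offset, int enum_index)
{
	uint16_t offset = reg_offset[enum_index];

	if (offset == OFFSET_INVALID) {
		fprintf(stderr, "invalid TSU register index %d\n", enum_index);
		return ~0U;		/* all-ones, like the driver's fallback */
	}
	return tsu_base[offset / 4];	/* 32-bit register file */
}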
@@ -456,6 +466,9 @@ static void sh_eth_select_mii(struct net_device *ndev)
        u32 value;
 
        switch (mdp->phy_interface) {
+       case PHY_INTERFACE_MODE_RGMII ... PHY_INTERFACE_MODE_RGMII_TXID:
+               value = 0x3;
+               break;
        case PHY_INTERFACE_MODE_GMII:
                value = 0x2;
                break;
@@ -693,7 +706,7 @@ static struct sh_eth_cpu_data rcar_gen1_data = {
                          EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
 
-       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
+       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_TRO,
        .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
                          EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
        .fdr_value      = 0x00000f0f,
@@ -725,7 +738,7 @@ static struct sh_eth_cpu_data rcar_gen2_data = {
                          EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
 
-       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
+       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_TRO,
        .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
                          EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
        .fdr_value      = 0x00000f0f,
@@ -740,6 +753,49 @@ static struct sh_eth_cpu_data rcar_gen2_data = {
        .rmiimode       = 1,
        .magic          = 1,
 };
+
+/* R8A77980 */
+static struct sh_eth_cpu_data r8a77980_data = {
+       .soft_reset     = sh_eth_soft_reset_gether,
+
+       .set_duplex     = sh_eth_set_duplex,
+       .set_rate       = sh_eth_set_rate_gether,
+
+       .register_type  = SH_ETH_REG_GIGABIT,
+
+       .edtrr_trns     = EDTRR_TRNS_GETHER,
+       .ecsr_value     = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD,
+       .ecsipr_value   = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP |
+                         ECSIPR_MPDIP,
+       .eesipr_value   = EESIPR_RFCOFIP | EESIPR_ECIIP |
+                         EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
+                         EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
+                         EESIPR_RMAFIP | EESIPR_RRFIP |
+                         EESIPR_RTLFIP | EESIPR_RTSFIP |
+                         EESIPR_PREIP | EESIPR_CERFIP,
+
+       .tx_check       = EESR_FTC | EESR_CD | EESR_TRO,
+       .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
+                         EESR_RFE | EESR_RDE | EESR_RFRMER |
+                         EESR_TFE | EESR_TDE | EESR_ECI,
+       .fdr_value      = 0x0000070f,
+
+       .apr            = 1,
+       .mpr            = 1,
+       .tpauser        = 1,
+       .bculr          = 1,
+       .hw_swap        = 1,
+       .nbst           = 1,
+       .rpadir         = 1,
+       .rpadir_value   = 2 << 16,
+       .no_trimd       = 1,
+       .no_ade         = 1,
+       .xdfar_rw       = 1,
+       .hw_checksum    = 1,
+       .select_mii     = 1,
+       .magic          = 1,
+       .cexcr          = 1,
+};
 #endif /* CONFIG_OF */
 
 static void sh_eth_set_rate_sh7724(struct net_device *ndev)
@@ -775,7 +831,7 @@ static struct sh_eth_cpu_data sh7724_data = {
                          EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
 
-       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
+       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_TRO,
        .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
                          EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
 
@@ -820,7 +876,7 @@ static struct sh_eth_cpu_data sh7757_data = {
                          EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
 
-       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
+       .tx_check       = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_TRO,
        .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
                          EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
 
@@ -1421,8 +1477,13 @@ static int sh_eth_dev_init(struct net_device *ndev)
 
        sh_eth_write(ndev, mdp->cd->trscer_err_mask, TRSCER);
 
+       /* DMA transfer burst mode */
+       if (mdp->cd->nbst)
+               sh_eth_modify(ndev, EDMR, EDMR_NBST, EDMR_NBST);
+
+       /* Burst cycle count upper-limit */
        if (mdp->cd->bculr)
-               sh_eth_write(ndev, 0x800, BCULR);       /* Burst sycle set */
+               sh_eth_write(ndev, 0x800, BCULR);
 
        sh_eth_write(ndev, mdp->cd->fcftr_value, FCFTR);
 
@@ -2610,12 +2671,6 @@ static int sh_eth_change_mtu(struct net_device *ndev, int new_mtu)
 }
 
 /* For TSU_POSTn. Please refer to the manual about this (strange) bitfields */
-static void *sh_eth_tsu_get_post_reg_offset(struct sh_eth_private *mdp,
-                                           int entry)
-{
-       return sh_eth_tsu_get_offset(mdp, TSU_POST1) + (entry / 8 * 4);
-}
-
 static u32 sh_eth_tsu_get_post_mask(int entry)
 {
        return 0x0f << (28 - ((entry % 8) * 4));
@@ -2630,27 +2685,25 @@ static void sh_eth_tsu_enable_cam_entry_post(struct net_device *ndev,
                                             int entry)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
+       int reg = TSU_POST1 + entry / 8;
        u32 tmp;
-       void *reg_offset;
 
-       reg_offset = sh_eth_tsu_get_post_reg_offset(mdp, entry);
-       tmp = ioread32(reg_offset);
-       iowrite32(tmp | sh_eth_tsu_get_post_bit(mdp, entry), reg_offset);
+       tmp = sh_eth_tsu_read(mdp, reg);
+       sh_eth_tsu_write(mdp, tmp | sh_eth_tsu_get_post_bit(mdp, entry), reg);
 }
 
 static bool sh_eth_tsu_disable_cam_entry_post(struct net_device *ndev,
                                              int entry)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
+       int reg = TSU_POST1 + entry / 8;
        u32 post_mask, ref_mask, tmp;
-       void *reg_offset;
 
-       reg_offset = sh_eth_tsu_get_post_reg_offset(mdp, entry);
        post_mask = sh_eth_tsu_get_post_mask(entry);
        ref_mask = sh_eth_tsu_get_post_bit(mdp, entry) & ~post_mask;
 
-       tmp = ioread32(reg_offset);
-       iowrite32(tmp & ~post_mask, reg_offset);
+       tmp = sh_eth_tsu_read(mdp, reg);
+       sh_eth_tsu_write(mdp, tmp & ~post_mask, reg);
 
        /* If other port enables, the function returns "true" */
        return tmp & ref_mask;
@@ -3023,15 +3076,10 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
                 pdev->name, pdev->id);
 
        /* register MDIO bus */
-       if (dev->of_node) {
-               ret = of_mdiobus_register(mdp->mii_bus, dev->of_node);
-       } else {
-               if (pd->phy_irq > 0)
-                       mdp->mii_bus->irq[pd->phy] = pd->phy_irq;
-
-               ret = mdiobus_register(mdp->mii_bus);
-       }
+       if (pd->phy_irq > 0)
+               mdp->mii_bus->irq[pd->phy] = pd->phy_irq;
 
+       ret = of_mdiobus_register(mdp->mii_bus, dev->of_node);
        if (ret)
                goto out_free_bus;
 
@@ -3130,6 +3178,7 @@ static const struct of_device_id sh_eth_match_table[] = {
        { .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data },
        { .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data },
        { .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data },
+       { .compatible = "renesas,gether-r8a77980", .data = &r8a77980_data },
        { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data },
        { .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data },
        { .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data },
index a5b792ce2ae7d046e78ec4c7bfa886a805bc00e8..5dee19b61aeed8713b9d38b393bdbc379bd7d02c 100644 (file)
@@ -163,7 +163,7 @@ enum {
 };
 
 /* Driver's parameters */
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_RENESAS)
 #define SH_ETH_RX_ALIGN                32
 #else
 #define SH_ETH_RX_ALIGN                2
@@ -184,6 +184,7 @@ enum GECMR_BIT {
 
 /* EDMR */
 enum DMAC_M_BIT {
+       EDMR_NBST = 0x80,
       EDMR_EL = 0x40, /* Little endian */
        EDMR_DL1 = 0x20, EDMR_DL0 = 0x10,
        EDMR_SRST_GETHER = 0x03,
@@ -242,7 +243,7 @@ enum EESR_BIT {
        EESR_CND        = 0x00000800,
        EESR_DLC        = 0x00000400,
        EESR_CD         = 0x00000200,
-       EESR_RTO        = 0x00000100,
+       EESR_TRO        = 0x00000100,
        EESR_RMAF       = 0x00000080,
        EESR_CEEF       = 0x00000040,
        EESR_CELF       = 0x00000020,
@@ -262,7 +263,7 @@ enum EESR_BIT {
                                 EESR_CERF)  /* Recv frame CRC error */
 
 #define DEFAULT_TX_CHECK       (EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | \
-                                EESR_RTO)
+                                EESR_TRO)
 #define DEFAULT_EESR_ERR_CHECK (EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE | \
                                 EESR_RDE | EESR_RFRMER | EESR_ADE | \
                                 EESR_TFE | EESR_TDE)
@@ -498,20 +499,21 @@ struct sh_eth_cpu_data {
 
        /* hardware features */
        unsigned long irq_flags; /* IRQ configuration flags */
-       unsigned no_psr:1;      /* EtherC DO NOT have PSR */
-       unsigned apr:1;         /* EtherC have APR */
-       unsigned mpr:1;         /* EtherC have MPR */
-       unsigned tpauser:1;     /* EtherC have TPAUSER */
-       unsigned bculr:1;       /* EtherC have BCULR */
-       unsigned tsu:1;         /* EtherC have TSU */
-       unsigned hw_swap:1;     /* E-DMAC have DE bit in EDMR */
-       unsigned rpadir:1;      /* E-DMAC have RPADIR */
-       unsigned no_trimd:1;    /* E-DMAC DO NOT have TRIMD */
-       unsigned no_ade:1;      /* E-DMAC DO NOT have ADE bit in EESR */
+       unsigned no_psr:1;      /* EtherC DOES NOT have PSR */
+       unsigned apr:1;         /* EtherC has APR */
+       unsigned mpr:1;         /* EtherC has MPR */
+       unsigned tpauser:1;     /* EtherC has TPAUSER */
+       unsigned bculr:1;       /* EtherC has BCULR */
+       unsigned tsu:1;         /* EtherC has TSU */
+       unsigned hw_swap:1;     /* E-DMAC has DE bit in EDMR */
+       unsigned nbst:1;        /* E-DMAC has NBST bit in EDMR */
+       unsigned rpadir:1;      /* E-DMAC has RPADIR */
+       unsigned no_trimd:1;    /* E-DMAC DOES NOT have TRIMD */
+       unsigned no_ade:1;      /* E-DMAC DOES NOT have ADE bit in EESR */
        unsigned no_xdfar:1;    /* E-DMAC DOES NOT have RDFAR/TDFAR */
        unsigned xdfar_rw:1;    /* E-DMAC has writeable RDFAR/TDFAR */
        unsigned hw_checksum:1; /* E-DMAC has CSMR */
-       unsigned select_mii:1;  /* EtherC have RMII_MII (MII select register) */
+       unsigned select_mii:1;  /* EtherC has RMII_MII (MII select register) */
        unsigned rmiimode:1;    /* EtherC has RMIIMODE register */
        unsigned rtrate:1;      /* EtherC has RTRATE register */
        unsigned magic:1;       /* EtherC has ECMR.MPDE and ECSR.MPD */
index 056cb60936303563ac835a8a450e29566cd2bb86..e73e4febeedbaec5bf545a877320c9e67c09d019 100644 (file)
@@ -2738,6 +2738,8 @@ static void rocker_switchdev_event_work(struct work_struct *work)
        switch (switchdev_work->event) {
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
+               if (!fdb_info->added_by_user)
+                       break;
                err = rocker_world_port_fdb_add(rocker_port, fdb_info);
                if (err) {
                        netdev_dbg(rocker_port->dev, "fdb add failed err=%d\n", err);
@@ -2747,6 +2749,8 @@ static void rocker_switchdev_event_work(struct work_struct *work)
                break;
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
+               if (!fdb_info->added_by_user)
+                       break;
                err = rocker_world_port_fdb_del(rocker_port, fdb_info);
                if (err)
                        netdev_dbg(rocker_port->dev, "fdb add failed err=%d\n", err);
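The added_by_user checks make rocker offload only FDB entries installed from userspace; entries the switch learned by itself would otherwise be echoed straight back to the hardware that reported them. The guard, reduced to a standalone sketch:

struct fdb_info_lite {
	int added_by_user;	/* nonzero for entries installed via the bridge */
};

/* Returns nonzero when the FDB event should be programmed into hardware. */
static int should_offload_fdb(const struct fdb_info_lite *fdb)
{
	return fdb->added_by_user;	/* skip device-learned entries */
}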
index 83ce229f4eb7a29a9502b7f843aee4f5a8a8675a..d90a7b1f4088623ccd664d136389482b242a3456 100644 (file)
@@ -3999,29 +3999,6 @@ static void efx_ef10_prepare_flr(struct efx_nic *efx)
        atomic_set(&efx->active_queues, 0);
 }
 
-static bool efx_ef10_filter_equal(const struct efx_filter_spec *left,
-                                 const struct efx_filter_spec *right)
-{
-       if ((left->match_flags ^ right->match_flags) |
-           ((left->flags ^ right->flags) &
-            (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
-               return false;
-
-       return memcmp(&left->outer_vid, &right->outer_vid,
-                     sizeof(struct efx_filter_spec) -
-                     offsetof(struct efx_filter_spec, outer_vid)) == 0;
-}
-
-static unsigned int efx_ef10_filter_hash(const struct efx_filter_spec *spec)
-{
-       BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
-       return jhash2((const u32 *)&spec->outer_vid,
-                     (sizeof(struct efx_filter_spec) -
-                      offsetof(struct efx_filter_spec, outer_vid)) / 4,
-                     0);
-       /* XXX should we randomise the initval? */
-}
-
 /* Decide whether a filter should be exclusive or else should allow
  * delivery to additional recipients.  Currently we decide that
  * filters for specific local unicast MAC and IP addresses are
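The two helpers deleted above reappear unchanged as efx_filter_spec_equal()/efx_filter_spec_hash() in common code (see the efx.c hunk further down), so the farch path can share them. The hashing idea: every match field of efx_filter_spec from ->outer_vid onward is comparable as raw bytes, so that tail is fed to jhash2() as u32 words. A standalone sketch with hash_words() standing in for the kernel's jhash2():

#include <stddef.h>
#include <stdint.h>

struct spec {
	uint32_t match_flags;	/* compared separately, never hashed */
	uint16_t outer_vid;	/* hashable tail starts here */
	uint8_t  rest[22];	/* remaining match fields, layout elided */
};

extern uint32_t hash_words(const uint32_t *words, size_t n, uint32_t seed);

static uint32_t spec_hash(const struct spec *s)
{
	_Static_assert(offsetof(struct spec, outer_vid) % 4 == 0,
		       "tail must be u32-aligned for word hashing");
	return hash_words((const uint32_t *)&s->outer_vid,
			  (sizeof(*s) - offsetof(struct spec, outer_vid)) / 4,
			  0);
}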
@@ -4346,7 +4323,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
                goto out_unlock;
        match_pri = rc;
 
-       hash = efx_ef10_filter_hash(spec);
+       hash = efx_filter_spec_hash(spec);
        is_mc_recip = efx_filter_is_mc_recipient(spec);
        if (is_mc_recip)
                bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
@@ -4378,7 +4355,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
                if (!saved_spec) {
                        if (ins_index < 0)
                                ins_index = i;
-               } else if (efx_ef10_filter_equal(spec, saved_spec)) {
+               } else if (efx_filter_spec_equal(spec, saved_spec)) {
                        if (spec->priority < saved_spec->priority &&
                            spec->priority != EFX_FILTER_PRI_AUTO) {
                                rc = -EPERM;
@@ -4762,27 +4739,63 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
 static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
                                           unsigned int filter_idx)
 {
+       struct efx_filter_spec *spec, saved_spec;
        struct efx_ef10_filter_table *table;
-       struct efx_filter_spec *spec;
-       bool ret;
+       struct efx_arfs_rule *rule = NULL;
+       bool ret = true, force = false;
+       u16 arfs_id;
 
        down_read(&efx->filter_sem);
        table = efx->filter_state;
        down_write(&table->lock);
        spec = efx_ef10_filter_entry_spec(table, filter_idx);
 
-       if (!spec || spec->priority != EFX_FILTER_PRI_HINT) {
-               ret = true;
+       if (!spec || spec->priority != EFX_FILTER_PRI_HINT)
                goto out_unlock;
-       }
 
-       if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, 0)) {
+       spin_lock_bh(&efx->rps_hash_lock);
+       if (!efx->rps_hash_table) {
+               /* In the absence of the table, we always return 0 to ARFS. */
+               arfs_id = 0;
+       } else {
+               rule = efx_rps_hash_find(efx, spec);
+               if (!rule)
+                       /* ARFS table doesn't know of this filter, so remove it */
+                       goto expire;
+               arfs_id = rule->arfs_id;
+               ret = efx_rps_check_rule(rule, filter_idx, &force);
+               if (force)
+                       goto expire;
+               if (!ret) {
+                       spin_unlock_bh(&efx->rps_hash_lock);
+                       goto out_unlock;
+               }
+       }
+       if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id))
                ret = false;
-               goto out_unlock;
+       else if (rule)
+               rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+expire:
+       saved_spec = *spec; /* remove operation will kfree spec */
+       spin_unlock_bh(&efx->rps_hash_lock);
+       /* At this point (since we dropped the lock), another thread might queue
+        * up a fresh insertion request (but the actual insertion will be held
+        * up by our possession of the filter table lock).  In that case, it
+        * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that
+        * the rule is not removed by efx_rps_hash_del() below.
+        */
+       if (ret)
+               ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
+                                                     filter_idx, true) == 0;
+       /* While we can't safely dereference rule (we dropped the lock), we can
+        * still test it for NULL.
+        */
+       if (ret && rule) {
+               /* Expiring, so remove entry from ARFS table */
+               spin_lock_bh(&efx->rps_hash_lock);
+               efx_rps_hash_del(efx, &saved_spec);
+               spin_unlock_bh(&efx->rps_hash_lock);
        }
-
-       ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
-                                             filter_idx, true) == 0;
 out_unlock:
        up_write(&table->lock);
        up_read(&efx->filter_sem);
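The saved_spec copy above is the load-bearing detail: spec points into the filter table and may be kfree()d by the remove call, so the function snapshots it under rps_hash_lock and afterwards addresses the ARFS entry only by value. Condensed to its essentials in the driver's own idiom (a kernel-style fragment, not the full function):

static void expire_by_value(struct efx_nic *efx, struct efx_filter_spec *spec)
{
	struct efx_filter_spec saved;

	spin_lock_bh(&efx->rps_hash_lock);
	saved = *spec;			/* *spec may be freed once we unlock */
	spin_unlock_bh(&efx->rps_hash_lock);

	/* ... filter-table removal runs here and may free *spec ... */

	spin_lock_bh(&efx->rps_hash_lock);
	efx_rps_hash_del(efx, &saved);	/* lookup by contents, not pointer */
	spin_unlock_bh(&efx->rps_hash_lock);
}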
index c30b9e26f13112f9b0a5984c83e6874d1bbaaa11..88c1eee677c222c68adca6e23fba8c05083eca83 100644 (file)
@@ -3059,6 +3059,10 @@ static int efx_init_struct(struct efx_nic *efx,
        mutex_init(&efx->mac_lock);
 #ifdef CONFIG_RFS_ACCEL
        mutex_init(&efx->rps_mutex);
+       spin_lock_init(&efx->rps_hash_lock);
+       /* Failure to allocate is not fatal, but may degrade ARFS performance */
+       efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
+                                     sizeof(*efx->rps_hash_table), GFP_KERNEL);
 #endif
        efx->phy_op = &efx_dummy_phy_operations;
        efx->mdio.dev = net_dev;
@@ -3102,6 +3106,10 @@ static void efx_fini_struct(struct efx_nic *efx)
 {
        int i;
 
+#ifdef CONFIG_RFS_ACCEL
+       kfree(efx->rps_hash_table);
+#endif
+
        for (i = 0; i < EFX_MAX_CHANNELS; i++)
                kfree(efx->channel[i]);
 
@@ -3124,6 +3132,141 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
        stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
 }
 
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+                          const struct efx_filter_spec *right)
+{
+       if ((left->match_flags ^ right->match_flags) |
+           ((left->flags ^ right->flags) &
+            (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
+               return false;
+
+       return memcmp(&left->outer_vid, &right->outer_vid,
+                     sizeof(struct efx_filter_spec) -
+                     offsetof(struct efx_filter_spec, outer_vid)) == 0;
+}
+
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
+{
+       BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
+       return jhash2((const u32 *)&spec->outer_vid,
+                     (sizeof(struct efx_filter_spec) -
+                      offsetof(struct efx_filter_spec, outer_vid)) / 4,
+                     0);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+                       bool *force)
+{
+       if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
+               /* ARFS is currently updating this entry, leave it */
+               return false;
+       }
+       if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
+               /* ARFS tried and failed to update this, so it's probably out
+                * of date.  Remove the filter and the ARFS rule entry.
+                */
+               rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+               *force = true;
+               return true;
+       } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
+               /* ARFS has moved on, so old filter is not needed.  Since we did
+                * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
+                * not be removed by efx_rps_hash_del() subsequently.
+                */
+               *force = true;
+               return true;
+       }
+       /* Remove it iff ARFS wants to. */
+       return true;
+}
+
+struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
+                                      const struct efx_filter_spec *spec)
+{
+       u32 hash = efx_filter_spec_hash(spec);
+
+       WARN_ON(!spin_is_locked(&efx->rps_hash_lock));
+       if (!efx->rps_hash_table)
+               return NULL;
+       return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
+}
+
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+                                       const struct efx_filter_spec *spec)
+{
+       struct efx_arfs_rule *rule;
+       struct hlist_head *head;
+       struct hlist_node *node;
+
+       head = efx_rps_hash_bucket(efx, spec);
+       if (!head)
+               return NULL;
+       hlist_for_each(node, head) {
+               rule = container_of(node, struct efx_arfs_rule, node);
+               if (efx_filter_spec_equal(spec, &rule->spec))
+                       return rule;
+       }
+       return NULL;
+}
+
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+                                      const struct efx_filter_spec *spec,
+                                      bool *new)
+{
+       struct efx_arfs_rule *rule;
+       struct hlist_head *head;
+       struct hlist_node *node;
+
+       head = efx_rps_hash_bucket(efx, spec);
+       if (!head)
+               return NULL;
+       hlist_for_each(node, head) {
+               rule = container_of(node, struct efx_arfs_rule, node);
+               if (efx_filter_spec_equal(spec, &rule->spec)) {
+                       *new = false;
+                       return rule;
+               }
+       }
+       rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
+       *new = true;
+       if (rule) {
+               memcpy(&rule->spec, spec, sizeof(rule->spec));
+               hlist_add_head(&rule->node, head);
+       }
+       return rule;
+}
+
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
+{
+       struct efx_arfs_rule *rule;
+       struct hlist_head *head;
+       struct hlist_node *node;
+
+       head = efx_rps_hash_bucket(efx, spec);
+       if (WARN_ON(!head))
+               return;
+       hlist_for_each(node, head) {
+               rule = container_of(node, struct efx_arfs_rule, node);
+               if (efx_filter_spec_equal(spec, &rule->spec)) {
+                       /* Someone already reused the entry.  We know that if
+                        * this check doesn't fire (i.e. filter_id == REMOVING)
+                        * then the REMOVING mark was put there by our caller,
+                        * because caller is holding a lock on filter table and
+                        * only holders of that lock set REMOVING.
+                        */
+                       if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
+                               return;
+                       hlist_del(node);
+                       kfree(rule);
+                       return;
+               }
+       }
+       /* We didn't find it. */
+       WARN_ON(1);
+}
+#endif
+
 /* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
  * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
  */
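Stepping back: the ARFS helpers just added form a small open-hashed map keyed by filter spec, with efx_filter_spec_hash() choosing one of EFX_ARFS_HASH_TABLE_SIZE buckets and efx_filter_spec_equal() resolving collisions by linear scan. A condensed sketch of how the insertion path (efx_filter_rfs(), later in this diff) drives it, assuming the caller holds rps_hash_lock:

static int track_arfs_rule(struct efx_nic *efx,
			   const struct efx_filter_spec *spec)
{
	struct efx_arfs_rule *rule;
	bool new;

	rule = efx_rps_hash_add(efx, spec, &new);
	if (!rule)
		return -ENOMEM;		/* no table, or allocation failed */
	if (new) {
		rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
	}
	return rule->arfs_id;
}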
index a3140e16fcef31f553eeab15e60e8696781ad324..3f759ebdcf1012ce9e913dcd6e96f5fd9307e1a6 100644 (file)
@@ -186,6 +186,27 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) {}
 #endif
 bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
 
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+                          const struct efx_filter_spec *right);
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec);
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+                       bool *force);
+
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+                                       const struct efx_filter_spec *spec);
+
+/* @new is written to indicate if entry was newly added (true) or if an old
+ * entry was found and returned (false).
+ */
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+                                      const struct efx_filter_spec *spec,
+                                      bool *new);
+
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
+#endif
+
 /* RSS contexts */
 struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
 struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
index 7174ef5e5c5e310ed83c362a81df58a94af7bdc2..c72adf8b52eac62fd31cf16f3dbae15ee1404381 100644 (file)
@@ -2905,18 +2905,45 @@ bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 {
        struct efx_farch_filter_state *state = efx->filter_state;
        struct efx_farch_filter_table *table;
-       bool ret = false;
+       bool ret = false, force = false;
+       u16 arfs_id;
 
        down_write(&state->lock);
+       spin_lock_bh(&efx->rps_hash_lock);
        table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
        if (test_bit(index, table->used_bitmap) &&
-           table->spec[index].priority == EFX_FILTER_PRI_HINT &&
-           rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
-                               flow_id, 0)) {
-               efx_farch_filter_table_clear_entry(efx, table, index);
-               ret = true;
+           table->spec[index].priority == EFX_FILTER_PRI_HINT) {
+               struct efx_arfs_rule *rule = NULL;
+               struct efx_filter_spec spec;
+
+               efx_farch_filter_to_gen_spec(&spec, &table->spec[index]);
+               if (!efx->rps_hash_table) {
+                       /* In the absence of the table, we always returned 0 to
+                        * ARFS, so use the same to query it.
+                        */
+                       arfs_id = 0;
+               } else {
+                       rule = efx_rps_hash_find(efx, &spec);
+                       if (!rule) {
+                               /* ARFS table doesn't know of this filter, remove it */
+                               force = true;
+                       } else {
+                               arfs_id = rule->arfs_id;
+                               if (!efx_rps_check_rule(rule, index, &force))
+                                       goto out_unlock;
+                       }
+               }
+               if (force || rps_may_expire_flow(efx->net_dev, spec.dmaq_id,
+                                                flow_id, arfs_id)) {
+                       if (rule)
+                               rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+                       efx_rps_hash_del(efx, &spec);
+                       efx_farch_filter_table_clear_entry(efx, table, index);
+                       ret = true;
+               }
        }
-
+out_unlock:
+       spin_unlock_bh(&efx->rps_hash_lock);
        up_write(&state->lock);
        return ret;
 }
index eea3808b3f250f70b7f5191e56eb9fa89c3c6eef..65568925c3efe6398d1e26ca1520b53919f60291 100644 (file)
@@ -734,6 +734,35 @@ struct efx_rss_context {
 };
 
 #ifdef CONFIG_RFS_ACCEL
+/* Order of these is important, since filter_id >= %EFX_ARFS_FILTER_ID_PENDING
+ * is used to test if filter does or will exist.
+ */
+#define EFX_ARFS_FILTER_ID_PENDING     -1
+#define EFX_ARFS_FILTER_ID_ERROR       -2
+#define EFX_ARFS_FILTER_ID_REMOVING    -3
+/**
+ * struct efx_arfs_rule - record of an ARFS filter and its IDs
+ * @node: linkage into hash table
+ * @spec: details of the filter (used as key for hash table).  Use efx->type to
+ *     determine which member to use.
+ * @rxq_index: channel to which the filter will steer traffic.
+ * @arfs_id: filter ID which was returned to ARFS
+ * @filter_id: index in software filter table.  May be
+ *     %EFX_ARFS_FILTER_ID_PENDING if filter was not inserted yet,
+ *     %EFX_ARFS_FILTER_ID_ERROR if filter insertion failed, or
+ *     %EFX_ARFS_FILTER_ID_REMOVING if expiry is currently removing the filter.
+ */
+struct efx_arfs_rule {
+       struct hlist_node node;
+       struct efx_filter_spec spec;
+       u16 rxq_index;
+       u16 arfs_id;
+       s32 filter_id;
+};
+
+/* Size chosen so that the table is one page (4kB) */
+#define EFX_ARFS_HASH_TABLE_SIZE       512
+
 /**
  * struct efx_async_filter_insertion - Request to asynchronously insert a filter
  * @net_dev: Reference to the netdevice
@@ -873,6 +902,10 @@ struct efx_async_filter_insertion {
  *     @rps_expire_channel's @rps_flow_id
  * @rps_slot_map: bitmap of in-flight entries in @rps_slot
  * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work()
+ * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and
+ *     @rps_next_id).
+ * @rps_hash_table: Mapping between ARFS filters and their various IDs
+ * @rps_next_id: next arfs_id for an ARFS filter
  * @active_queues: Count of RX and TX queues that haven't been flushed and drained.
  * @rxq_flush_pending: Count of number of receive queues that need to be flushed.
  *     Decremented when the efx_flush_rx_queue() is called.
@@ -1029,6 +1062,9 @@ struct efx_nic {
        unsigned int rps_expire_index;
        unsigned long rps_slot_map;
        struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT];
+       spinlock_t rps_hash_lock;
+       struct hlist_head *rps_hash_table;
+       u32 rps_next_id;
 #endif
 
        atomic_t active_queues;
index 9c593c661cbfd98e9aacc81f5f9580e884cb2cb0..d2e254f2f72bf0f2fffd0078397da819a676980f 100644 (file)
@@ -834,9 +834,31 @@ static void efx_filter_rfs_work(struct work_struct *data)
        struct efx_nic *efx = netdev_priv(req->net_dev);
        struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
        int slot_idx = req - efx->rps_slot;
+       struct efx_arfs_rule *rule;
+       u16 arfs_id = 0;
        int rc;
 
        rc = efx->type->filter_insert(efx, &req->spec, true);
+       if (rc >= 0)
+               rc %= efx->type->max_rx_ip_filters;
+       if (efx->rps_hash_table) {
+               spin_lock_bh(&efx->rps_hash_lock);
+               rule = efx_rps_hash_find(efx, &req->spec);
+               /* The rule might have already gone, if someone else's request
+                * for the same spec had already been worked and then expired before
+                * we got around to our work.  In that case we have nothing
+                * tying us to an arfs_id, meaning that as soon as the filter
+                * is considered for expiry it will be removed.
+                */
+               if (rule) {
+                       if (rc < 0)
+                               rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
+                       else
+                               rule->filter_id = rc;
+                       arfs_id = rule->arfs_id;
+               }
+               spin_unlock_bh(&efx->rps_hash_lock);
+       }
        if (rc >= 0) {
                /* Remember this so we can check whether to expire the filter
                 * later.
@@ -848,18 +870,18 @@ static void efx_filter_rfs_work(struct work_struct *data)
 
                if (req->spec.ether_type == htons(ETH_P_IP))
                        netif_info(efx, rx_status, efx->net_dev,
-                                  "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
+                                  "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
                                   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
                                   req->spec.rem_host, ntohs(req->spec.rem_port),
                                   req->spec.loc_host, ntohs(req->spec.loc_port),
-                                  req->rxq_index, req->flow_id, rc);
+                                  req->rxq_index, req->flow_id, rc, arfs_id);
                else
                        netif_info(efx, rx_status, efx->net_dev,
-                                  "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
+                                  "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
                                   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
                                   req->spec.rem_host, ntohs(req->spec.rem_port),
                                   req->spec.loc_host, ntohs(req->spec.loc_port),
-                                  req->rxq_index, req->flow_id, rc);
+                                  req->rxq_index, req->flow_id, rc, arfs_id);
        }
 
        /* Release references */
@@ -872,8 +894,10 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 {
        struct efx_nic *efx = netdev_priv(net_dev);
        struct efx_async_filter_insertion *req;
+       struct efx_arfs_rule *rule;
        struct flow_keys fk;
        int slot_idx;
+       bool new;
        int rc;
 
        /* find a free slot */
@@ -926,12 +950,42 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
        req->spec.rem_port = fk.ports.src;
        req->spec.loc_port = fk.ports.dst;
 
+       if (efx->rps_hash_table) {
+               /* Add it to ARFS hash table */
+               spin_lock(&efx->rps_hash_lock);
+               rule = efx_rps_hash_add(efx, &req->spec, &new);
+               if (!rule) {
+                       rc = -ENOMEM;
+                       goto out_unlock;
+               }
+               if (new)
+                       rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
+               rc = rule->arfs_id;
+               /* Skip if existing or pending filter already does the right thing */
+               if (!new && rule->rxq_index == rxq_index &&
+                   rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
+                       goto out_unlock;
+               rule->rxq_index = rxq_index;
+               rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
+               spin_unlock(&efx->rps_hash_lock);
+       } else {
+               /* Without an ARFS hash table, we just use arfs_id 0 for all
+                * filters.  This means if multiple flows hash to the same
+                * flow_id, all but the most recently touched will be eligible
+                * for expiry.
+                */
+               rc = 0;
+       }
+
+       /* Queue the request */
        dev_hold(req->net_dev = net_dev);
        INIT_WORK(&req->work, efx_filter_rfs_work);
        req->rxq_index = rxq_index;
        req->flow_id = flow_id;
        schedule_work(&req->work);
-       return 0;
+       return rc;
+out_unlock:
+       spin_unlock(&efx->rps_hash_lock);
 out_clear:
        clear_bit(slot_idx, &efx->rps_slot_map);
        return rc;
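
Returning rule->arfs_id (rather than 0) matters because the core stores efx_filter_rfs()'s return value in its flow table and later passes it back through rps_may_expire_flow(), so distinct IDs let flows that hash to the same flow_id be told apart at expiry time. A sketch of the allocation step, assuming RPS_NO_FILTER is the core's "no filter" sentinel:

static u16 next_arfs_id(struct efx_nic *efx)
{
	/* wrap before the sentinel so a real ID never reads as "none" */
	return efx->rps_next_id++ % RPS_NO_FILTER;
}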
index e3b578b4f7cb73685e9a5cfe3a8ed4ed5e777f55..68e9e2640c62845b8765671a2a2bfc8ac28f4a10 100644 (file)
@@ -5,7 +5,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o  \
              dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o     \
              mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o  \
              dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \
-             $(stmmac-y)
+             stmmac_tc.o $(stmmac-y)
 
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)  += stmmac-platform.o
index 627e905b6d76b1f36042ac9c9bb63bd79e017093..a679cb729d1dea1904557371658248bc7efd17c4 100644 (file)
@@ -353,6 +353,10 @@ struct dma_features {
        unsigned int rx_fifo_size;
        /* Automotive Safety Package */
        unsigned int asp;
+       /* RX Parser */
+       unsigned int frpsel;
+       unsigned int frpbs;
+       unsigned int frpes;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -412,6 +416,7 @@ struct mac_device_info {
        const struct stmmac_dma_ops *dma;
        const struct stmmac_mode_ops *mode;
        const struct stmmac_hwtimestamp *ptp;
+       const struct stmmac_tc_ops *tc;
        struct mii_regs mii;    /* MII register Addresses */
        struct mac_link link;
        void __iomem *pcsr;     /* virtual pointer to device CSRs */
index 7cb794094a70665ea009f1b7e2160cead7186012..4ff231df73225dc449b21c7c1f312666b2a957fd 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/of_net.h>
 #include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
 
 #define PRG_ETH0_RGMII_MODE            BIT(0)
 
+#define PRG_ETH0_EXT_PHY_MODE_MASK     GENMASK(2, 0)
+#define PRG_ETH0_EXT_RGMII_MODE                1
+#define PRG_ETH0_EXT_RMII_MODE         4
+
 /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */
 #define PRG_ETH0_CLK_M250_SEL_SHIFT    4
 #define PRG_ETH0_CLK_M250_SEL_MASK     GENMASK(4, 4)
 
 #define MUX_CLK_NUM_PARENTS            2
 
+struct meson8b_dwmac;
+
+struct meson8b_dwmac_data {
+       int (*set_phy_mode)(struct meson8b_dwmac *dwmac);
+};
+
 struct meson8b_dwmac {
-       struct device           *dev;
-       void __iomem            *regs;
-       phy_interface_t         phy_mode;
-       struct clk              *rgmii_tx_clk;
-       u32                     tx_delay_ns;
+       struct device                   *dev;
+       void __iomem                    *regs;
+
+       const struct meson8b_dwmac_data *data;
+       phy_interface_t                 phy_mode;
+       struct clk                      *rgmii_tx_clk;
+       u32                             tx_delay_ns;
 };
 
 struct meson8b_dwmac_clk_configs {
@@ -171,6 +184,59 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
        return 0;
 }
 
+static int meson8b_set_phy_mode(struct meson8b_dwmac *dwmac)
+{
+       switch (dwmac->phy_mode) {
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               /* enable RGMII mode */
+               meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+                                       PRG_ETH0_RGMII_MODE,
+                                       PRG_ETH0_RGMII_MODE);
+               break;
+       case PHY_INTERFACE_MODE_RMII:
+               /* disable RGMII mode -> enables RMII mode */
+               meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+                                       PRG_ETH0_RGMII_MODE, 0);
+               break;
+       default:
+               dev_err(dwmac->dev, "failed to set phy-mode %s\n",
+                       phy_modes(dwmac->phy_mode));
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac)
+{
+       switch (dwmac->phy_mode) {
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               /* enable RGMII mode */
+               meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+                                       PRG_ETH0_EXT_PHY_MODE_MASK,
+                                       PRG_ETH0_EXT_RGMII_MODE);
+               break;
+       case PHY_INTERFACE_MODE_RMII:
+               /* disable RGMII mode -> enables RMII mode */
+               meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+                                       PRG_ETH0_EXT_PHY_MODE_MASK,
+                                       PRG_ETH0_EXT_RMII_MODE);
+               break;
+       default:
+               dev_err(dwmac->dev, "failed to set phy-mode %s\n",
+                       phy_modes(dwmac->phy_mode));
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 {
        int ret;
@@ -188,10 +254,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 
        case PHY_INTERFACE_MODE_RGMII_ID:
        case PHY_INTERFACE_MODE_RGMII_TXID:
-               /* enable RGMII mode */
-               meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE,
-                                       PRG_ETH0_RGMII_MODE);
-
                /* only relevant for RMII mode -> disable in RGMII mode */
                meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
                                        PRG_ETH0_INVERTED_RMII_CLK, 0);
@@ -224,10 +286,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
                break;
 
        case PHY_INTERFACE_MODE_RMII:
-               /* disable RGMII mode -> enables RMII mode */
-               meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE,
-                                       0);
-
                /* invert internal clk_rmii_i to generate 25/2.5 tx_rx_clk */
                meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
                                        PRG_ETH0_INVERTED_RMII_CLK,
@@ -274,6 +332,11 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
                goto err_remove_config_dt;
        }
 
+       dwmac->data = (const struct meson8b_dwmac_data *)
+               of_device_get_match_data(&pdev->dev);
+       if (!dwmac->data) {
+               ret = -EINVAL;
+               goto err_remove_config_dt;
+       }
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
        dwmac->regs = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(dwmac->regs)) {
@@ -298,6 +361,10 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
        if (ret)
                goto err_remove_config_dt;
 
+       ret = dwmac->data->set_phy_mode(dwmac);
+       if (ret)
+               goto err_remove_config_dt;
+
        ret = meson8b_init_prg_eth(dwmac);
        if (ret)
                goto err_remove_config_dt;
@@ -316,10 +383,31 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
        return ret;
 }
 
+static const struct meson8b_dwmac_data meson8b_dwmac_data = {
+       .set_phy_mode = meson8b_set_phy_mode,
+};
+
+static const struct meson8b_dwmac_data meson_axg_dwmac_data = {
+       .set_phy_mode = meson_axg_set_phy_mode,
+};
+
 static const struct of_device_id meson8b_dwmac_match[] = {
-       { .compatible = "amlogic,meson8b-dwmac" },
-       { .compatible = "amlogic,meson8m2-dwmac" },
-       { .compatible = "amlogic,meson-gxbb-dwmac" },
+       {
+               .compatible = "amlogic,meson8b-dwmac",
+               .data = &meson8b_dwmac_data,
+       },
+       {
+               .compatible = "amlogic,meson8m2-dwmac",
+               .data = &meson8b_dwmac_data,
+       },
+       {
+               .compatible = "amlogic,meson-gxbb-dwmac",
+               .data = &meson8b_dwmac_data,
+       },
+       {
+               .compatible = "amlogic,meson-axg-dwmac",
+               .data = &meson_axg_dwmac_data,
+       },
        { }
 };
 MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
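
The per-compatible .data pointer is the usual way to pick SoC-specific behaviour at probe time. A generic sketch of the pattern (names below are illustrative, not this driver's):

struct soc_ops {
	int (*set_phy_mode)(struct device *dev);
};

static int example_probe(struct platform_device *pdev)
{
	const struct soc_ops *ops = of_device_get_match_data(&pdev->dev);

	if (!ops)	/* compatible matched but no match data wired up */
		return -EINVAL;
	return ops->set_phy_mode(&pdev->dev);
}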
index 13133b30b575e74a081f35158e360d7eee8b28ab..f08625a02cea03f8dcf55ca3f9cc0f06fa4e3ca5 100644 (file)
@@ -1104,30 +1104,20 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
        } else {
                if (bsp_priv->clk_enabled) {
                        if (phy_iface == PHY_INTERFACE_MODE_RMII) {
-                               if (!IS_ERR(bsp_priv->mac_clk_rx))
-                                       clk_disable_unprepare(
-                                               bsp_priv->mac_clk_rx);
+                               clk_disable_unprepare(bsp_priv->mac_clk_rx);
 
-                               if (!IS_ERR(bsp_priv->clk_mac_ref))
-                                       clk_disable_unprepare(
-                                               bsp_priv->clk_mac_ref);
+                               clk_disable_unprepare(bsp_priv->clk_mac_ref);
 
-                               if (!IS_ERR(bsp_priv->clk_mac_refout))
-                                       clk_disable_unprepare(
-                                               bsp_priv->clk_mac_refout);
+                               clk_disable_unprepare(bsp_priv->clk_mac_refout);
                        }
 
-                       if (!IS_ERR(bsp_priv->clk_phy))
-                               clk_disable_unprepare(bsp_priv->clk_phy);
+                       clk_disable_unprepare(bsp_priv->clk_phy);
 
-                       if (!IS_ERR(bsp_priv->aclk_mac))
-                               clk_disable_unprepare(bsp_priv->aclk_mac);
+                       clk_disable_unprepare(bsp_priv->aclk_mac);
 
-                       if (!IS_ERR(bsp_priv->pclk_mac))
-                               clk_disable_unprepare(bsp_priv->pclk_mac);
+                       clk_disable_unprepare(bsp_priv->pclk_mac);
 
-                       if (!IS_ERR(bsp_priv->mac_clk_tx))
-                               clk_disable_unprepare(bsp_priv->mac_clk_tx);
+                       clk_disable_unprepare(bsp_priv->mac_clk_tx);
                        /**
                         * if (!IS_ERR(bsp_priv->clk_mac))
                         *      clk_disable_unprepare(bsp_priv->clk_mac);
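
The dropped IS_ERR() guards are redundant because the common clk API already tolerates both NULL and error pointers on the teardown path; paraphrasing the clk core:

void clk_unprepare(struct clk *clk)
{
	if (IS_ERR_OR_NULL(clk))
		return;
	/* ...actual unprepare work... */
}

clk_disable() bails out the same way, so clk_disable_unprepare() on a clock that was never obtained is simply a no-op.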
index a3fa65b1ca8e5f9c45416b3b114128ead5166ca3..2e6e2a96b4f263023e04eaad77e56f160cbedc5c 100644 (file)
  *                             This value is used to properly disable the EMAC,
  *                             and serves as a safe starting value in case the
  *                             boot process (e.g. U-Boot) left the register set.
+ * @syscon_field:              reg_field for the syscon's GMAC register
  * @soc_has_internal_phy:      Does the MAC embed an internal PHY
  * @support_mii:               Does the MAC handle MII
  * @support_rmii:              Does the MAC handle RMII
  * @support_rgmii:             Does the MAC handle RGMII
+ *
+ * @rx_delay_max:              Maximum raw value for RX delay chain
+ * @tx_delay_max:              Maximum raw value for TX delay chain
+ *                             These two also indicate the bitmask for
+ *                             the RX and TX delay chain registers. A
+ *                             value of zero indicates this is not supported.
  */
 struct emac_variant {
        u32 default_syscon_value;
+       const struct reg_field *syscon_field;
        bool soc_has_internal_phy;
        bool support_mii;
        bool support_rmii;
        bool support_rgmii;
+       u8 rx_delay_max;
+       u8 tx_delay_max;
 };
 
 /* struct sunxi_priv_data - hold all sunxi private data
@@ -71,38 +81,70 @@ struct sunxi_priv_data {
        struct regulator *regulator;
        struct reset_control *rst_ephy;
        const struct emac_variant *variant;
-       struct regmap *regmap;
+       struct regmap_field *regmap_field;
        bool internal_phy_powered;
        void *mux_handle;
 };
 
+/* EMAC clock register @ 0x30 in the "system control" address range */
+static const struct reg_field sun8i_syscon_reg_field = {
+       .reg = 0x30,
+       .lsb = 0,
+       .msb = 31,
+};
+
+/* EMAC clock register @ 0x164 in the CCU address range */
+static const struct reg_field sun8i_ccu_reg_field = {
+       .reg = 0x164,
+       .lsb = 0,
+       .msb = 31,
+};
+
 static const struct emac_variant emac_variant_h3 = {
        .default_syscon_value = 0x58000,
+       .syscon_field = &sun8i_syscon_reg_field,
        .soc_has_internal_phy = true,
        .support_mii = true,
        .support_rmii = true,
-       .support_rgmii = true
+       .support_rgmii = true,
+       .rx_delay_max = 31,
+       .tx_delay_max = 7,
 };
 
 static const struct emac_variant emac_variant_v3s = {
        .default_syscon_value = 0x38000,
+       .syscon_field = &sun8i_syscon_reg_field,
        .soc_has_internal_phy = true,
        .support_mii = true
 };
 
 static const struct emac_variant emac_variant_a83t = {
        .default_syscon_value = 0,
+       .syscon_field = &sun8i_syscon_reg_field,
        .soc_has_internal_phy = false,
        .support_mii = true,
-       .support_rgmii = true
+       .support_rgmii = true,
+       .rx_delay_max = 31,
+       .tx_delay_max = 7,
+};
+
+static const struct emac_variant emac_variant_r40 = {
+       .default_syscon_value = 0,
+       .syscon_field = &sun8i_ccu_reg_field,
+       .support_mii = true,
+       .support_rgmii = true,
+       .rx_delay_max = 7,
 };
 
 static const struct emac_variant emac_variant_a64 = {
        .default_syscon_value = 0,
+       .syscon_field = &sun8i_syscon_reg_field,
        .soc_has_internal_phy = false,
        .support_mii = true,
        .support_rmii = true,
-       .support_rgmii = true
+       .support_rgmii = true,
+       .rx_delay_max = 31,
+       .tx_delay_max = 7,
 };
 
 #define EMAC_BASIC_CTL0 0x00
@@ -206,9 +248,7 @@ static const struct emac_variant emac_variant_a64 = {
 #define SYSCON_RMII_EN         BIT(13) /* 1: enable RMII (overrides EPIT) */
 
 /* Generic system control EMAC_CLK bits */
-#define SYSCON_ETXDC_MASK              GENMASK(2, 0)
 #define SYSCON_ETXDC_SHIFT             10
-#define SYSCON_ERXDC_MASK              GENMASK(4, 0)
 #define SYSCON_ERXDC_SHIFT             5
 /* EMAC PHY Interface Type */
 #define SYSCON_EPIT                    BIT(2) /* 1: RGMII, 0: MII */
@@ -216,7 +256,6 @@ static const struct emac_variant emac_variant_a64 = {
 #define SYSCON_ETCS_MII                0x0
 #define SYSCON_ETCS_EXT_GMII   0x1
 #define SYSCON_ETCS_INT_GMII   0x2
-#define SYSCON_EMAC_REG                0x30
 
 /* sun8i_dwmac_dma_reset() - reset the EMAC
  * Called from stmmac via stmmac_dma_ops->reset
@@ -237,17 +276,28 @@ static int sun8i_dwmac_dma_reset(void __iomem *ioaddr)
  * Called from stmmac via stmmac_dma_ops->init
  */
 static void sun8i_dwmac_dma_init(void __iomem *ioaddr,
-                                struct stmmac_dma_cfg *dma_cfg,
-                                u32 dma_tx, u32 dma_rx, int atds)
+                                struct stmmac_dma_cfg *dma_cfg, int atds)
 {
-       /* Write TX and RX descriptors address */
-       writel(dma_rx, ioaddr + EMAC_RX_DESC_LIST);
-       writel(dma_tx, ioaddr + EMAC_TX_DESC_LIST);
-
        writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN);
        writel(0x1FFFFFF, ioaddr + EMAC_INT_STA);
 }
 
+static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr,
+                                   struct stmmac_dma_cfg *dma_cfg,
+                                   u32 dma_rx_phy, u32 chan)
+{
+       /* Write RX descriptors address */
+       writel(dma_rx_phy, ioaddr + EMAC_RX_DESC_LIST);
+}
+
+static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr,
+                                   struct stmmac_dma_cfg *dma_cfg,
+                                   u32 dma_tx_phy, u32 chan)
+{
+       /* Write TX descriptors address */
+       writel(dma_tx_phy, ioaddr + EMAC_TX_DESC_LIST);
+}
+
 /* sun8i_dwmac_dump_regs() - Dump EMAC address space
  * Called from stmmac_dma_ops->dump_regs
  * Used for ethtool
@@ -398,13 +448,36 @@ static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr,
        return ret;
 }
 
-static void sun8i_dwmac_dma_operation_mode(void __iomem *ioaddr, int txmode,
-                                          int rxmode, int rxfifosz)
+static void sun8i_dwmac_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
+                                             u32 channel, int fifosz, u8 qmode)
+{
+       u32 v;
+
+       v = readl(ioaddr + EMAC_RX_CTL1);
+       if (mode == SF_DMA_MODE) {
+               v |= EMAC_RX_MD;
+       } else {
+               v &= ~EMAC_RX_MD;
+               v &= ~EMAC_RX_TH_MASK;
+               if (mode < 32)
+                       v |= EMAC_RX_TH_32;
+               else if (mode < 64)
+                       v |= EMAC_RX_TH_64;
+               else if (mode < 96)
+                       v |= EMAC_RX_TH_96;
+               else if (mode < 128)
+                       v |= EMAC_RX_TH_128;
+       }
+       writel(v, ioaddr + EMAC_RX_CTL1);
+}
+
+static void sun8i_dwmac_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+                                             u32 channel, int fifosz, u8 qmode)
 {
        u32 v;
 
        v = readl(ioaddr + EMAC_TX_CTL1);
-       if (txmode == SF_DMA_MODE) {
+       if (mode == SF_DMA_MODE) {
                v |= EMAC_TX_MD;
                /* Undocumented bit (called TX_NEXT_FRM in BSP), the original
                 * comment is
@@ -415,40 +488,26 @@ static void sun8i_dwmac_dma_operation_mode(void __iomem *ioaddr, int txmode,
        } else {
                v &= ~EMAC_TX_MD;
                v &= ~EMAC_TX_TH_MASK;
-               if (txmode < 64)
+               if (mode < 64)
                        v |= EMAC_TX_TH_64;
-               else if (txmode < 128)
+               else if (mode < 128)
                        v |= EMAC_TX_TH_128;
-               else if (txmode < 192)
+               else if (mode < 192)
                        v |= EMAC_TX_TH_192;
-               else if (txmode < 256)
+               else if (mode < 256)
                        v |= EMAC_TX_TH_256;
        }
        writel(v, ioaddr + EMAC_TX_CTL1);
-
-       v = readl(ioaddr + EMAC_RX_CTL1);
-       if (rxmode == SF_DMA_MODE) {
-               v |= EMAC_RX_MD;
-       } else {
-               v &= ~EMAC_RX_MD;
-               v &= ~EMAC_RX_TH_MASK;
-               if (rxmode < 32)
-                       v |= EMAC_RX_TH_32;
-               else if (rxmode < 64)
-                       v |= EMAC_RX_TH_64;
-               else if (rxmode < 96)
-                       v |= EMAC_RX_TH_96;
-               else if (rxmode < 128)
-                       v |= EMAC_RX_TH_128;
-       }
-       writel(v, ioaddr + EMAC_RX_CTL1);
 }
 
 static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = {
        .reset = sun8i_dwmac_dma_reset,
        .init = sun8i_dwmac_dma_init,
+       .init_rx_chan = sun8i_dwmac_dma_init_rx,
+       .init_tx_chan = sun8i_dwmac_dma_init_tx,
        .dump_regs = sun8i_dwmac_dump_regs,
-       .dma_mode = sun8i_dwmac_dma_operation_mode,
+       .dma_rx_mode = sun8i_dwmac_dma_operation_mode_rx,
+       .dma_tx_mode = sun8i_dwmac_dma_operation_mode_tx,
        .enable_dma_transmission = sun8i_dwmac_enable_dma_transmission,
        .enable_dma_irq = sun8i_dwmac_enable_dma_irq,
        .disable_dma_irq = sun8i_dwmac_disable_dma_irq,
@@ -745,7 +804,7 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child,
        bool need_power_ephy = false;
 
        if (current_child ^ desired_child) {
-               regmap_read(gmac->regmap, SYSCON_EMAC_REG, &reg);
+               regmap_field_read(gmac->regmap_field, &reg);
                switch (desired_child) {
                case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID:
                        dev_info(priv->device, "Switch mux to internal PHY");
@@ -763,7 +822,7 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child,
                                desired_child);
                        return -EINVAL;
                }
-               regmap_write(gmac->regmap, SYSCON_EMAC_REG, val);
+               regmap_field_write(gmac->regmap_field, val);
                if (need_power_ephy) {
                        ret = sun8i_dwmac_power_internal_phy(priv);
                        if (ret)
@@ -801,7 +860,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
        int ret;
        u32 reg, val;
 
-       regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
+       regmap_field_read(gmac->regmap_field, &val);
        reg = gmac->variant->default_syscon_value;
        if (reg != val)
                dev_warn(priv->device,
@@ -835,8 +894,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
                }
                val /= 100;
                dev_dbg(priv->device, "set tx-delay to %x\n", val);
-               if (val <= SYSCON_ETXDC_MASK) {
-                       reg &= ~(SYSCON_ETXDC_MASK << SYSCON_ETXDC_SHIFT);
+               if (val <= gmac->variant->tx_delay_max) {
+                       reg &= ~(gmac->variant->tx_delay_max <<
+                                SYSCON_ETXDC_SHIFT);
                        reg |= (val << SYSCON_ETXDC_SHIFT);
                } else {
                        dev_err(priv->device, "Invalid TX clock delay: %d\n",
@@ -852,8 +912,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
                }
                val /= 100;
                dev_dbg(priv->device, "set rx-delay to %x\n", val);
-               if (val <= SYSCON_ERXDC_MASK) {
-                       reg &= ~(SYSCON_ERXDC_MASK << SYSCON_ERXDC_SHIFT);
+               if (val <= gmac->variant->rx_delay_max) {
+                       reg &= ~(gmac->variant->rx_delay_max <<
+                                SYSCON_ERXDC_SHIFT);
                        reg |= (val << SYSCON_ERXDC_SHIFT);
                } else {
                        dev_err(priv->device, "Invalid RX clock delay: %d\n",
@@ -883,7 +944,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
                return -EINVAL;
        }
 
-       regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg);
+       regmap_field_write(gmac->regmap_field, reg);
 
        return 0;
 }
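
The rewrite works because each variant's rx_delay_max/tx_delay_max is of the form 2^n - 1 (7 or 31), so the maximum legal value doubles as the register field mask. A minimal sketch of the read-modify-write:

static u32 apply_delay(u32 reg, u32 val, u32 max, u32 shift)
{
	if (val > max)		/* out of range for this variant */
		return reg;
	reg &= ~(max << shift);	/* max == field mask: clears the field */
	return reg | (val << shift);
}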
@@ -892,7 +953,7 @@ static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac)
 {
        u32 reg = gmac->variant->default_syscon_value;
 
-       regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg);
+       regmap_field_write(gmac->regmap_field, reg);
 }
 
 static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
@@ -971,6 +1032,34 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
        return mac;
 }
 
+static struct regmap *sun8i_dwmac_get_syscon_from_dev(struct device_node *node)
+{
+       struct device_node *syscon_node;
+       struct platform_device *syscon_pdev;
+       struct regmap *regmap = NULL;
+
+       syscon_node = of_parse_phandle(node, "syscon", 0);
+       if (!syscon_node)
+               return ERR_PTR(-ENODEV);
+
+       syscon_pdev = of_find_device_by_node(syscon_node);
+       if (!syscon_pdev) {
+               /* platform device might not be probed yet */
+               regmap = ERR_PTR(-EPROBE_DEFER);
+               goto out_put_node;
+       }
+
+       /* If no regmap is found then the other device driver is at fault */
+       regmap = dev_get_regmap(&syscon_pdev->dev, NULL);
+       if (!regmap)
+               regmap = ERR_PTR(-EINVAL);
+
+       platform_device_put(syscon_pdev);
+out_put_node:
+       of_node_put(syscon_node);
+       return regmap;
+}
+
 static int sun8i_dwmac_probe(struct platform_device *pdev)
 {
        struct plat_stmmacenet_data *plat_dat;
@@ -980,6 +1069,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
        int ret;
        struct stmmac_priv *priv;
        struct net_device *ndev;
+       struct regmap *regmap;
 
        ret = stmmac_get_platform_resources(pdev, &stmmac_res);
        if (ret)
@@ -1014,14 +1104,41 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
                gmac->regulator = NULL;
        }
 
-       gmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-                                                      "syscon");
-       if (IS_ERR(gmac->regmap)) {
-               ret = PTR_ERR(gmac->regmap);
+       /* The "GMAC clock control" register might be located in the
+        * CCU address range (on the R40), or the system control address
+        * range (on most other sun8i and later SoCs).
+        *
+        * The former controls most if not all clocks in the SoC. The
+        * latter has an SoC identification register, and on some SoCs,
+        * controls to map device specific SRAM to either the intended
+        * peripheral, or the CPU address space.
+        *
+        * In either case, there should be a coordinated and restricted
+        * method of accessing the register needed here. This is done by
+        * having the device export a custom regmap, instead of a generic
+        * syscon, which grants all access to all registers.
+        *
+        * To support old device trees, we fall back to using the syscon
+        * interface if possible.
+        */
+       regmap = sun8i_dwmac_get_syscon_from_dev(pdev->dev.of_node);
+       if (IS_ERR(regmap))
+               regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+                                                        "syscon");
+       if (IS_ERR(regmap)) {
+               ret = PTR_ERR(regmap);
                dev_err(&pdev->dev, "Unable to map syscon: %d\n", ret);
                return ret;
        }
 
+       gmac->regmap_field = devm_regmap_field_alloc(dev, regmap,
+                                                    *gmac->variant->syscon_field);
+       if (IS_ERR(gmac->regmap_field)) {
+               ret = PTR_ERR(gmac->regmap_field);
+               dev_err(dev, "Unable to map syscon register: %d\n", ret);
+               return ret;
+       }
+
        plat_dat->interface = of_get_phy_mode(dev->of_node);
 
        /* platform data specifying hardware features and callbacks.
@@ -1078,6 +1195,8 @@ static const struct of_device_id sun8i_dwmac_match[] = {
                .data = &emac_variant_v3s },
        { .compatible = "allwinner,sun8i-a83t-emac",
                .data = &emac_variant_a83t },
+       { .compatible = "allwinner,sun8i-r40-gmac",
+               .data = &emac_variant_r40 },
        { .compatible = "allwinner,sun50i-a64-emac",
                .data = &emac_variant_a64 },
        { }
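
Replacing the raw syscon regmap with a regmap_field restricts the driver to the single EMAC clock register described by the variant's reg_field. A hedged usage sketch (register layout illustrative):

static int rmw_emac_clk(struct device *dev, struct regmap *map, u32 set)
{
	static const struct reg_field field = {
		.reg = 0x30, .lsb = 0, .msb = 31,
	};
	struct regmap_field *f;
	unsigned int v;
	int ret;

	f = devm_regmap_field_alloc(dev, map, field);
	if (IS_ERR(f))
		return PTR_ERR(f);
	ret = regmap_field_read(f, &v);
	if (ret)
		return ret;
	return regmap_field_write(f, v | set);
}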
index 7ecf549c7f1cd594f86f165c02743f415f7ada85..aacc4aa80e3c59dee5fcde2adaa810e234b2390a 100644 (file)
@@ -81,8 +81,7 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 }
 
 static void dwmac1000_dma_init(void __iomem *ioaddr,
-                              struct stmmac_dma_cfg *dma_cfg,
-                              u32 dma_tx, u32 dma_rx, int atds)
+                              struct stmmac_dma_cfg *dma_cfg, int atds)
 {
        u32 value = readl(ioaddr + DMA_BUS_MODE);
        int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
@@ -119,12 +118,22 @@ static void dwmac1000_dma_init(void __iomem *ioaddr,
 
        /* Mask interrupts by writing to CSR7 */
        writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
+}
 
-       /* RX/TX descriptor base address lists must be written into
-        * DMA CSR3 and CSR4, respectively
-        */
-       writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR);
-       writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR);
+static void dwmac1000_dma_init_rx(void __iomem *ioaddr,
+                                 struct stmmac_dma_cfg *dma_cfg,
+                                 u32 dma_rx_phy, u32 chan)
+{
+       /* RX descriptor base address list must be written into DMA CSR3 */
+       writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR);
+}
+
+static void dwmac1000_dma_init_tx(void __iomem *ioaddr,
+                                 struct stmmac_dma_cfg *dma_cfg,
+                                 u32 dma_tx_phy, u32 chan)
+{
+       /* TX descriptor base address list must be written into DMA CSR4 */
+       writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR);
 }
 
 static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz)
@@ -148,12 +157,40 @@ static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz)
        return csr6;
 }
 
-static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode,
-                                        int rxmode, int rxfifosz)
+static void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
+                                           u32 channel, int fifosz, u8 qmode)
+{
+       u32 csr6 = readl(ioaddr + DMA_CONTROL);
+
+       if (mode == SF_DMA_MODE) {
+               pr_debug("GMAC: enable RX store and forward mode\n");
+               csr6 |= DMA_CONTROL_RSF;
+       } else {
+               pr_debug("GMAC: disable RX SF mode (threshold %d)\n", mode);
+               csr6 &= ~DMA_CONTROL_RSF;
+               csr6 &= DMA_CONTROL_TC_RX_MASK;
+               if (mode <= 32)
+                       csr6 |= DMA_CONTROL_RTC_32;
+               else if (mode <= 64)
+                       csr6 |= DMA_CONTROL_RTC_64;
+               else if (mode <= 96)
+                       csr6 |= DMA_CONTROL_RTC_96;
+               else
+                       csr6 |= DMA_CONTROL_RTC_128;
+       }
+
+       /* Configure flow control based on rx fifo size */
+       csr6 = dwmac1000_configure_fc(csr6, fifosz);
+
+       writel(csr6, ioaddr + DMA_CONTROL);
+}
+
+static void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+                                           u32 channel, int fifosz, u8 qmode)
 {
        u32 csr6 = readl(ioaddr + DMA_CONTROL);
 
-       if (txmode == SF_DMA_MODE) {
+       if (mode == SF_DMA_MODE) {
                pr_debug("GMAC: enable TX store and forward mode\n");
                /* Transmit COE type 2 cannot be done in cut-through mode. */
                csr6 |= DMA_CONTROL_TSF;
@@ -162,42 +199,22 @@ static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode,
                 */
                csr6 |= DMA_CONTROL_OSF;
        } else {
-               pr_debug("GMAC: disabling TX SF (threshold %d)\n", txmode);
+               pr_debug("GMAC: disabling TX SF (threshold %d)\n", mode);
                csr6 &= ~DMA_CONTROL_TSF;
                csr6 &= DMA_CONTROL_TC_TX_MASK;
                /* Set the transmit threshold */
-               if (txmode <= 32)
+               if (mode <= 32)
                        csr6 |= DMA_CONTROL_TTC_32;
-               else if (txmode <= 64)
+               else if (mode <= 64)
                        csr6 |= DMA_CONTROL_TTC_64;
-               else if (txmode <= 128)
+               else if (mode <= 128)
                        csr6 |= DMA_CONTROL_TTC_128;
-               else if (txmode <= 192)
+               else if (mode <= 192)
                        csr6 |= DMA_CONTROL_TTC_192;
                else
                        csr6 |= DMA_CONTROL_TTC_256;
        }
 
-       if (rxmode == SF_DMA_MODE) {
-               pr_debug("GMAC: enable RX store and forward mode\n");
-               csr6 |= DMA_CONTROL_RSF;
-       } else {
-               pr_debug("GMAC: disable RX SF mode (threshold %d)\n", rxmode);
-               csr6 &= ~DMA_CONTROL_RSF;
-               csr6 &= DMA_CONTROL_TC_RX_MASK;
-               if (rxmode <= 32)
-                       csr6 |= DMA_CONTROL_RTC_32;
-               else if (rxmode <= 64)
-                       csr6 |= DMA_CONTROL_RTC_64;
-               else if (rxmode <= 96)
-                       csr6 |= DMA_CONTROL_RTC_96;
-               else
-                       csr6 |= DMA_CONTROL_RTC_128;
-       }
-
-       /* Configure flow control based on rx fifo size */
-       csr6 = dwmac1000_configure_fc(csr6, rxfifosz);
-
        writel(csr6, ioaddr + DMA_CONTROL);
 }
 
@@ -256,9 +273,12 @@ static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
 const struct stmmac_dma_ops dwmac1000_dma_ops = {
        .reset = dwmac_dma_reset,
        .init = dwmac1000_dma_init,
+       .init_rx_chan = dwmac1000_dma_init_rx,
+       .init_tx_chan = dwmac1000_dma_init_tx,
        .axi = dwmac1000_dma_axi,
        .dump_regs = dwmac1000_dump_dma_regs,
-       .dma_mode = dwmac1000_dma_operation_mode,
+       .dma_rx_mode = dwmac1000_dma_operation_mode_rx,
+       .dma_tx_mode = dwmac1000_dma_operation_mode_tx,
        .enable_dma_transmission = dwmac_enable_dma_transmission,
        .enable_dma_irq = dwmac_enable_dma_irq,
        .disable_dma_irq = dwmac_disable_dma_irq,
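
With dma_mode split into dma_rx_mode/dma_tx_mode, the core can program RX and TX thresholds independently, per channel and per direction. An illustrative caller (stmmac's real dispatch goes through its hwif wrappers):

static void tune_channel(const struct stmmac_dma_ops *ops,
			 void __iomem *ioaddr, u32 chan,
			 int rxfifosz, int txfifosz)
{
	if (ops->dma_rx_mode)
		ops->dma_rx_mode(ioaddr, SF_DMA_MODE, chan, rxfifosz, 0);
	if (ops->dma_tx_mode)
		ops->dma_tx_mode(ioaddr, SF_DMA_MODE, chan, txfifosz, 0);
}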
index 6502b9aa3bf587d0095816052066ce301a765634..21dee25ee570be90415f016050464d695e2fe0d6 100644 (file)
@@ -29,8 +29,7 @@
 #include "dwmac_dma.h"
 
 static void dwmac100_dma_init(void __iomem *ioaddr,
-                             struct stmmac_dma_cfg *dma_cfg,
-                             u32 dma_tx, u32 dma_rx, int atds)
+                             struct stmmac_dma_cfg *dma_cfg, int atds)
 {
        /* Enable Application Access by writing to DMA CSR0 */
        writel(DMA_BUS_MODE_DEFAULT | (dma_cfg->pbl << DMA_BUS_MODE_PBL_SHIFT),
@@ -38,12 +37,22 @@ static void dwmac100_dma_init(void __iomem *ioaddr,
 
        /* Mask interrupts by writing to CSR7 */
        writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
+}
 
-       /* RX/TX descriptor base addr lists must be written into
-        * DMA CSR3 and CSR4, respectively
-        */
-       writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR);
-       writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR);
+static void dwmac100_dma_init_rx(void __iomem *ioaddr,
+                                struct stmmac_dma_cfg *dma_cfg,
+                                u32 dma_rx_phy, u32 chan)
+{
+       /* RX descriptor base addr lists must be written into DMA CSR3 */
+       writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR);
+}
+
+static void dwmac100_dma_init_tx(void __iomem *ioaddr,
+                                struct stmmac_dma_cfg *dma_cfg,
+                                u32 dma_tx_phy, u32 chan)
+{
+       /* TX descriptor base addr lists must be written into DMA CSR4 */
+       writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR);
 }
 
 /* Store and Forward capability is not used at all.
@@ -51,14 +60,14 @@ static void dwmac100_dma_init(void __iomem *ioaddr,
  * The transmit threshold can be programmed by setting the TTC bits in the DMA
  * control register.
  */
-static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode,
-                                       int rxmode, int rxfifosz)
+static void dwmac100_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+                                          u32 channel, int fifosz, u8 qmode)
 {
        u32 csr6 = readl(ioaddr + DMA_CONTROL);
 
-       if (txmode <= 32)
+       if (mode <= 32)
                csr6 |= DMA_CONTROL_TTC_32;
-       else if (txmode <= 64)
+       else if (mode <= 64)
                csr6 |= DMA_CONTROL_TTC_64;
        else
                csr6 |= DMA_CONTROL_TTC_128;
@@ -112,8 +121,10 @@ static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x,
 const struct stmmac_dma_ops dwmac100_dma_ops = {
        .reset = dwmac_dma_reset,
        .init = dwmac100_dma_init,
+       .init_rx_chan = dwmac100_dma_init_rx,
+       .init_tx_chan = dwmac100_dma_init_tx,
        .dump_regs = dwmac100_dump_dma_regs,
-       .dma_mode = dwmac100_dma_operation_mode,
+       .dma_tx_mode = dwmac100_dma_operation_mode_tx,
        .dma_diagnostic_fr = dwmac100_dma_diagnostic_fr,
        .enable_dma_transmission = dwmac_enable_dma_transmission,
        .enable_dma_irq = dwmac_enable_dma_irq,
index 03eab9077c1c98a16527c5cec80fc20ea27be827..6330a55953df5e85c6a65a3bb6839d343257309f 100644 (file)
@@ -194,6 +194,9 @@ enum power_event {
 
 /* MAC HW features3 bitmap */
 #define GMAC_HW_FEAT_ASP               GENMASK(29, 28)
+#define GMAC_HW_FEAT_FRPES             GENMASK(14, 13)
+#define GMAC_HW_FEAT_FRPBS             GENMASK(12, 11)
+#define GMAC_HW_FEAT_FRPSEL            BIT(10)
 
 /* MAC HW ADDR regs */
 #define GMAC_HI_DCS                    GENMASK(18, 16)
@@ -202,6 +205,7 @@ enum power_event {
 
 /*  MTL registers */
 #define MTL_OPERATION_MODE             0x00000c00
+#define MTL_FRPE                       BIT(15)
 #define MTL_OPERATION_SCHALG_MASK      GENMASK(6, 5)
 #define MTL_OPERATION_SCHALG_WRR       (0x0 << 5)
 #define MTL_OPERATION_SCHALG_WFQ       (0x1 << 5)
index 7289b3b47d8e69305b2fb9653f9fe61e269fad8f..a7121a7d9391c8e2e016822c6ac38c4247ef8624 100644 (file)
@@ -795,6 +795,7 @@ const struct stmmac_ops dwmac510_ops = {
        .safety_feat_config = dwmac5_safety_feat_config,
        .safety_feat_irq_status = dwmac5_safety_feat_irq_status,
        .safety_feat_dump = dwmac5_safety_feat_dump,
+       .rxp_config = dwmac5_rxp_config,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
index 65ed896c13cb3f1f4acf892075f6c99a3d200668..20299f6f65fce13d7deccacf5d27ba50cc858b46 100644 (file)
@@ -189,9 +189,12 @@ static void dwmac4_set_tx_owner(struct dma_desc *p)
        p->des3 |= cpu_to_le32(TDES3_OWN);
 }
 
-static void dwmac4_set_rx_owner(struct dma_desc *p)
+static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
 {
-       p->des3 |= cpu_to_le32(RDES3_OWN);
+       p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
+
+       if (!disable_rx_ic)
+               p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
 }
 
 static int dwmac4_get_tx_ls(struct dma_desc *p)
@@ -292,10 +295,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
                                   int mode, int end)
 {
-       p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
-
-       if (!disable_rx_ic)
-               p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
+       dwmac4_set_rx_owner(p, disable_rx_ic);
 }
 
 static void dwmac4_rd_init_tx_desc(struct dma_desc *p, int mode, int end)
@@ -424,6 +424,25 @@ static void dwmac4_set_mss_ctxt(struct dma_desc *p, unsigned int mss)
        p->des3 = cpu_to_le32(TDES3_CONTEXT_TYPE | TDES3_CTXT_TCMSSV);
 }
 
+static void dwmac4_get_addr(struct dma_desc *p, unsigned int *addr)
+{
+       *addr = le32_to_cpu(p->des0);
+}
+
+static void dwmac4_set_addr(struct dma_desc *p, dma_addr_t addr)
+{
+       p->des0 = cpu_to_le32(addr);
+       p->des1 = 0;
+}
+
+static void dwmac4_clear(struct dma_desc *p)
+{
+       p->des0 = 0;
+       p->des1 = 0;
+       p->des2 = 0;
+       p->des3 = 0;
+}
+
 const struct stmmac_desc_ops dwmac4_desc_ops = {
        .tx_status = dwmac4_wrback_get_tx_status,
        .rx_status = dwmac4_wrback_get_rx_status,
@@ -445,6 +464,9 @@ const struct stmmac_desc_ops dwmac4_desc_ops = {
        .init_tx_desc = dwmac4_rd_init_tx_desc,
        .display_ring = dwmac4_display_ring,
        .set_mss = dwmac4_set_mss_ctxt,
+       .get_addr = dwmac4_get_addr,
+       .set_addr = dwmac4_set_addr,
+       .clear = dwmac4_clear,
 };
 
 const struct stmmac_mode_ops dwmac4_ring_mode_ops = { };
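
The new get_addr/set_addr/clear callbacks let the core manage descriptor buffer pointers without touching des0..des3 directly. A hedged sketch of core-side use:

static void refill_rx_desc(const struct stmmac_desc_ops *ops,
			   struct dma_desc *p, dma_addr_t buf,
			   int disable_rx_ic)
{
	ops->clear(p);				/* scrub stale fields */
	ops->set_addr(p, buf);			/* new buffer address */
	ops->set_rx_owner(p, disable_rx_ic);	/* hand back to DMA */
}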
index d37d457306d1616e8c628b799a366237d9e55905..bf8e5a16f11c8b167b5a5e0e381b9d208b6a8e33 100644 (file)
@@ -94,6 +94,10 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
 
        value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
        value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
+
+       /* Enable OSP to get best performance */
+       value |= DMA_CONTROL_OSP;
+
        writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
@@ -116,8 +120,7 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr,
 }
 
 static void dwmac4_dma_init(void __iomem *ioaddr,
-                           struct stmmac_dma_cfg *dma_cfg,
-                           u32 dma_tx, u32 dma_rx, int atds)
+                           struct stmmac_dma_cfg *dma_cfg, int atds)
 {
        u32 value = readl(ioaddr + DMA_SYS_BUS_MODE);
 
@@ -379,6 +382,9 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
        /* 5.10 Features */
        dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28;
+       dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13;
+       dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11;
+       dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10;
 }
 
 /* Enable/disable TSO feature and set MSS */
index 8474bf961dd0c60a409ba4c1201557cdef7580dc..c63c1fe3f26b9e4d5cb714ea3ceed56bf103b17e 100644 (file)
 #define DMA_CHAN0_DBG_STAT_RPS_SHIFT   8
 
 int dwmac4_dma_reset(void __iomem *ioaddr);
-void dwmac4_enable_dma_transmission(void __iomem *ioaddr, u32 tail_ptr);
 void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan);
 void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan);
 void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan);
index 2978550bb7f6b36339cf54344f464301371fe778..b2becb80a697c6c32c50beeb50f97c17c6981cfe 100644 (file)
@@ -7,6 +7,7 @@
 #include "common.h"
 #include "dwmac4.h"
 #include "dwmac5.h"
+#include "stmmac.h"
 
 struct dwmac5_error_desc {
        bool valid;
@@ -299,3 +300,197 @@ int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
                *desc = dwmac5_all_errors[module].desc[offset].desc;
        return 0;
 }
+
+static int dwmac5_rxp_disable(void __iomem *ioaddr)
+{
+       u32 val;
+       int ret;
+
+       val = readl(ioaddr + MTL_OPERATION_MODE);
+       val &= ~MTL_FRPE;
+       writel(val, ioaddr + MTL_OPERATION_MODE);
+
+       ret = readl_poll_timeout(ioaddr + MTL_RXP_CONTROL_STATUS, val,
+                       val & RXPI, 1, 10000);
+       if (ret)
+               return ret;
+       return 0;
+}
+
+static void dwmac5_rxp_enable(void __iomem *ioaddr)
+{
+       u32 val;
+
+       val = readl(ioaddr + MTL_OPERATION_MODE);
+       val |= MTL_FRPE;
+       writel(val, ioaddr + MTL_OPERATION_MODE);
+}
+
+static int dwmac5_rxp_update_single_entry(void __iomem *ioaddr,
+                                         struct stmmac_tc_entry *entry,
+                                         int pos)
+{
+       int ret, i;
+
+       for (i = 0; i < (sizeof(entry->val) / sizeof(u32)); i++) {
+               int real_pos = pos * (sizeof(entry->val) / sizeof(u32)) + i;
+               u32 val;
+
+               /* Wait for ready */
+               ret = readl_poll_timeout(ioaddr + MTL_RXP_IACC_CTRL_STATUS,
+                               val, !(val & STARTBUSY), 1, 10000);
+               if (ret)
+                       return ret;
+
+               /* Write data */
+               val = *((u32 *)&entry->val + i);
+               writel(val, ioaddr + MTL_RXP_IACC_DATA);
+
+               /* Write pos */
+               val = real_pos & ADDR;
+               writel(val, ioaddr + MTL_RXP_IACC_CTRL_STATUS);
+
+               /* Write OP */
+               val |= WRRDN;
+               writel(val, ioaddr + MTL_RXP_IACC_CTRL_STATUS);
+
+               /* Start Write */
+               val |= STARTBUSY;
+               writel(val, ioaddr + MTL_RXP_IACC_CTRL_STATUS);
+
+               /* Wait for done */
+               ret = readl_poll_timeout(ioaddr + MTL_RXP_IACC_CTRL_STATUS,
+                               val, !(val & STARTBUSY), 1, 10000);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static struct stmmac_tc_entry *
+dwmac5_rxp_get_next_entry(struct stmmac_tc_entry *entries, unsigned int count,
+                         u32 curr_prio)
+{
+       struct stmmac_tc_entry *entry;
+       u32 min_prio = ~0x0;
+       int i, min_prio_idx;
+       bool found = false;
+
+       for (i = count - 1; i >= 0; i--) {
+               entry = &entries[i];
+
+               /* Do not update unused entries */
+               if (!entry->in_use)
+                       continue;
+               /* Do not update already updated entries (i.e. fragments) */
+               if (entry->in_hw)
+                       continue;
+               /* Let last entry be updated last */
+               if (entry->is_last)
+                       continue;
+               /* Do not return fragments */
+               if (entry->is_frag)
+                       continue;
+               /* Check if we already checked this prio */
+               if (entry->prio < curr_prio)
+                       continue;
+               /* Check if this is the minimum prio */
+               if (entry->prio < min_prio) {
+                       min_prio = entry->prio;
+                       min_prio_idx = i;
+                       found = true;
+               }
+       }
+
+       if (found)
+               return &entries[min_prio_idx];
+       return NULL;
+}
+
+int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
+                     unsigned int count)
+{
+       struct stmmac_tc_entry *entry, *frag;
+       int i, ret, nve = 0;
+       u32 curr_prio = 0;
+       u32 old_val, val;
+
+       /* Force disable RX */
+       old_val = readl(ioaddr + GMAC_CONFIG);
+       val = old_val & ~GMAC_CONFIG_RE;
+       writel(val, ioaddr + GMAC_CONFIG);
+
+       /* Disable RX Parser */
+       ret = dwmac5_rxp_disable(ioaddr);
+       if (ret)
+               goto re_enable;
+
+       /* Set all entries as NOT in HW */
+       for (i = 0; i < count; i++) {
+               entry = &entries[i];
+               entry->in_hw = false;
+       }
+
+       /* Update entries in reverse order */
+       while (1) {
+               entry = dwmac5_rxp_get_next_entry(entries, count, curr_prio);
+               if (!entry)
+                       break;
+
+               curr_prio = entry->prio;
+               frag = entry->frag_ptr;
+
+               /* Set special fragment requirements */
+               if (frag) {
+                       entry->val.af = 0;
+                       entry->val.rf = 0;
+                       entry->val.nc = 1;
+                       entry->val.ok_index = nve + 2;
+               }
+
+               ret = dwmac5_rxp_update_single_entry(ioaddr, entry, nve);
+               if (ret)
+                       goto re_enable;
+
+               entry->table_pos = nve++;
+               entry->in_hw = true;
+
+               if (frag && !frag->in_hw) {
+                       ret = dwmac5_rxp_update_single_entry(ioaddr, frag, nve);
+                       if (ret)
+                               goto re_enable;
+                       frag->table_pos = nve++;
+                       frag->in_hw = true;
+               }
+       }
+
+       if (!nve)
+               goto re_enable;
+
+       /* Update all pass entry */
+       for (i = 0; i < count; i++) {
+               entry = &entries[i];
+               if (!entry->is_last)
+                       continue;
+
+               ret = dwmac5_rxp_update_single_entry(ioaddr, entry, nve);
+               if (ret)
+                       goto re_enable;
+
+               entry->table_pos = nve++;
+       }
+
+       /* Assume the number of parsable entries equals the number of valid ones */
+       val = (nve << 16) & NPE;
+       val |= nve & NVE;
+       writel(val, ioaddr + MTL_RXP_CONTROL_STATUS);
+
+       /* Enable RX Parser */
+       dwmac5_rxp_enable(ioaddr);
+
+re_enable:
+       /* Re-enable RX */
+       writel(old_val, ioaddr + GMAC_CONFIG);
+       return ret;
+}
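
The final control write packs both counters into one register: NVE (number of valid entries) lives in bits 7:0 and NPE (number of parsable entries) in bits 23:16, assumed equal here. Worked for nve == 3:

/*	(3 << 16) & NPE -> 0x00030000
 *	 3       & NVE  -> 0x00000003
 *	MTL_RXP_CONTROL_STATUS <- 0x00030003
 */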
index bd4c466d303e3ae7b2a2cbb9aa6e8d1452cef913..cc810aff71007649d7d1618baa6d97f80da36d87 100644 (file)
 #define PRTYEN                         BIT(1)
 #define TMOUTEN                                BIT(0)
 
+#define MTL_RXP_CONTROL_STATUS         0x00000ca0
+#define RXPI                           BIT(31)
+#define NPE                            GENMASK(23, 16)
+#define NVE                            GENMASK(7, 0)
+#define MTL_RXP_IACC_CTRL_STATUS       0x00000cb0
+#define STARTBUSY                      BIT(31)
+#define RXPEIEC                                GENMASK(22, 21)
+#define RXPEIEE                                BIT(20)
+#define WRRDN                          BIT(16)
+#define ADDR                           GENMASK(15, 0)
+#define MTL_RXP_IACC_DATA              0x00000cb4
 #define MTL_ECC_CONTROL                        0x00000cc0
 #define TSOEE                          BIT(4)
 #define MRXPEE                         BIT(3)
@@ -48,5 +59,7 @@ int dwmac5_safety_feat_irq_status(struct net_device *ndev,
                struct stmmac_safety_stats *stats);
 int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
                        int index, unsigned long *count, const char **desc);
+int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
+                     unsigned int count);
 
 #endif /* __DWMAC5_H__ */
index 3bfb3f584be2008a00ff084b932ad8665116931b..77914c89d7497de6f9a251196fe079f49364ee13 100644 (file)
@@ -292,7 +292,7 @@ static void enh_desc_set_tx_owner(struct dma_desc *p)
        p->des0 |= cpu_to_le32(ETDES0_OWN);
 }
 
-static void enh_desc_set_rx_owner(struct dma_desc *p)
+static void enh_desc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
 {
        p->des0 |= cpu_to_le32(RDES0_OWN);
 }
@@ -437,6 +437,21 @@ static void enh_desc_display_ring(void *head, unsigned int size, bool rx)
        pr_info("\n");
 }
 
+static void enh_desc_get_addr(struct dma_desc *p, unsigned int *addr)
+{
+       *addr = le32_to_cpu(p->des2);
+}
+
+static void enh_desc_set_addr(struct dma_desc *p, dma_addr_t addr)
+{
+       p->des2 = cpu_to_le32(addr);
+}
+
+static void enh_desc_clear(struct dma_desc *p)
+{
+       p->des2 = 0;
+}
+
 const struct stmmac_desc_ops enh_desc_ops = {
        .tx_status = enh_desc_get_tx_status,
        .rx_status = enh_desc_get_rx_status,
@@ -457,4 +472,7 @@ const struct stmmac_desc_ops enh_desc_ops = {
        .get_timestamp = enh_desc_get_timestamp,
        .get_rx_timestamp_status = enh_desc_get_rx_timestamp_status,
        .display_ring = enh_desc_display_ring,
+       .get_addr = enh_desc_get_addr,
+       .set_addr = enh_desc_set_addr,
+       .clear = enh_desc_clear,
 };
index 2b0a7e79de003cdc88d8b4a9d5d3b01fae155259..14770fc8865e8022d06fc0e05b7aa27feab2117d 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "common.h"
 #include "stmmac.h"
+#include "stmmac_ptp.h"
 
 static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg)
 {
@@ -72,11 +73,13 @@ static const struct stmmac_hwif_entry {
        bool gmac;
        bool gmac4;
        u32 min_id;
+       const struct stmmac_regs_off regs;
        const void *desc;
        const void *dma;
        const void *mac;
        const void *hwtimestamp;
        const void *mode;
+       const void *tc;
        int (*setup)(struct stmmac_priv *priv);
        int (*quirks)(struct stmmac_priv *priv);
 } stmmac_hw[] = {
@@ -85,66 +88,96 @@ static const struct stmmac_hwif_entry {
                .gmac = false,
                .gmac4 = false,
                .min_id = 0,
+               .regs = {
+                       .ptp_off = PTP_GMAC3_X_OFFSET,
+                       .mmc_off = MMC_GMAC3_X_OFFSET,
+               },
                .desc = NULL,
                .dma = &dwmac100_dma_ops,
                .mac = &dwmac100_ops,
                .hwtimestamp = &stmmac_ptp,
                .mode = NULL,
+               .tc = NULL,
                .setup = dwmac100_setup,
                .quirks = stmmac_dwmac1_quirks,
        }, {
                .gmac = true,
                .gmac4 = false,
                .min_id = 0,
+               .regs = {
+                       .ptp_off = PTP_GMAC3_X_OFFSET,
+                       .mmc_off = MMC_GMAC3_X_OFFSET,
+               },
                .desc = NULL,
                .dma = &dwmac1000_dma_ops,
                .mac = &dwmac1000_ops,
                .hwtimestamp = &stmmac_ptp,
                .mode = NULL,
+               .tc = NULL,
                .setup = dwmac1000_setup,
                .quirks = stmmac_dwmac1_quirks,
        }, {
                .gmac = false,
                .gmac4 = true,
                .min_id = 0,
+               .regs = {
+                       .ptp_off = PTP_GMAC4_OFFSET,
+                       .mmc_off = MMC_GMAC4_OFFSET,
+               },
                .desc = &dwmac4_desc_ops,
                .dma = &dwmac4_dma_ops,
                .mac = &dwmac4_ops,
                .hwtimestamp = &stmmac_ptp,
                .mode = NULL,
+               .tc = NULL,
                .setup = dwmac4_setup,
                .quirks = stmmac_dwmac4_quirks,
        }, {
                .gmac = false,
                .gmac4 = true,
                .min_id = DWMAC_CORE_4_00,
+               .regs = {
+                       .ptp_off = PTP_GMAC4_OFFSET,
+                       .mmc_off = MMC_GMAC4_OFFSET,
+               },
                .desc = &dwmac4_desc_ops,
                .dma = &dwmac4_dma_ops,
                .mac = &dwmac410_ops,
                .hwtimestamp = &stmmac_ptp,
                .mode = &dwmac4_ring_mode_ops,
+               .tc = NULL,
                .setup = dwmac4_setup,
                .quirks = NULL,
        }, {
                .gmac = false,
                .gmac4 = true,
                .min_id = DWMAC_CORE_4_10,
+               .regs = {
+                       .ptp_off = PTP_GMAC4_OFFSET,
+                       .mmc_off = MMC_GMAC4_OFFSET,
+               },
                .desc = &dwmac4_desc_ops,
                .dma = &dwmac410_dma_ops,
                .mac = &dwmac410_ops,
                .hwtimestamp = &stmmac_ptp,
                .mode = &dwmac4_ring_mode_ops,
+               .tc = NULL,
                .setup = dwmac4_setup,
                .quirks = NULL,
        }, {
                .gmac = false,
                .gmac4 = true,
                .min_id = DWMAC_CORE_5_10,
+               .regs = {
+                       .ptp_off = PTP_GMAC4_OFFSET,
+                       .mmc_off = MMC_GMAC4_OFFSET,
+               },
                .desc = &dwmac4_desc_ops,
                .dma = &dwmac410_dma_ops,
                .mac = &dwmac510_ops,
                .hwtimestamp = &stmmac_ptp,
                .mode = &dwmac4_ring_mode_ops,
+               .tc = &dwmac510_tc_ops,
                .setup = dwmac4_setup,
                .quirks = NULL,
        }
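
Each stmmac_hw[] entry now also carries the PTP/MMC register offsets for its core family and an optional tc ops pointer (only the 5.10 entry gets dwmac510_tc_ops). stmmac_hwif_init() matches on the gmac/gmac4 flags and the minimum core ID, ending up on the newest entry the detected core satisfies. A reverse table walk, as sketched below, gives that result; the IDs and offsets are illustrative (the real DWMAC_CORE_* and *_OFFSET values live in the driver's headers), and this is not the driver's exact loop.

    #include <stdint.h>
    #include <stddef.h>

    struct hw_entry {
        int gmac4;
        uint32_t min_id;
        uint32_t ptp_off, mmc_off; /* per-family register offsets */
    };

    static const struct hw_entry table[] = {
        { 1, 0x00, 0xb00, 0x700 }, /* plain GMAC4             */
        { 1, 0x40, 0xb00, 0x700 }, /* >= DWMAC_CORE_4_00      */
        { 1, 0x41, 0xb00, 0x700 }, /* >= DWMAC_CORE_4_10      */
        { 1, 0x51, 0xb00, 0x700 }, /* >= DWMAC_CORE_5_10 + tc */
    };

    /* Newest entry whose flags match and whose min_id the core satisfies. */
    static const struct hw_entry *pick(int gmac4, uint32_t id)
    {
        for (size_t i = sizeof(table) / sizeof(table[0]); i-- > 0; )
            if (table[i].gmac4 == gmac4 && id >= table[i].min_id)
                return &table[i];
        return NULL;
    }

    int main(void)
    {
        return pick(1, 0x51) == &table[3] ? 0 : 1; /* 5.10 core -> tc entry */
    }
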
@@ -156,27 +189,35 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
        bool needs_gmac = priv->plat->has_gmac;
        const struct stmmac_hwif_entry *entry;
        struct mac_device_info *mac;
+       bool needs_setup = true;
        int i, ret;
        u32 id;
 
        if (needs_gmac) {
                id = stmmac_get_id(priv, GMAC_VERSION);
-       } else {
+       } else if (needs_gmac4) {
                id = stmmac_get_id(priv, GMAC4_VERSION);
+       } else {
+               id = 0;
        }
 
        /* Save ID for later use */
        priv->synopsys_id = id;
 
+       /* Let's assume some safe values first */
+       priv->ptpaddr = priv->ioaddr +
+               (needs_gmac4 ? PTP_GMAC4_OFFSET : PTP_GMAC3_X_OFFSET);
+       priv->mmcaddr = priv->ioaddr +
+               (needs_gmac4 ? MMC_GMAC4_OFFSET : MMC_GMAC3_X_OFFSET);
+
        /* Check for HW specific setup first */
        if (priv->plat->setup) {
-               priv->hw = priv->plat->setup(priv);
-               if (!priv->hw)
-                       return -ENOMEM;
-               return 0;
+               mac = priv->plat->setup(priv);
+               needs_setup = false;
+       } else {
+               mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL);
        }
 
-       mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL);
        if (!mac)
                return -ENOMEM;
 
@@ -188,21 +229,28 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
                        continue;
                if (needs_gmac4 ^ entry->gmac4)
                        continue;
-               if (id < entry->min_id)
+               /* Use synopsys_id var because some setups can override this */
+               if (priv->synopsys_id < entry->min_id)
                        continue;
 
-               mac->desc = entry->desc;
-               mac->dma = entry->dma;
-               mac->mac = entry->mac;
-               mac->ptp = entry->hwtimestamp;
-               mac->mode = entry->mode;
+               /* Only use generic HW helpers if needed */
+               mac->desc = mac->desc ? : entry->desc;
+               mac->dma = mac->dma ? : entry->dma;
+               mac->mac = mac->mac ? : entry->mac;
+               mac->ptp = mac->ptp ? : entry->hwtimestamp;
+               mac->mode = mac->mode ? : entry->mode;
+               mac->tc = mac->tc ? : entry->tc;
 
                priv->hw = mac;
+               priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off;
+               priv->mmcaddr = priv->ioaddr + entry->regs.mmc_off;
 
                /* Entry found */
-               ret = entry->setup(priv);
-               if (ret)
-                       return ret;
+               if (needs_setup) {
+                       ret = entry->setup(priv);
+                       if (ret)
+                               return ret;
+               }
 
                /* Run quirks, if needed */
                if (entry->quirks) {
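
Once a platform-specific setup() has had the chance to install its own callbacks, the generic table entry only fills the slots that are still NULL. The `x ? : y` form is the GNU C conditional-omitted-operand extension: it evaluates x once and yields it when non-zero, y otherwise. A compilable sketch of that merge (types and names invented for illustration):

    #include <stddef.h>

    struct ops_set { const void *desc, *dma, *tc; };

    /* A callback installed by the platform's setup() wins; NULL fields
     * fall back to the generic helpers from the table entry. Requires
     * GCC/clang for the "?:" extension, exactly as the patch uses it. */
    static void merge_ops(struct ops_set *mac, const struct ops_set *entry)
    {
        mac->desc = mac->desc ? : entry->desc;
        mac->dma  = mac->dma  ? : entry->dma;
        mac->tc   = mac->tc   ? : entry->tc;
    }

    int main(void)
    {
        const char pdma[] = "platform-dma";
        struct ops_set generic = { "generic-desc", "generic-dma", NULL };
        struct ops_set mac = { NULL, pdma, NULL };

        merge_ops(&mac, &generic);
        /* desc fell back to the generic helper; dma kept the override */
        return (mac.dma == pdma && mac.desc != NULL) ? 0 : 1;
    }
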
index bfad61607f07fa1925a22663d978a4d16b3554c0..f499a7fad6f03b297a0f22a6ff3cc967e12f1ecc 100644 (file)
@@ -5,10 +5,12 @@
 #ifndef __STMMAC_HWIF_H__
 #define __STMMAC_HWIF_H__
 
+#include <linux/netdevice.h>
+
 #define stmmac_do_void_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
        int __result = -EINVAL; \
-       if ((__priv)->hw->__module->__cname) { \
+       if ((__priv)->hw->__module && (__priv)->hw->__module->__cname) { \
                (__priv)->hw->__module->__cname((__arg0), ##__args); \
                __result = 0; \
        } \
@@ -17,7 +19,7 @@
 #define stmmac_do_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
        int __result = -EINVAL; \
-       if ((__priv)->hw->__module->__cname) \
+       if ((__priv)->hw->__module && (__priv)->hw->__module->__cname) \
                __result = (__priv)->hw->__module->__cname((__arg0), ##__args); \
        __result; \
 })
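
With whole ops tables now allowed to be NULL (the tc pointer on pre-5.10 cores, or anything a platform setup() left unset), the dispatch macros test the module pointer before the callback pointer, so a missing module degrades to -EINVAL instead of a NULL dereference. What a call like stmmac_tc_init() reduces to after the change (sketch, simplified types):

    #include <errno.h>
    #include <stddef.h>

    struct tc_ops { int (*init)(void *priv); };
    struct mac_hw { const struct tc_ops *tc; };

    /* Both a missing ops table and a missing callback yield -EINVAL. */
    static int do_tc_init(struct mac_hw *hw, void *priv)
    {
        int ret = -EINVAL;

        if (hw->tc && hw->tc->init)
            ret = hw->tc->init(priv);
        return ret;
    }

    int main(void)
    {
        struct mac_hw hw = { .tc = NULL }; /* e.g. a core without tc support */

        return do_tc_init(&hw, NULL) == -EINVAL ? 0 : 1;
    }
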
@@ -57,7 +59,7 @@ struct stmmac_desc_ops {
        /* Get the buffer size from the descriptor */
        int (*get_tx_len)(struct dma_desc *p);
        /* Handle extra events on specific interrupts hw dependent */
-       void (*set_rx_owner)(struct dma_desc *p);
+       void (*set_rx_owner)(struct dma_desc *p, int disable_rx_ic);
        /* Get the receive frame size */
        int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
        /* Return the reception status looking at the RDES1 */
@@ -77,6 +79,12 @@ struct stmmac_desc_ops {
        void (*display_ring)(void *head, unsigned int size, bool rx);
        /* set MSS via context descriptor */
        void (*set_mss)(struct dma_desc *p, unsigned int mss);
+       /* get descriptor skbuff address */
+       void (*get_addr)(struct dma_desc *p, unsigned int *addr);
+       /* set descriptor skbuff address */
+       void (*set_addr)(struct dma_desc *p, dma_addr_t addr);
+       /* clear descriptor */
+       void (*clear)(struct dma_desc *p);
 };
 
 #define stmmac_init_rx_desc(__priv, __args...) \
@@ -121,6 +129,12 @@ struct stmmac_desc_ops {
        stmmac_do_void_callback(__priv, desc, display_ring, __args)
 #define stmmac_set_mss(__priv, __args...) \
        stmmac_do_void_callback(__priv, desc, set_mss, __args)
+#define stmmac_get_desc_addr(__priv, __args...) \
+       stmmac_do_void_callback(__priv, desc, get_addr, __args)
+#define stmmac_set_desc_addr(__priv, __args...) \
+       stmmac_do_void_callback(__priv, desc, set_addr, __args)
+#define stmmac_clear_desc(__priv, __args...) \
+       stmmac_do_void_callback(__priv, desc, clear, __args)
 
 struct stmmac_dma_cfg;
 struct dma_features;
@@ -130,7 +144,7 @@ struct stmmac_dma_ops {
        /* DMA core initialization */
        int (*reset)(void __iomem *ioaddr);
        void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
-                    u32 dma_tx, u32 dma_rx, int atds);
+                    int atds);
        void (*init_chan)(void __iomem *ioaddr,
                          struct stmmac_dma_cfg *dma_cfg, u32 chan);
        void (*init_rx_chan)(void __iomem *ioaddr,
@@ -143,10 +157,6 @@ struct stmmac_dma_ops {
        void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
        /* Dump DMA registers */
        void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space);
-       /* Set tx/rx threshold in the csr6 register
-        * An invalid value enables the store-and-forward mode */
-       void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
-                        int rxfifosz);
        void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
                            int fifosz, u8 qmode);
        void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
@@ -189,8 +199,6 @@ struct stmmac_dma_ops {
        stmmac_do_void_callback(__priv, dma, axi, __args)
 #define stmmac_dump_dma_regs(__priv, __args...) \
        stmmac_do_void_callback(__priv, dma, dump_regs, __args)
-#define stmmac_dma_mode(__priv, __args...) \
-       stmmac_do_void_callback(__priv, dma, dma_mode, __args)
 #define stmmac_dma_rx_mode(__priv, __args...) \
        stmmac_do_void_callback(__priv, dma, dma_rx_mode, __args)
 #define stmmac_dma_tx_mode(__priv, __args...) \
@@ -232,6 +240,7 @@ struct mac_device_info;
 struct net_device;
 struct rgmii_adv;
 struct stmmac_safety_stats;
+struct stmmac_tc_entry;
 
 /* Helpers to program the MAC core */
 struct stmmac_ops {
@@ -301,6 +310,9 @@ struct stmmac_ops {
                        struct stmmac_safety_stats *stats);
        int (*safety_feat_dump)(struct stmmac_safety_stats *stats,
                        int index, unsigned long *count, const char **desc);
+       /* Flexible RX Parser */
+       int (*rxp_config)(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
+                         unsigned int count);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -365,6 +377,8 @@ struct stmmac_ops {
        stmmac_do_callback(__priv, mac, safety_feat_irq_status, __args)
 #define stmmac_safety_feat_dump(__priv, __args...) \
        stmmac_do_callback(__priv, mac, safety_feat_dump, __args)
+#define stmmac_rxp_config(__priv, __args...) \
+       stmmac_do_callback(__priv, mac, rxp_config, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -419,6 +433,23 @@ struct stmmac_mode_ops {
        stmmac_do_void_callback(__priv, mode, clean_desc3, __args)
 
 struct stmmac_priv;
+struct tc_cls_u32_offload;
+
+struct stmmac_tc_ops {
+       int (*init)(struct stmmac_priv *priv);
+       int (*setup_cls_u32)(struct stmmac_priv *priv,
+                            struct tc_cls_u32_offload *cls);
+};
+
+#define stmmac_tc_init(__priv, __args...) \
+       stmmac_do_callback(__priv, tc, init, __args)
+#define stmmac_tc_setup_cls_u32(__priv, __args...) \
+       stmmac_do_callback(__priv, tc, setup_cls_u32, __args)
+
+struct stmmac_regs_off {
+       u32 ptp_off;
+       u32 mmc_off;
+};
 
 extern const struct stmmac_ops dwmac100_ops;
 extern const struct stmmac_dma_ops dwmac100_dma_ops;
@@ -429,6 +460,7 @@ extern const struct stmmac_dma_ops dwmac4_dma_ops;
 extern const struct stmmac_ops dwmac410_ops;
 extern const struct stmmac_dma_ops dwmac410_dma_ops;
 extern const struct stmmac_ops dwmac510_ops;
+extern const struct stmmac_tc_ops dwmac510_tc_ops;
 
 #define GMAC_VERSION           0x00000020      /* GMAC CORE Version */
 #define GMAC4_VERSION          0x00000110      /* GMAC4+ CORE Version */
index 7b1d901bf5bc2e3af72f5b08ebceb534384d7b7c..de65bb29feba967cc7a0d6ff3184998f359dde34 100644 (file)
@@ -168,7 +168,7 @@ static void ndesc_set_tx_owner(struct dma_desc *p)
        p->des0 |= cpu_to_le32(TDES0_OWN);
 }
 
-static void ndesc_set_rx_owner(struct dma_desc *p)
+static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
 {
        p->des0 |= cpu_to_le32(RDES0_OWN);
 }
@@ -297,6 +297,21 @@ static void ndesc_display_ring(void *head, unsigned int size, bool rx)
        pr_info("\n");
 }
 
+static void ndesc_get_addr(struct dma_desc *p, unsigned int *addr)
+{
+       *addr = le32_to_cpu(p->des2);
+}
+
+static void ndesc_set_addr(struct dma_desc *p, dma_addr_t addr)
+{
+       p->des2 = cpu_to_le32(addr);
+}
+
+static void ndesc_clear(struct dma_desc *p)
+{
+       p->des2 = 0;
+}
+
 const struct stmmac_desc_ops ndesc_ops = {
        .tx_status = ndesc_get_tx_status,
        .rx_status = ndesc_get_rx_status,
@@ -316,4 +331,7 @@ const struct stmmac_desc_ops ndesc_ops = {
        .get_timestamp = ndesc_get_timestamp,
        .get_rx_timestamp_status = ndesc_get_rx_timestamp_status,
        .display_ring = ndesc_display_ring,
+       .get_addr = ndesc_get_addr,
+       .set_addr = ndesc_set_addr,
+       .clear = ndesc_clear,
 };
index 2443f20e07bff02527021a7404f060f70b1bd296..4d425b1a0c5934dcb1cbba1dd30273beb49fff98 100644 (file)
@@ -76,11 +76,36 @@ struct stmmac_rx_queue {
        struct napi_struct napi ____cacheline_aligned_in_smp;
 };
 
+struct stmmac_tc_entry {
+       bool in_use;
+       bool in_hw;
+       bool is_last;
+       bool is_frag;
+       void *frag_ptr;
+       unsigned int table_pos;
+       u32 handle;
+       u32 prio;
+       struct {
+               u32 match_data;
+               u32 match_en;
+               u8 af:1;
+               u8 rf:1;
+               u8 im:1;
+               u8 nc:1;
+               u8 res1:4;
+               u8 frame_offset;
+               u8 ok_index;
+               u8 dma_ch_no;
+               u32 res2;
+       } __packed val;
+};
+
 struct stmmac_priv {
        /* Frequently used values are kept adjacent for cache effect */
        u32 tx_count_frames;
        u32 tx_coal_frames;
        u32 tx_coal_timer;
+       bool tx_timer_armed;
 
        int tx_coalesce;
        int hwts_tx_en;
@@ -151,6 +176,11 @@ struct stmmac_priv {
        unsigned long state;
        struct workqueue_struct *wq;
        struct work_struct service_task;
+
+       /* TC Handling */
+       unsigned int tc_entries_max;
+       unsigned int tc_off_max;
+       struct stmmac_tc_entry *tc_entries;
 };
 
 enum stmmac_state {
index 0135fd3aa6efd6a7c33991c631602c4e1316bc35..c32de53a00d3eb26bd436165b2e3eb7c1a40959c 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/seq_file.h>
 #endif /* CONFIG_DEBUG_FS */
 #include <linux/net_tstamp.h>
+#include <net/pkt_cls.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
 #include <linux/reset.h>
@@ -1155,10 +1156,7 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
                return -EINVAL;
        }
 
-       if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
-       else
-               p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
+       stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[i]);
 
        if (priv->dma_buf_sz == BUF_SIZE_16KiB)
                stmmac_init_desc3(priv, p);
@@ -1343,14 +1341,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
                        else
                                p = tx_q->dma_tx + i;
 
-                       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-                               p->des0 = 0;
-                               p->des1 = 0;
-                               p->des2 = 0;
-                               p->des3 = 0;
-                       } else {
-                               p->des2 = 0;
-                       }
+                       stmmac_clear_desc(priv, p);
 
                        tx_q->tx_skbuff_dma[i].buf = 0;
                        tx_q->tx_skbuff_dma[i].map_as_page = false;
@@ -1796,22 +1787,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
        }
 
        /* configure all channels */
-       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               for (chan = 0; chan < rx_channels_count; chan++) {
-                       qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
+       for (chan = 0; chan < rx_channels_count; chan++) {
+               qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
 
-                       stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
-                                       rxfifosz, qmode);
-               }
+               stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
+                               rxfifosz, qmode);
+       }
 
-               for (chan = 0; chan < tx_channels_count; chan++) {
-                       qmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
+       for (chan = 0; chan < tx_channels_count; chan++) {
+               qmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
 
-                       stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan,
-                                       txfifosz, qmode);
-               }
-       } else {
-               stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz);
+               stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan,
+                               txfifosz, qmode);
        }
 }
 
@@ -1980,23 +1967,14 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
        rxfifosz /= rx_channels_count;
        txfifosz /= tx_channels_count;
 
-       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz,
-                               rxqmode);
-               stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz,
-                               txqmode);
-       } else {
-               stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz);
-       }
+       stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, rxqmode);
+       stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz, txqmode);
 }
 
 static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
 {
-       int ret = false;
+       int ret;
 
-       /* Safety features are only available in cores >= 5.10 */
-       if (priv->synopsys_id < DWMAC_CORE_5_10)
-               return ret;
        ret = stmmac_safety_feat_irq_status(priv, priv->dev,
                        priv->ioaddr, priv->dma_cap.asp, &priv->sstats);
        if (ret && (ret != -EINVAL)) {
@@ -2022,7 +2000,11 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
                                tx_channel_count : rx_channel_count;
        u32 chan;
        bool poll_scheduled = false;
-       int status[channels_to_check];
+       int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
+
+       /* Make sure we never check beyond our status buffer. */
+       if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
+               channels_to_check = ARRAY_SIZE(status);
 
        /* Each DMA channel can be used for rx and tx simultaneously, yet
         * napi_struct is embedded in struct stmmac_rx_queue rather than in a
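
The status array was a variable-length array sized by channels_to_check; it becomes a fixed array sized for the worst case, with the WARN_ON_ONCE() clamp guaranteeing the loops below never index past it. The same shape in standalone C (queue counts illustrative; the driver uses MTL_MAX_TX_QUEUES/MTL_MAX_RX_QUEUES):

    #include <stdio.h>

    #define MTL_MAX_TX 8
    #define MTL_MAX_RX 8
    #define MAX2(a, b) ((a) > (b) ? (a) : (b))
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static void dma_interrupt(unsigned int channels_to_check)
    {
        int status[MAX2(MTL_MAX_TX, MTL_MAX_RX)]; /* fixed bound, no VLA */

        /* mirror of the WARN_ON_ONCE() clamp in the patch */
        if (channels_to_check > ARRAY_SIZE(status))
            channels_to_check = ARRAY_SIZE(status);

        for (unsigned int chan = 0; chan < channels_to_check; chan++)
            status[chan] = 0;
        printf("checked %u channels\n", channels_to_check);
    }

    int main(void)
    {
        dma_interrupt(12); /* clamped to 8 */
        return 0;
    }
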
@@ -2103,14 +2085,6 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
        unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET |
                            MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET;
 
-       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET;
-               priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET;
-       } else {
-               priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET;
-               priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET;
-       }
-
        dwmac_mmc_intr_all_mask(priv->mmcaddr);
 
        if (priv->dma_cap.rmon) {
@@ -2164,10 +2138,9 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
        u32 rx_channels_count = priv->plat->rx_queues_to_use;
        u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 dma_csr_ch = max(rx_channels_count, tx_channels_count);
        struct stmmac_rx_queue *rx_q;
        struct stmmac_tx_queue *tx_q;
-       u32 dummy_dma_rx_phy = 0;
-       u32 dummy_dma_tx_phy = 0;
        u32 chan = 0;
        int atds = 0;
        int ret = 0;
@@ -2186,48 +2159,39 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
                return ret;
        }
 
-       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               /* DMA Configuration */
-               stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg,
-                               dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
-
-               /* DMA RX Channel Configuration */
-               for (chan = 0; chan < rx_channels_count; chan++) {
-                       rx_q = &priv->rx_queue[chan];
-
-                       stmmac_init_rx_chan(priv, priv->ioaddr,
-                                       priv->plat->dma_cfg, rx_q->dma_rx_phy,
-                                       chan);
-
-                       rx_q->rx_tail_addr = rx_q->dma_rx_phy +
-                                   (DMA_RX_SIZE * sizeof(struct dma_desc));
-                       stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
-                                       rx_q->rx_tail_addr, chan);
-               }
-
-               /* DMA TX Channel Configuration */
-               for (chan = 0; chan < tx_channels_count; chan++) {
-                       tx_q = &priv->tx_queue[chan];
+       /* DMA RX Channel Configuration */
+       for (chan = 0; chan < rx_channels_count; chan++) {
+               rx_q = &priv->rx_queue[chan];
 
-                       stmmac_init_chan(priv, priv->ioaddr,
-                                       priv->plat->dma_cfg, chan);
+               stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+                                   rx_q->dma_rx_phy, chan);
 
-                       stmmac_init_tx_chan(priv, priv->ioaddr,
-                                       priv->plat->dma_cfg, tx_q->dma_tx_phy,
-                                       chan);
+               rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+                           (DMA_RX_SIZE * sizeof(struct dma_desc));
+               stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+                                      rx_q->rx_tail_addr, chan);
+       }
 
-                       tx_q->tx_tail_addr = tx_q->dma_tx_phy +
-                                   (DMA_TX_SIZE * sizeof(struct dma_desc));
-                       stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
-                                       tx_q->tx_tail_addr, chan);
-               }
-       } else {
-               rx_q = &priv->rx_queue[chan];
+       /* DMA TX Channel Configuration */
+       for (chan = 0; chan < tx_channels_count; chan++) {
                tx_q = &priv->tx_queue[chan];
-               stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg,
-                               tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
+
+               stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+                                   tx_q->dma_tx_phy, chan);
+
+               tx_q->tx_tail_addr = tx_q->dma_tx_phy +
+                           (DMA_TX_SIZE * sizeof(struct dma_desc));
+               stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+                                      tx_q->tx_tail_addr, chan);
        }
 
+       /* DMA CSR Channel configuration */
+       for (chan = 0; chan < dma_csr_ch; chan++)
+               stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
+
+       /* DMA Configuration */
+       stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, atds);
+
        if (priv->plat->axi)
                stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
 
@@ -2510,12 +2474,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        stmmac_core_init(priv, priv->hw, dev);
 
        /* Initialize MTL*/
-       if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               stmmac_mtl_configuration(priv);
+       stmmac_mtl_configuration(priv);
 
        /* Initialize Safety Features */
-       if (priv->synopsys_id >= DWMAC_CORE_5_10)
-               stmmac_safety_feat_configuration(priv);
+       stmmac_safety_feat_configuration(priv);
 
        ret = stmmac_rx_ipc(priv, priv->hw);
        if (!ret) {
@@ -3069,10 +3031,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
        if (enh_desc)
                is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc);
 
-       if (unlikely(is_jumbo) && likely(priv->synopsys_id <
-                                        DWMAC_CORE_4_00)) {
+       if (unlikely(is_jumbo)) {
                entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion);
-               if (unlikely(entry < 0))
+               if (unlikely(entry < 0) && (entry != -EINVAL))
                        goto dma_map_err;
        }
 
@@ -3095,10 +3056,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                        goto dma_map_err; /* should reuse desc w/o issues */
 
                tx_q->tx_skbuff_dma[entry].buf = des;
-               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-                       desc->des0 = cpu_to_le32(des);
-               else
-                       desc->des2 = cpu_to_le32(des);
+
+               stmmac_set_desc_addr(priv, desc, des);
 
                tx_q->tx_skbuff_dma[entry].map_as_page = true;
                tx_q->tx_skbuff_dma[entry].len = len;
@@ -3153,13 +3112,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
         * element in case of no SG.
         */
        priv->tx_count_frames += nfrags + 1;
-       if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
+       if (likely(priv->tx_coal_frames > priv->tx_count_frames) &&
+           !priv->tx_timer_armed) {
                mod_timer(&priv->txtimer,
                          STMMAC_COAL_TIMER(priv->tx_coal_timer));
+               priv->tx_timer_armed = true;
        } else {
                priv->tx_count_frames = 0;
                stmmac_set_tx_ic(priv, desc);
                priv->xstats.tx_set_ic_bit++;
+               priv->tx_timer_armed = false;
        }
 
        skb_tx_timestamp(skb);
@@ -3177,10 +3139,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                        goto dma_map_err;
 
                tx_q->tx_skbuff_dma[first_entry].buf = des;
-               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-                       first->des0 = cpu_to_le32(des);
-               else
-                       first->des2 = cpu_to_le32(des);
+
+               stmmac_set_desc_addr(priv, first, des);
 
                tx_q->tx_skbuff_dma[first_entry].len = nopaged_len;
                tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment;
@@ -3206,11 +3166,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
-       if (priv->synopsys_id < DWMAC_CORE_4_00)
-               stmmac_enable_dma_transmission(priv, priv->ioaddr);
-       else
-               stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
-                               queue);
+       stmmac_enable_dma_transmission(priv, priv->ioaddr);
+       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
        return NETDEV_TX_OK;
 
@@ -3294,13 +3251,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
                                break;
                        }
 
-                       if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-                               p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
-                               p->des1 = 0;
-                       } else {
-                               p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
-                       }
-
+                       stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[entry]);
                        stmmac_refill_desc3(priv, rx_q, p);
 
                        if (rx_q->rx_zeroc_thresh > 0)
@@ -3311,10 +3262,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
                }
                dma_wmb();
 
-               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-                       stmmac_init_rx_desc(priv, p, priv->use_riwt, 0, 0);
-               else
-                       stmmac_set_rx_owner(priv, p);
+               stmmac_set_rx_owner(priv, p, priv->use_riwt);
 
                dma_wmb();
 
@@ -3402,11 +3350,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
                        int frame_len;
                        unsigned int des;
 
-                       if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-                               des = le32_to_cpu(p->des0);
-                       else
-                               des = le32_to_cpu(p->des2);
-
+                       stmmac_get_desc_addr(priv, p, &des);
                        frame_len = stmmac_get_rx_frame_len(priv, p, coe);
 
                        /*  If frame length is greater than skb buffer size
@@ -3700,6 +3644,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
        /* To handle GMAC own interrupts */
        if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
                int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
+               int mtl_status;
 
                if (unlikely(status)) {
                        /* For LPI we need to save the tx status */
@@ -3709,20 +3654,18 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
                                priv->tx_path_in_lpi_mode = false;
                }
 
-               if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-                       for (queue = 0; queue < queues_count; queue++) {
-                               struct stmmac_rx_queue *rx_q =
-                               &priv->rx_queue[queue];
+               for (queue = 0; queue < queues_count; queue++) {
+                       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-                               status |= stmmac_host_mtl_irq_status(priv,
-                                               priv->hw, queue);
+                       mtl_status = stmmac_host_mtl_irq_status(priv, priv->hw,
+                                                               queue);
+                       if (mtl_status != -EINVAL)
+                               status |= mtl_status;
 
-                               if (status & CORE_IRQ_MTL_RX_OVERFLOW)
-                                       stmmac_set_rx_tail_ptr(priv,
-                                                       priv->ioaddr,
-                                                       rx_q->rx_tail_addr,
-                                                       queue);
-                       }
+                       if (status & CORE_IRQ_MTL_RX_OVERFLOW)
+                               stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+                                                      rx_q->rx_tail_addr,
+                                                      queue);
                }
 
                /* PCS link status */
@@ -3786,6 +3729,58 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        return ret;
 }
 
+static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                   void *cb_priv)
+{
+       struct stmmac_priv *priv = cb_priv;
+       int ret = -EOPNOTSUPP;
+
+       stmmac_disable_all_queues(priv);
+
+       switch (type) {
+       case TC_SETUP_CLSU32:
+               if (tc_cls_can_offload_and_chain0(priv->dev, type_data))
+                       ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+               break;
+       default:
+               break;
+       }
+
+       stmmac_enable_all_queues(priv);
+       return ret;
+}
+
+static int stmmac_setup_tc_block(struct stmmac_priv *priv,
+                                struct tc_block_offload *f)
+{
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb,
+                               priv, priv);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+                          void *type_data)
+{
+       struct stmmac_priv *priv = netdev_priv(ndev);
+
+       switch (type) {
+       case TC_SETUP_BLOCK:
+               return stmmac_setup_tc_block(priv, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
 static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 {
        struct stmmac_priv *priv = netdev_priv(ndev);
@@ -4024,6 +4019,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
        .ndo_set_rx_mode = stmmac_set_rx_mode,
        .ndo_tx_timeout = stmmac_tx_timeout,
        .ndo_do_ioctl = stmmac_ioctl,
+       .ndo_setup_tc = stmmac_setup_tc,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = stmmac_poll_controller,
 #endif
@@ -4223,6 +4219,11 @@ int stmmac_dvr_probe(struct device *device,
        ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                            NETIF_F_RXCSUM;
 
+       ret = stmmac_tc_init(priv, priv);
+       if (!ret) {
+               ndev->hw_features |= NETIF_F_HW_TC;
+       }
+
        if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
                ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
                priv->tso = true;
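
The tx_timer_armed flag added above changes the coalescing logic in stmmac_xmit(): the timer is armed at most once, and while it is pending, further sub-threshold frames fall through to the interrupt path instead of pushing the deadline out with another mod_timer() (which, under steady traffic, could postpone TX cleanup indefinitely). A behavioral sketch mirroring the new branch structure, one frame per call for simplicity:

    #include <stdbool.h>
    #include <stdio.h>

    struct coal {
        unsigned int count;     /* tx_count_frames */
        unsigned int threshold; /* tx_coal_frames  */
        bool timer_armed;       /* tx_timer_armed  */
    };

    /* Returns true when this descriptor should request a TX interrupt. */
    static bool tx_frame(struct coal *c)
    {
        ++c->count;
        if (c->threshold > c->count && !c->timer_armed) {
            /* mod_timer(&txtimer, ...) would go here */
            c->timer_armed = true;
            return false;
        }
        c->count = 0;
        c->timer_armed = false;
        return true; /* stmmac_set_tx_ic(): interrupt on this descriptor */
    }

    int main(void)
    {
        struct coal c = { .count = 0, .threshold = 4, .timer_armed = false };

        for (int i = 0; i < 8; i++)
            printf("frame %d -> IC=%d armed=%d\n", i, tx_frame(&c),
                   c.timer_armed);
        return 0;
    }
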
index f5f37bfa1d588a639c311b75accad5e6033bca86..5df1a608e566fc26bcdac774b6cb04446d9fbf34 100644 (file)
@@ -233,10 +233,7 @@ int stmmac_mdio_register(struct net_device *ndev)
        new_bus->phy_mask = mdio_bus_data->phy_mask;
        new_bus->parent = priv->device;
 
-       if (mdio_node)
-               err = of_mdiobus_register(new_bus, mdio_node);
-       else
-               err = mdiobus_register(new_bus);
+       err = of_mdiobus_register(new_bus, mdio_node);
        if (err != 0) {
                dev_err(dev, "Cannot register the MDIO bus\n");
                goto bus_register_fail;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
new file mode 100644 (file)
index 0000000..881c94b
--- /dev/null
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
+ * stmmac TC Handling (HW only)
+ */
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include "common.h"
+#include "dwmac4.h"
+#include "dwmac5.h"
+#include "stmmac.h"
+
+static void tc_fill_all_pass_entry(struct stmmac_tc_entry *entry)
+{
+       memset(entry, 0, sizeof(*entry));
+       entry->in_use = true;
+       entry->is_last = true;
+       entry->is_frag = false;
+       entry->prio = ~0x0;
+       entry->handle = 0;
+       entry->val.match_data = 0x0;
+       entry->val.match_en = 0x0;
+       entry->val.af = 1;
+       entry->val.dma_ch_no = 0x0;
+}
+
+static struct stmmac_tc_entry *tc_find_entry(struct stmmac_priv *priv,
+                                            struct tc_cls_u32_offload *cls,
+                                            bool free)
+{
+       struct stmmac_tc_entry *entry, *first = NULL, *dup = NULL;
+       u32 loc = cls->knode.handle;
+       int i;
+
+       for (i = 0; i < priv->tc_entries_max; i++) {
+               entry = &priv->tc_entries[i];
+               if (!entry->in_use && !first && free)
+                       first = entry;
+               if (entry->handle == loc && !free)
+                       dup = entry;
+       }
+
+       if (dup)
+               return dup;
+       if (first) {
+               first->handle = loc;
+               first->in_use = true;
+
+               /* Reset HW values */
+               memset(&first->val, 0, sizeof(first->val));
+       }
+
+       return first;
+}
+
+static int tc_fill_actions(struct stmmac_tc_entry *entry,
+                          struct stmmac_tc_entry *frag,
+                          struct tc_cls_u32_offload *cls)
+{
+       struct stmmac_tc_entry *action_entry = entry;
+       const struct tc_action *act;
+       struct tcf_exts *exts;
+       LIST_HEAD(actions);
+
+       exts = cls->knode.exts;
+       if (!tcf_exts_has_actions(exts))
+               return -EINVAL;
+       if (frag)
+               action_entry = frag;
+
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(act, &actions, list) {
+               /* Accept */
+               if (is_tcf_gact_ok(act)) {
+                       action_entry->val.af = 1;
+                       break;
+               }
+               /* Drop */
+               if (is_tcf_gact_shot(act)) {
+                       action_entry->val.rf = 1;
+                       break;
+               }
+
+               /* Unsupported */
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int tc_fill_entry(struct stmmac_priv *priv,
+                        struct tc_cls_u32_offload *cls)
+{
+       struct stmmac_tc_entry *entry, *frag = NULL;
+       struct tc_u32_sel *sel = cls->knode.sel;
+       u32 off, data, mask, real_off, rem;
+       u32 prio = cls->common.prio;
+       int ret;
+
+       /* Only 1 match per entry */
+       if (sel->nkeys <= 0 || sel->nkeys > 1)
+               return -EINVAL;
+
+       off = sel->keys[0].off << sel->offshift;
+       data = sel->keys[0].val;
+       mask = sel->keys[0].mask;
+
+       switch (ntohs(cls->common.protocol)) {
+       case ETH_P_ALL:
+               break;
+       case ETH_P_IP:
+               off += ETH_HLEN;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (off > priv->tc_off_max)
+               return -EINVAL;
+
+       real_off = off / 4;
+       rem = off % 4;
+
+       entry = tc_find_entry(priv, cls, true);
+       if (!entry)
+               return -EINVAL;
+
+       if (rem) {
+               frag = tc_find_entry(priv, cls, true);
+               if (!frag) {
+                       ret = -EINVAL;
+                       goto err_unuse;
+               }
+
+               entry->frag_ptr = frag;
+               entry->val.match_en = (mask << (rem * 8)) &
+                       GENMASK(31, rem * 8);
+               entry->val.match_data = (data << (rem * 8)) &
+                       GENMASK(31, rem * 8);
+               entry->val.frame_offset = real_off;
+               entry->prio = prio;
+
+               frag->val.match_en = (mask >> (rem * 8)) &
+                       GENMASK(rem * 8 - 1, 0);
+               frag->val.match_data = (data >> (rem * 8)) &
+                       GENMASK(rem * 8 - 1, 0);
+               frag->val.frame_offset = real_off + 1;
+               frag->prio = prio;
+               frag->is_frag = true;
+       } else {
+               entry->frag_ptr = NULL;
+               entry->val.match_en = mask;
+               entry->val.match_data = data;
+               entry->val.frame_offset = real_off;
+               entry->prio = prio;
+       }
+
+       ret = tc_fill_actions(entry, frag, cls);
+       if (ret)
+               goto err_unuse;
+
+       return 0;
+
+err_unuse:
+       if (frag)
+               frag->in_use = false;
+       entry->in_use = false;
+       return ret;
+}
+
+static void tc_unfill_entry(struct stmmac_priv *priv,
+                           struct tc_cls_u32_offload *cls)
+{
+       struct stmmac_tc_entry *entry;
+
+       entry = tc_find_entry(priv, cls, false);
+       if (!entry)
+               return;
+
+       entry->in_use = false;
+       if (entry->frag_ptr) {
+               entry = entry->frag_ptr;
+               entry->is_frag = false;
+               entry->in_use = false;
+       }
+}
+
+static int tc_config_knode(struct stmmac_priv *priv,
+                          struct tc_cls_u32_offload *cls)
+{
+       int ret;
+
+       ret = tc_fill_entry(priv, cls);
+       if (ret)
+               return ret;
+
+       ret = stmmac_rxp_config(priv, priv->hw->pcsr, priv->tc_entries,
+                       priv->tc_entries_max);
+       if (ret)
+               goto err_unfill;
+
+       return 0;
+
+err_unfill:
+       tc_unfill_entry(priv, cls);
+       return ret;
+}
+
+static int tc_delete_knode(struct stmmac_priv *priv,
+                          struct tc_cls_u32_offload *cls)
+{
+       int ret;
+
+       /* Set entry and fragments as not used */
+       tc_unfill_entry(priv, cls);
+
+       ret = stmmac_rxp_config(priv, priv->hw->pcsr, priv->tc_entries,
+                       priv->tc_entries_max);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int tc_setup_cls_u32(struct stmmac_priv *priv,
+                           struct tc_cls_u32_offload *cls)
+{
+       switch (cls->command) {
+       case TC_CLSU32_REPLACE_KNODE:
+               tc_unfill_entry(priv, cls);
+               /* Fall through */
+       case TC_CLSU32_NEW_KNODE:
+               return tc_config_knode(priv, cls);
+       case TC_CLSU32_DELETE_KNODE:
+               return tc_delete_knode(priv, cls);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int tc_init(struct stmmac_priv *priv)
+{
+       struct dma_features *dma_cap = &priv->dma_cap;
+       unsigned int count;
+
+       if (!dma_cap->frpsel)
+               return -EINVAL;
+
+       switch (dma_cap->frpbs) {
+       case 0x0:
+               priv->tc_off_max = 64;
+               break;
+       case 0x1:
+               priv->tc_off_max = 128;
+               break;
+       case 0x2:
+               priv->tc_off_max = 256;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       switch (dma_cap->frpes) {
+       case 0x0:
+               count = 64;
+               break;
+       case 0x1:
+               count = 128;
+               break;
+       case 0x2:
+               count = 256;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* Reserve one last filter which lets all pass */
+       priv->tc_entries_max = count;
+       priv->tc_entries = devm_kzalloc(priv->device,
+                       sizeof(*priv->tc_entries) * count, GFP_KERNEL);
+       if (!priv->tc_entries)
+               return -ENOMEM;
+
+       tc_fill_all_pass_entry(&priv->tc_entries[count - 1]);
+
+       dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n",
+                       priv->tc_entries_max, priv->tc_off_max);
+       return 0;
+}
+
+const struct stmmac_tc_ops dwmac510_tc_ops = {
+       .init = tc_init,
+       .setup_cls_u32 = tc_setup_cls_u32,
+};
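
Each flexible RX parser entry matches one 32-bit word at a word-aligned frame offset, so tc_fill_entry() splits a match at an unaligned byte offset across an entry and a fragment entry. A worked example of that arithmetic for byte offset 18 (rem == 2), with the data/mask expressions copied from the patch and GENMASK() restated for user space:

    #include <stdint.h>
    #include <stdio.h>

    #define GENMASK(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

    int main(void)
    {
        uint32_t off = 18, data = 0x11223344, mask = 0xffffffff;
        uint32_t real_off = off / 4;   /* parser word 4    */
        uint32_t rem = off % 4;        /* 2 bytes into it  */

        uint32_t e_data = (data << (rem * 8)) & GENMASK(31, rem * 8);
        uint32_t e_mask = (mask << (rem * 8)) & GENMASK(31, rem * 8);
        uint32_t f_data = (data >> (rem * 8)) & GENMASK(rem * 8 - 1, 0);
        uint32_t f_mask = (mask >> (rem * 8)) & GENMASK(rem * 8 - 1, 0);

        /* entry: word 4, match_data=33440000; frag: word 5, match_data=00001122 */
        printf("entry: off=%u data=%08x mask=%08x\n", real_off, e_data, e_mask);
        printf("frag:  off=%u data=%08x mask=%08x\n", real_off + 1, f_data, f_mask);
        return 0;
    }

Note also that tc_init() reserves the final table slot for the all-pass entry, so the usable filter count is one less than the frpes-derived table size, and offsets are bounded by the frpbs-derived tc_off_max.
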
index f081de4f38d73f7bb37cfa0e119447afc1718f50..88c12474a0c38cc10f539d7eff0b81a0cc9a4d7a 100644 (file)
@@ -3443,7 +3443,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 
                len = (val & RCR_ENTRY_L2_LEN) >>
                        RCR_ENTRY_L2_LEN_SHIFT;
-               len -= ETH_FCS_LEN;
+               append_size = len + ETH_HLEN + ETH_FCS_LEN;
 
                addr = (val & RCR_ENTRY_PKT_BUF_ADDR) <<
                        RCR_ENTRY_PKT_BUF_ADDR_SHIFT;
@@ -3453,7 +3453,6 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
                                         RCR_ENTRY_PKTBUFSZ_SHIFT];
 
                off = addr & ~PAGE_MASK;
-               append_size = rcr_size;
                if (num_rcr == 1) {
                        int ptype;
 
@@ -3466,7 +3465,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
                        else
                                skb_checksum_none_assert(skb);
                } else if (!(val & RCR_ENTRY_MULTI))
-                       append_size = len - skb->len;
+                       append_size = append_size - skb->len;
 
                niu_rx_skb_append(skb, page, off, append_size, rcr_size);
                if ((page->index + rp->rbr_block_size) - rcr_size == addr) {
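
The niu change recomputes append_size from the reported L2 length plus Ethernet header and FCS instead of reusing the fixed rcr_size, and appends only the not-yet-copied remainder on the final, non-MULTI entry. The arithmetic, under the assumption (inferred from the fix itself) that the RCR_ENTRY_L2_LEN field covers the payload only:

    #include <stdio.h>

    #define ETH_HLEN    14 /* Ethernet header        */
    #define ETH_FCS_LEN  4 /* frame check sequence   */

    int main(void)
    {
        unsigned int len = 1500;     /* from RCR_ENTRY_L2_LEN (payload)  */
        unsigned int skb_len = 1024; /* bytes already linearized in skb  */
        unsigned int append_size = len + ETH_HLEN + ETH_FCS_LEN;

        /* final, non-MULTI entry: append only what is still missing */
        printf("append %u of %u total bytes\n", append_size - skb_len,
               append_size);
        return 0;
    }
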
index 48a541eb0af20f9a0db46c0b7a94cdcc709b0b04..9263d638bd6d0ffaec45aa1da2fd57b981faa38d 100644 (file)
@@ -18,7 +18,7 @@ if NET_VENDOR_TI
 
 config TI_DAVINCI_EMAC
        tristate "TI DaVinci EMAC Support"
-       depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 )
+       depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 ) || COMPILE_TEST
        select TI_DAVINCI_MDIO
        select TI_DAVINCI_CPDMA
        select PHYLIB
@@ -30,7 +30,7 @@ config TI_DAVINCI_EMAC
 
 config TI_DAVINCI_MDIO
        tristate "TI DaVinci MDIO Support"
-       depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || ARCH_KEYSTONE
+       depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || ARCH_KEYSTONE || COMPILE_TEST
        select PHYLIB
        ---help---
          This driver supports TI's DaVinci MDIO module.
@@ -40,7 +40,7 @@ config TI_DAVINCI_MDIO
 
 config TI_DAVINCI_CPDMA
        tristate "TI DaVinci CPDMA Support"
-       depends on ARCH_DAVINCI || ARCH_OMAP2PLUS
+       depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
        ---help---
          This driver supports TI's DaVinci CPDMA dma engine.
 
@@ -60,7 +60,7 @@ config TI_CPSW_ALE
 
 config TI_CPSW
        tristate "TI CPSW Switch Support"
-       depends on ARCH_DAVINCI || ARCH_OMAP2PLUS
+       depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
        select TI_DAVINCI_CPDMA
        select TI_DAVINCI_MDIO
        select TI_CPSW_PHY_SEL
@@ -75,7 +75,7 @@ config TI_CPSW
 
 config TI_CPTS
        bool "TI Common Platform Time Sync (CPTS) Support"
-       depends on TI_CPSW || TI_KEYSTONE_NETCP
+       depends on TI_CPSW || TI_KEYSTONE_NETCP || COMPILE_TEST
        depends on POSIX_TIMERS
        ---help---
          This driver supports the Common Platform Time Sync unit of
index 18013645e76c8be4a460e50d7edd31abda29900f..0c1adad7415da7d9b858925d0ec5715e9ca7dfec 100644 (file)
@@ -177,12 +177,18 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave)
        }
 
        dev = bus_find_device(&platform_bus_type, NULL, node, match);
-       of_node_put(node);
+       if (!dev) {
+               dev_err(dev, "unable to find platform device for %pOF\n", node);
+               goto out;
+       }
+
        priv = dev_get_drvdata(dev);
 
        priv->cpsw_phy_sel(priv, phy_mode, slave);
 
        put_device(dev);
+out:
+       of_node_put(node);
 }
 EXPORT_SYMBOL_GPL(cpsw_phy_sel);
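
bus_find_device() can return NULL, and the old code both dereferenced the result unconditionally and dropped the node reference before it was done being used. The fix applies the usual kernel cleanup shape: check every lookup, unwind references in reverse order through a single exit label. A generic user-space sketch of that shape (free() standing in for put_device()/of_node_put()):

    #include <stdio.h>
    #include <stdlib.h>

    struct handle { int dummy; };

    static struct handle *find_device(int present) /* bus_find_device() */
    {
        return present ? malloc(sizeof(struct handle)) : NULL;
    }

    static void phy_sel(int present)
    {
        struct handle *node = malloc(sizeof(*node)); /* the OF node */
        struct handle *dev = find_device(present);

        if (!dev) {
            fprintf(stderr, "unable to find platform device\n");
            goto out;
        }
        /* ... dev_get_drvdata(dev) and the actual mode selection ... */
        free(dev);  /* put_device(): drop the lookup's reference */
    out:
        free(node); /* of_node_put(): now balanced on every path */
    }

    int main(void)
    {
        phy_sel(0);
        phy_sel(1);
        return 0;
    }
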
 
index 30371274409d7e5704989302939866a3ac1c6ca7..643cd2d9dfb684b4e68041f16fb5f05049b91011 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/of_device.h>
 #include <linux/if_vlan.h>
 #include <linux/kmemleak.h>
+#include <linux/sys_soc.h>
 
 #include <linux/pinctrl/consumer.h>
 
@@ -129,7 +130,7 @@ do {                                                                \
 
 #define RX_PRIORITY_MAPPING    0x76543210
 #define TX_PRIORITY_MAPPING    0x33221100
-#define CPDMA_TX_PRIORITY_MAP  0x01234567
+#define CPDMA_TX_PRIORITY_MAP  0x76543210
 
 #define CPSW_VLAN_AWARE                BIT(1)
 #define CPSW_RX_VLAN_ENCAP     BIT(2)
@@ -957,7 +958,7 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
+static int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget)
 {
        u32                     ch_map;
        int                     num_tx, cur_budget, ch;
@@ -984,7 +985,21 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
        if (num_tx < budget) {
                napi_complete(napi_tx);
                writel(0xff, &cpsw->wr_regs->tx_en);
-               if (cpsw->quirk_irq && cpsw->tx_irq_disabled) {
+       }
+
+       return num_tx;
+}
+
+static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
+{
+       struct cpsw_common *cpsw = napi_to_cpsw(napi_tx);
+       int num_tx;
+
+       num_tx = cpdma_chan_process(cpsw->txv[0].ch, budget);
+       if (num_tx < budget) {
+               napi_complete(napi_tx);
+               writel(0xff, &cpsw->wr_regs->tx_en);
+               if (cpsw->tx_irq_disabled) {
                        cpsw->tx_irq_disabled = false;
                        enable_irq(cpsw->irqs_table[1]);
                }
@@ -993,7 +1008,7 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
        return num_tx;
 }
 
-static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
+static int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget)
 {
        u32                     ch_map;
        int                     num_rx, cur_budget, ch;
@@ -1020,7 +1035,21 @@ static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
        if (num_rx < budget) {
                napi_complete_done(napi_rx, num_rx);
                writel(0xff, &cpsw->wr_regs->rx_en);
-               if (cpsw->quirk_irq && cpsw->rx_irq_disabled) {
+       }
+
+       return num_rx;
+}
+
+static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
+{
+       struct cpsw_common *cpsw = napi_to_cpsw(napi_rx);
+       int num_rx;
+
+       num_rx = cpdma_chan_process(cpsw->rxv[0].ch, budget);
+       if (num_rx < budget) {
+               napi_complete_done(napi_rx, num_rx);
+               writel(0xff, &cpsw->wr_regs->rx_en);
+               if (cpsw->rx_irq_disabled) {
                        cpsw->rx_irq_disabled = false;
                        enable_irq(cpsw->irqs_table[0]);
                }
@@ -1252,8 +1281,8 @@ static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir)
        for (i = 0; i < ch_stats_len; i++) {
                line = i % CPSW_STATS_CH_LEN;
                snprintf(*p, ETH_GSTRING_LEN,
-                        "%s DMA chan %d: %s", rx_dir ? "Rx" : "Tx",
-                        i / CPSW_STATS_CH_LEN,
+                        "%s DMA chan %ld: %s", rx_dir ? "Rx" : "Tx",
+                        (long)(i / CPSW_STATS_CH_LEN),
                         cpsw_gstrings_ch_stats[line].stat_string);
                *p += ETH_GSTRING_LEN;
        }
@@ -1340,6 +1369,8 @@ static inline void cpsw_add_dual_emac_def_ale_entries(
        cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
                           HOST_PORT_NUM, ALE_VLAN |
                           ALE_SECURE, slave->port_vlan);
+       cpsw_ale_control_set(cpsw->ale, slave_port,
+                            ALE_PORT_DROP_UNKNOWN_VLAN, 1);
 }
 
 static void soft_reset_slave(struct cpsw_slave *slave)
@@ -2362,9 +2393,9 @@ static void cpsw_get_channels(struct net_device *ndev,
 {
        struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
+       ch->max_rx = cpsw->quirk_irq ? 1 : CPSW_MAX_QUEUES;
+       ch->max_tx = cpsw->quirk_irq ? 1 : CPSW_MAX_QUEUES;
        ch->max_combined = 0;
-       ch->max_rx = CPSW_MAX_QUEUES;
-       ch->max_tx = CPSW_MAX_QUEUES;
        ch->max_other = 0;
        ch->other_count = 0;
        ch->rx_count = cpsw->rx_ch_num;
@@ -2375,6 +2406,11 @@ static void cpsw_get_channels(struct net_device *ndev,
 static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
                                  struct ethtool_channels *ch)
 {
+       if (cpsw->quirk_irq) {
+               dev_err(cpsw->dev, "Maximum one tx/rx queue is allowed");
+               return -EOPNOTSUPP;
+       }
+
        if (ch->combined_count)
                return -EINVAL;
 
@@ -2915,44 +2951,20 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
        return ret;
 }
 
-#define CPSW_QUIRK_IRQ         BIT(0)
-
-static const struct platform_device_id cpsw_devtype[] = {
-       {
-               /* keep it for existing comaptibles */
-               .name = "cpsw",
-               .driver_data = CPSW_QUIRK_IRQ,
-       }, {
-               .name = "am335x-cpsw",
-               .driver_data = CPSW_QUIRK_IRQ,
-       }, {
-               .name = "am4372-cpsw",
-               .driver_data = 0,
-       }, {
-               .name = "dra7-cpsw",
-               .driver_data = 0,
-       }, {
-               /* sentinel */
-       }
-};
-MODULE_DEVICE_TABLE(platform, cpsw_devtype);
-
-enum ti_cpsw_type {
-       CPSW = 0,
-       AM335X_CPSW,
-       AM4372_CPSW,
-       DRA7_CPSW,
-};
-
 static const struct of_device_id cpsw_of_mtable[] = {
-       { .compatible = "ti,cpsw", .data = &cpsw_devtype[CPSW], },
-       { .compatible = "ti,am335x-cpsw", .data = &cpsw_devtype[AM335X_CPSW], },
-       { .compatible = "ti,am4372-cpsw", .data = &cpsw_devtype[AM4372_CPSW], },
-       { .compatible = "ti,dra7-cpsw", .data = &cpsw_devtype[DRA7_CPSW], },
+       { .compatible = "ti,cpsw"},
+       { .compatible = "ti,am335x-cpsw"},
+       { .compatible = "ti,am4372-cpsw"},
+       { .compatible = "ti,dra7-cpsw"},
        { /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, cpsw_of_mtable);
 
+static const struct soc_device_attribute cpsw_soc_devices[] = {
+       { .family = "AM33xx", .revision = "ES1.0"},
+       { /* sentinel */ }
+};
+
 static int cpsw_probe(struct platform_device *pdev)
 {
        struct clk                      *clk;
@@ -2964,9 +2976,9 @@ static int cpsw_probe(struct platform_device *pdev)
        void __iomem                    *ss_regs;
        void __iomem                    *cpts_regs;
        struct resource                 *res, *ss_res;
-       const struct of_device_id       *of_id;
        struct gpio_descs               *mode;
        u32 slave_offset, sliver_offset, slave_size;
+       const struct soc_device_attribute *soc;
        struct cpsw_common              *cpsw;
        int ret = 0, i;
        int irq;
@@ -3139,6 +3151,10 @@ static int cpsw_probe(struct platform_device *pdev)
                goto clean_dt_ret;
        }
 
+       soc = soc_device_match(cpsw_soc_devices);
+       if (soc)
+               cpsw->quirk_irq = 1;
+
        cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
        if (IS_ERR(cpsw->txv[0].ch)) {
                dev_err(priv->dev, "error initializing tx dma channel\n");
@@ -3178,19 +3194,16 @@ static int cpsw_probe(struct platform_device *pdev)
                goto clean_dma_ret;
        }
 
-       of_id = of_match_device(cpsw_of_mtable, &pdev->dev);
-       if (of_id) {
-               pdev->id_entry = of_id->data;
-               if (pdev->id_entry->driver_data)
-                       cpsw->quirk_irq = true;
-       }
-
        ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
 
        ndev->netdev_ops = &cpsw_netdev_ops;
        ndev->ethtool_ops = &cpsw_ethtool_ops;
-       netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
-       netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+       netif_napi_add(ndev, &cpsw->napi_rx,
+                      cpsw->quirk_irq ? cpsw_rx_poll : cpsw_rx_mq_poll,
+                      CPSW_POLL_WEIGHT);
+       netif_tx_napi_add(ndev, &cpsw->napi_tx,
+                         cpsw->quirk_irq ? cpsw_tx_poll : cpsw_tx_mq_poll,
+                         CPSW_POLL_WEIGHT);
        cpsw_split_res(ndev);
 
        /* register the network device */
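
The IRQ quirk is now keyed off the SoC revision (AM33xx ES1.0) via soc_device_match() instead of per-compatible driver_data, and the quirky single-queue case gets its own simpler NAPI poll handlers, chosen once at probe. A sketch of that probe-time selection; the family/revision strings come from soc_device_match() in the real driver, and the handler bodies here are placeholders:

    #include <stdio.h>
    #include <string.h>

    typedef int (*poll_fn)(int budget);

    /* quirk path: one channel, re-enables the IRQ when done */
    static int rx_poll_single(int budget) { (void)budget; return 0; }
    /* normal path: walks the active channel map */
    static int rx_poll_mq(int budget)     { (void)budget; return 0; }

    int main(void)
    {
        const char *family = "AM33xx", *revision = "ES1.0";
        int quirk_irq = !strcmp(family, "AM33xx") &&
                        !strcmp(revision, "ES1.0");
        poll_fn rx_poll = quirk_irq ? rx_poll_single : rx_poll_mq;

        printf("napi rx handler: %s\n",
               quirk_irq ? "single-queue" : "multi-queue");
        return rx_poll(64);
    }

With the quirk active, cpsw_get_channels()/cpsw_check_ch_settings() also cap the device at one tx and one rx queue, matching what the single-queue handlers can service.
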
index e7b76f6b4f67ef7ff53a22a14805d3386b789520..6f63c8729afca1cf4e621bd1b18763850f33ffd3 100644 (file)
@@ -294,7 +294,8 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
                delay = CPTS_SKB_TX_WORK_TIMEOUT;
        spin_unlock_irqrestore(&cpts->lock, flags);
 
-       pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
+       pr_debug("cpts overflow check at %lld.%09ld\n",
+                (long long)ts.tv_sec, ts.tv_nsec);
        return (long)delay;
 }
 
@@ -564,7 +565,7 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
        cpts->refclk = devm_clk_get(dev, "cpts");
        if (IS_ERR(cpts->refclk)) {
                dev_err(dev, "Failed to get cpts refclk\n");
-               return ERR_PTR(PTR_ERR(cpts->refclk));
+               return ERR_CAST(cpts->refclk);
        }
 
        clk_prepare(cpts->refclk);
index 31ae04117f0a2e174739c47b34379fb61c14c52e..cdbddf16dd2931ba66df103c064705d5f0aef350 100644 (file)
@@ -191,7 +191,7 @@ static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr)
                return;
 
        WARN(gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool),
-            "cpdma_desc_pool size %d != avail %d",
+            "cpdma_desc_pool size %zd != avail %zd",
             gen_pool_size(pool->gen_pool),
             gen_pool_avail(pool->gen_pool));
        if (pool->cpumap)
@@ -1080,7 +1080,7 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
        writel_relaxed(buffer, &desc->hw_buffer);
        writel_relaxed(len, &desc->hw_len);
        writel_relaxed(mode | len, &desc->hw_mode);
-       writel_relaxed(token, &desc->sw_token);
+       writel_relaxed((uintptr_t)token, &desc->sw_token);
        writel_relaxed(buffer, &desc->sw_buffer);
        writel_relaxed(len, &desc->sw_len);
        desc_read(desc, sw_len);
@@ -1121,15 +1121,15 @@ static void __cpdma_chan_free(struct cpdma_chan *chan,
        struct cpdma_desc_pool          *pool = ctlr->pool;
        dma_addr_t                      buff_dma;
        int                             origlen;
-       void                            *token;
+       uintptr_t                       token;
 
-       token      = (void *)desc_read(desc, sw_token);
+       token      = desc_read(desc, sw_token);
        buff_dma   = desc_read(desc, sw_buffer);
        origlen    = desc_read(desc, sw_len);
 
        dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir);
        cpdma_desc_free(pool, desc, 1);
-       (*chan->handler)(token, outlen, status);
+       (*chan->handler)((void *)token, outlen, status);
 }
 
 static int __cpdma_chan_process(struct cpdma_chan *chan)
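
The sw_token change keeps the opaque completion cookie intact across its trip
through the descriptor by casting via uintptr_t, which makes the
pointer/integer conversions explicit instead of relying on bare casts. A
sketch of the round trip, assuming the field can hold the value:

	#include <linux/types.h>

	/* Stored as an integer in the descriptor, restored on completion. */
	static void *token_roundtrip(void *token)
	{
		uintptr_t raw = (uintptr_t)token; /* as written to sw_token */

		return (void *)raw; /* as read back in __cpdma_chan_free() */
	}
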
index abceea802ea1b0b351628a264aca624696e2c290..be0fec17d95de0265159054799597c530f1aaaaf 100644 (file)
@@ -1930,8 +1930,8 @@ static int davinci_emac_probe(struct platform_device *pdev)
 
        if (netif_msg_probe(priv)) {
                dev_notice(&pdev->dev, "DaVinci EMAC Probe found device "
-                          "(regs: %p, irq: %d)\n",
-                          (void *)priv->emac_base_phys, ndev->irq);
+                          "(regs: %pa, irq: %d)\n",
+                          &priv->emac_base_phys, ndev->irq);
        }
        pm_runtime_put(&pdev->dev);
 
index 3c33f4504d8e23f45e6b7668f20b1c399d5806f6..8ac72831af050c15e2434f93711e81e8dd7e5cde 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/pm_runtime.h>
 #include <linux/davinci_emac.h>
 #include <linux/of.h>
@@ -227,14 +228,14 @@ static inline int wait_for_user_access(struct davinci_mdio_data *data)
 static inline int wait_for_idle(struct davinci_mdio_data *data)
 {
        struct davinci_mdio_regs __iomem *regs = data->regs;
-       unsigned long timeout = jiffies + msecs_to_jiffies(MDIO_TIMEOUT);
+       u32 val, ret;
 
-       while (time_after(timeout, jiffies)) {
-               if (__raw_readl(&regs->control) & CONTROL_IDLE)
-                       return 0;
-       }
-       dev_err(data->dev, "timed out waiting for idle\n");
-       return -ETIMEDOUT;
+       ret = readl_poll_timeout(&regs->control, val, val & CONTROL_IDLE,
+                                0, MDIO_TIMEOUT * 1000);
+       if (ret)
+               dev_err(data->dev, "timed out waiting for idle\n");
+
+       return ret;
 }
 
 static int davinci_mdio_read(struct mii_bus *bus, int phy_id, int phy_reg)
@@ -428,12 +429,10 @@ static int davinci_mdio_probe(struct platform_device *pdev)
         * defined to support backward compatibility with DTs which assume that
         * Davinci MDIO will always scan the bus for PHY detection.
         */
-       if (dev->of_node && of_get_child_count(dev->of_node)) {
+       if (dev->of_node && of_get_child_count(dev->of_node))
                data->skip_scan = true;
-               ret = of_mdiobus_register(data->bus, dev->of_node);
-       } else {
-               ret = mdiobus_register(data->bus);
-       }
+
+       ret = of_mdiobus_register(data->bus, dev->of_node);
        if (ret)
                goto bail_out;
 
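
For reference, readl_poll_timeout() from <linux/iopoll.h> takes
(addr, val, cond, sleep_us, timeout_us): it rereads *addr into val until cond
holds or timeout_us elapses, sleeping sleep_us between reads (0 means
busy-poll), and returns 0 or -ETIMEDOUT. The pattern the hunk adopts,
sketched with a hypothetical register and bit:

	#include <linux/iopoll.h>
	#include <linux/types.h>

	/* Poll a register until an idle bit is set, for at most 1 s. */
	static int wait_idle_sketch(void __iomem *reg, u32 idle_bit)
	{
		u32 val;

		return readl_poll_timeout(reg, val, val & idle_bit,
					  0, 1000000);
	}
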
index c180b480f8ef519ee3af35d9ecaf36a867c042d3..13e4c1eff3536c6ab1bbc578d2a92d76c6e9027f 100644 (file)
@@ -217,7 +217,7 @@ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc,
                        c = *s++;
                else if (len > 1)
                        c = crc >> 8;
-               else if (len > 0)
+               else
                        c = crc & 0xff;
 
                len--;
index 1ab97d99b9bae9f9dde6227dacff606256e66c72..f411164880799a4ac57a3de8469cd6f4d848dce7 100644 (file)
@@ -867,7 +867,7 @@ static u32 rr_handle_event(struct net_device *dev, u32 prodidx, u32 eidx)
                               dev->name);
                        goto drop;
                case E_FRM_ERR:
-                       printk(KERN_WARNING "%s: Framming Error\n",
+                       printk(KERN_WARNING "%s: Framing Error\n",
                               dev->name);
                        goto drop;
                case E_FLG_SYN_ERR:
index 6ebe39a3dde66b9b5af230944c618e2bb542dd90..1be34d2e356357780f35b8640a6f738749992ee0 100644 (file)
@@ -110,7 +110,7 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
        u16 hashkey_size;
 
        /* The offset of the secret key from the beginning of this structure */
-       u32 kashkey_offset;
+       u32 hashkey_offset;
 
        u32 processor_masks_offset;
        u32 num_processor_masks;
index e7308958b7a9d9359958ae987d84c019c15c4034..d2ee66c259a70fe305fd235a11892fea7834400e 100644 (file)
@@ -652,16 +652,14 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
        sync_change_bit(index, net_device->send_section_map);
 }
 
-static void netvsc_send_tx_complete(struct netvsc_device *net_device,
-                                   struct vmbus_channel *incoming_channel,
-                                   struct hv_device *device,
+static void netvsc_send_tx_complete(struct net_device *ndev,
+                                   struct netvsc_device *net_device,
+                                   struct vmbus_channel *channel,
                                    const struct vmpacket_descriptor *desc,
                                    int budget)
 {
        struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
-       struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
-       struct vmbus_channel *channel = device->channel;
        u16 q_idx = 0;
        int queue_sends;
 
@@ -675,7 +673,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
                if (send_index != NETVSC_INVALID_INDEX)
                        netvsc_free_send_slot(net_device, send_index);
                q_idx = packet->q_idx;
-               channel = incoming_channel;
 
                tx_stats = &net_device->chan_table[q_idx].tx_stats;
 
@@ -705,14 +702,13 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
        }
 }
 
-static void netvsc_send_completion(struct netvsc_device *net_device,
+static void netvsc_send_completion(struct net_device *ndev,
+                                  struct netvsc_device *net_device,
                                   struct vmbus_channel *incoming_channel,
-                                  struct hv_device *device,
                                   const struct vmpacket_descriptor *desc,
                                   int budget)
 {
-       struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
-       struct net_device *ndev = hv_get_drvdata(device);
+       const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
 
        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
@@ -726,8 +722,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
                break;
 
        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
-               netvsc_send_tx_complete(net_device, incoming_channel,
-                                       device, desc, budget);
+               netvsc_send_tx_complete(ndev, net_device, incoming_channel,
+                                       desc, budget);
                break;
 
        default:
@@ -1092,12 +1088,11 @@ static void enq_receive_complete(struct net_device *ndev,
 
 static int netvsc_receive(struct net_device *ndev,
                          struct netvsc_device *net_device,
-                         struct net_device_context *net_device_ctx,
-                         struct hv_device *device,
                          struct vmbus_channel *channel,
                          const struct vmpacket_descriptor *desc,
-                         struct nvsp_message *nvsp)
+                         const struct nvsp_message *nvsp)
 {
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
        const struct vmtransfer_page_packet_header *vmxferpage_packet
                = container_of(desc, const struct vmtransfer_page_packet_header, d);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
@@ -1158,13 +1153,12 @@ static int netvsc_receive(struct net_device *ndev,
        return count;
 }
 
-static void netvsc_send_table(struct hv_device *hdev,
-                             struct nvsp_message *nvmsg)
+static void netvsc_send_table(struct net_device *ndev,
+                             const struct nvsp_message *nvmsg)
 {
-       struct net_device *ndev = hv_get_drvdata(hdev);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       int i;
        u32 count, *tab;
+       int i;
 
        count = nvmsg->msg.v5_msg.send_table.count;
        if (count != VRSS_SEND_TAB_SIZE) {
@@ -1179,24 +1173,25 @@ static void netvsc_send_table(struct hv_device *hdev,
                net_device_ctx->tx_table[i] = tab[i];
 }
 
-static void netvsc_send_vf(struct net_device_context *net_device_ctx,
-                          struct nvsp_message *nvmsg)
+static void netvsc_send_vf(struct net_device *ndev,
+                          const struct nvsp_message *nvmsg)
 {
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
+
        net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
        net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
 }
 
-static inline void netvsc_receive_inband(struct hv_device *hdev,
-                                struct net_device_context *net_device_ctx,
-                                struct nvsp_message *nvmsg)
+static  void netvsc_receive_inband(struct net_device *ndev,
+                                  const struct nvsp_message *nvmsg)
 {
        switch (nvmsg->hdr.msg_type) {
        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
-               netvsc_send_table(hdev, nvmsg);
+               netvsc_send_table(ndev, nvmsg);
                break;
 
        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-               netvsc_send_vf(net_device_ctx, nvmsg);
+               netvsc_send_vf(ndev, nvmsg);
                break;
        }
 }
@@ -1208,24 +1203,23 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
                                  const struct vmpacket_descriptor *desc,
                                  int budget)
 {
-       struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       struct nvsp_message *nvmsg = hv_pkt_data(desc);
+       const struct nvsp_message *nvmsg = hv_pkt_data(desc);
 
        trace_nvsp_recv(ndev, channel, nvmsg);
 
        switch (desc->type) {
        case VM_PKT_COMP:
-               netvsc_send_completion(net_device, channel, device,
+               netvsc_send_completion(ndev, net_device, channel,
                                       desc, budget);
                break;
 
        case VM_PKT_DATA_USING_XFER_PAGES:
-               return netvsc_receive(ndev, net_device, net_device_ctx,
-                                     device, channel, desc, nvmsg);
+               return netvsc_receive(ndev, net_device, channel,
+                                     desc, nvmsg);
                break;
 
        case VM_PKT_DATA_INBAND:
-               netvsc_receive_inband(device, net_device_ctx, nvmsg);
+               netvsc_receive_inband(ndev, nvmsg);
                break;
 
        default:
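
The netvsc refactor threads the struct net_device through the completion and
receive paths rather than re-deriving it in every callee. The per-packet hop
being removed looks roughly like this, condensed into a hypothetical helper:

	#include <linux/hyperv.h>
	#include <linux/netdevice.h>

	/* Each callee used to recover ndev from the hv_device. */
	static struct net_device *netvsc_ndev_sketch(struct hv_device *dev)
	{
		return hv_get_drvdata(dev);
	}
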
index ecc84954c511053fe6c493e0a1131f544467f178..da07ccdf84bf1979fdd43dc3fc1d4d742b41c1af 100644 (file)
@@ -1840,7 +1840,8 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
                goto rx_handler_failed;
        }
 
-       ret = netdev_upper_dev_link(vf_netdev, ndev, NULL);
+       ret = netdev_master_upper_dev_link(vf_netdev, ndev,
+                                          NULL, NULL, NULL);
        if (ret != 0) {
                netdev_err(vf_netdev,
                           "can not set master device %s (err = %d)\n",
index 3b6dbacaf77d1fac3bc5dd2ef4358e0da1bddf6d..7f3dab4b4cbca2a690d32ff05b1ebf961012f733 100644 (file)
@@ -752,7 +752,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
        rssp->indirect_tabsize = 4*ITAB_NUM;
        rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
        rssp->hashkey_size = NETVSC_HASH_KEYLEN;
-       rssp->kashkey_offset = rssp->indirect_taboffset +
+       rssp->hashkey_offset = rssp->indirect_taboffset +
                               rssp->indirect_tabsize;
 
        /* Set indirection table entries */
@@ -761,7 +761,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
                itab[i] = rdev->rx_table[i];
 
        /* Set hash key values */
-       keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
+       keyp = (u8 *)((unsigned long)rssp + rssp->hashkey_offset);
        memcpy(keyp, rss_key, NETVSC_HASH_KEYLEN);
 
        ret = rndis_filter_send_request(rdev, request);
@@ -1316,7 +1316,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
                   rndis_device->link_state ? "down" : "up");
 
        if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
-               return net_device;
+               goto out;
 
        rndis_filter_query_link_speed(rndis_device, net_device);
 
index 9fb9b565a002ff7c2e9f6a06c4ef6b9c4d640266..4f684cbcdc57e4ce382f1b5e57f7885ded2f2af2 100644 (file)
@@ -1045,7 +1045,7 @@ static int atusb_probe(struct usb_interface *interface,
        atusb->tx_dr.bRequest = ATUSB_TX;
        atusb->tx_dr.wValue = cpu_to_le16(0);
 
-       atusb->tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
+       atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
        if (!atusb->tx_urb)
                goto fail;
 
index 55a22c7618082c501fe39856988c337c4bda8f41..de0d7f28a181ca4acb1da2131d82a981627a8e96 100644 (file)
@@ -1267,7 +1267,7 @@ mcr20a_probe(struct spi_device *spi)
        ret = mcr20a_get_platform_data(spi, pdata);
        if (ret < 0) {
                dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
-               return ret;
+               goto free_pdata;
        }
 
        /* init reset gpio */
@@ -1275,7 +1275,7 @@ mcr20a_probe(struct spi_device *spi)
                ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
                                            GPIOF_OUT_INIT_HIGH, "reset");
                if (ret)
-                       return ret;
+                       goto free_pdata;
        }
 
        /* reset mcr20a */
@@ -1291,7 +1291,8 @@ mcr20a_probe(struct spi_device *spi)
        hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
        if (!hw) {
                dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto free_pdata;
        }
 
        /* init mcr20a local data */
@@ -1308,8 +1309,10 @@ mcr20a_probe(struct spi_device *spi)
        /* init buf */
        lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
 
-       if (!lp->buf)
-               return -ENOMEM;
+       if (!lp->buf) {
+               ret = -ENOMEM;
+               goto free_dev;
+       }
 
        mcr20a_setup_tx_spi_messages(lp);
        mcr20a_setup_rx_spi_messages(lp);
@@ -1366,6 +1369,8 @@ mcr20a_probe(struct spi_device *spi)
 
 free_dev:
        ieee802154_free_hw(lp->hw);
+free_pdata:
+       kfree(pdata);
 
        return ret;
 }
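
The mcr20a fix completes the probe's unwind ladder so pdata is released on
every failure path. The shape of the idiom, sketched with hypothetical
allocations: each failure jumps to a label that frees everything allocated
before it, in reverse order, so nothing leaks or double-frees.

	#include <linux/slab.h>

	static int probe_unwind_sketch(void **out)
	{
		void *pdata, *buf;
		int ret;

		pdata = kzalloc(16, GFP_KERNEL);
		if (!pdata)
			return -ENOMEM;

		buf = kzalloc(16, GFP_KERNEL);
		if (!buf) {
			ret = -ENOMEM;
			goto free_pdata;
		}

		*out = buf;
		kfree(pdata); /* only needed during probe */
		return 0;

	free_pdata:
		kfree(pdata);
		return ret;
	}
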
index 450eec264a5ea53fe0e592e467de3626321a9f6d..4377c26f714d0522ebf5d1de6ac774b6e42024ea 100644 (file)
@@ -792,8 +792,10 @@ static int ipvlan_device_event(struct notifier_block *unused,
                break;
 
        case NETDEV_CHANGEADDR:
-               list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+               list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
                        ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
+                       call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
+               }
                break;
 
        case NETDEV_PRE_TYPE_CHANGE:
index 725f4b4afc6da946e967d4070b9cf76143360332..adde8fc45588ba12c82bd79cf830e3ed99b907e3 100644 (file)
@@ -514,6 +514,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
        const struct macvlan_dev *vlan = netdev_priv(dev);
        const struct macvlan_port *port = vlan->port;
        const struct macvlan_dev *dest;
+       void *accel_priv = NULL;
 
        if (vlan->mode == MACVLAN_MODE_BRIDGE) {
                const struct ethhdr *eth = (void *)skb->data;
@@ -533,9 +534,14 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
+       /* For packets that are non-multicast and not bridged we will pass
+        * the necessary information so that the lowerdev can distinguish
+        * the source of the packets via the accel_priv value.
+        */
+       accel_priv = vlan->accel_priv;
 xmit_world:
        skb->dev = vlan->lowerdev;
-       return dev_queue_xmit(skb);
+       return dev_queue_xmit_accel(skb, accel_priv);
 }
 
 static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
@@ -552,19 +558,14 @@ static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, str
 static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
 {
+       struct macvlan_dev *vlan = netdev_priv(dev);
        unsigned int len = skb->len;
        int ret;
-       struct macvlan_dev *vlan = netdev_priv(dev);
 
        if (unlikely(netpoll_tx_running(dev)))
                return macvlan_netpoll_send_skb(vlan, skb);
 
-       if (vlan->fwd_priv) {
-               skb->dev = vlan->lowerdev;
-               ret = dev_queue_xmit_accel(skb, vlan->fwd_priv);
-       } else {
-               ret = macvlan_queue_xmit(skb, dev);
-       }
+       ret = macvlan_queue_xmit(skb, dev);
 
        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
                struct vlan_pcpu_stats *pcpu_stats;
@@ -613,26 +614,27 @@ static int macvlan_open(struct net_device *dev)
                goto hash_add;
        }
 
-       if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
-               vlan->fwd_priv =
-                     lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
-
-               /* If we get a NULL pointer back, or if we get an error
-                * then we should just fall through to the non accelerated path
-                */
-               if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
-                       vlan->fwd_priv = NULL;
-               } else
-                       return 0;
-       }
-
        err = -EBUSY;
        if (macvlan_addr_busy(vlan->port, dev->dev_addr))
                goto out;
 
-       err = dev_uc_add(lowerdev, dev->dev_addr);
-       if (err < 0)
-               goto out;
+       /* Attempt to populate accel_priv which is used to offload the L2
+        * forwarding requests for unicast packets.
+        */
+       if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD)
+               vlan->accel_priv =
+                     lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
+
+       /* If the earlier attempt to offload failed, or accel_priv was not
+        * populated, we must add the unicast address to the lower device.
+        */
+       if (IS_ERR_OR_NULL(vlan->accel_priv)) {
+               vlan->accel_priv = NULL;
+               err = dev_uc_add(lowerdev, dev->dev_addr);
+               if (err < 0)
+                       goto out;
+       }
+
        if (dev->flags & IFF_ALLMULTI) {
                err = dev_set_allmulti(lowerdev, 1);
                if (err < 0)
@@ -653,13 +655,14 @@ static int macvlan_open(struct net_device *dev)
        if (dev->flags & IFF_ALLMULTI)
                dev_set_allmulti(lowerdev, -1);
 del_unicast:
-       dev_uc_del(lowerdev, dev->dev_addr);
-out:
-       if (vlan->fwd_priv) {
+       if (vlan->accel_priv) {
                lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
-                                                          vlan->fwd_priv);
-               vlan->fwd_priv = NULL;
+                                                          vlan->accel_priv);
+               vlan->accel_priv = NULL;
+       } else {
+               dev_uc_del(lowerdev, dev->dev_addr);
        }
+out:
        return err;
 }
 
@@ -668,11 +671,10 @@ static int macvlan_stop(struct net_device *dev)
        struct macvlan_dev *vlan = netdev_priv(dev);
        struct net_device *lowerdev = vlan->lowerdev;
 
-       if (vlan->fwd_priv) {
+       if (vlan->accel_priv) {
                lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
-                                                          vlan->fwd_priv);
-               vlan->fwd_priv = NULL;
-               return 0;
+                                                          vlan->accel_priv);
+               vlan->accel_priv = NULL;
        }
 
        dev_uc_unsync(lowerdev, dev);
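
The macvlan rework folds the offloaded and plain transmit paths into one call
site: dev_queue_xmit_accel() with a NULL accel_priv behaves like
dev_queue_xmit(), so the station handle rides along only when the lower
device granted one. Condensed sketch:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* One transmit call covers both cases; accel_priv may be NULL. */
	static int macvlan_xmit_sketch(struct sk_buff *skb,
				       struct net_device *lowerdev,
				       void *accel_priv)
	{
		skb->dev = lowerdev;
		return dev_queue_xmit_accel(skb, accel_priv);
	}
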
index edb8b9ab827fb2a7d42d0f32908b5c796dae8a74..343989f9f9d981e201bedf66520f4be97567d6af 100644 (file)
@@ -118,11 +118,18 @@ config MDIO_I2C
 
 config MDIO_MOXART
        tristate "MOXA ART MDIO interface support"
-       depends on ARCH_MOXART
+       depends on ARCH_MOXART || COMPILE_TEST
        help
          This driver supports the MDIO interface found in the network
          interface units of the MOXA ART SoC
 
+config MDIO_MSCC_MIIM
+       tristate "Microsemi MIIM interface support"
+       depends on HAS_IOMEM
+       help
+         This driver supports the MIIM (MDIO) interface found in the network
+         switches of the Microsemi SoCs
+
 config MDIO_OCTEON
        tristate "Octeon and some ThunderX SOCs MDIO buses"
        depends on 64BIT
@@ -135,7 +142,7 @@ config MDIO_OCTEON
 
 config MDIO_SUN4I
        tristate "Allwinner sun4i MDIO interface support"
-       depends on ARCH_SUNXI
+       depends on ARCH_SUNXI || COMPILE_TEST
        help
          This driver supports the MDIO interface found in the network
          interface units of the Allwinner SoC that have an EMAC (A10,
@@ -291,6 +298,11 @@ config DP83822_PHY
        ---help---
          Supports the DP83822 PHY.
 
+config DP83TC811_PHY
+       tristate "Texas Instruments DP83TC811 PHY"
+       ---help---
+         Supports the DP83TC811 PHY.
+
 config DP83848_PHY
        tristate "Texas Instruments DP83848 PHY"
        ---help---
@@ -360,6 +372,11 @@ config MICROCHIP_PHY
        help
          Supports the LAN88XX PHYs.
 
+config MICROCHIP_T1_PHY
+       tristate "Microchip T1 PHYs"
+       ---help---
+         Supports the LAN87XX PHYs.
+
 config MICROSEMI_PHY
        tristate "Microsemi PHYs"
        ---help---
index 701ca0b8717e6ab72a7de80fa7ba1f80e845b8ea..5805c0b7d60e31eb5ab3f973c580df15632c4d70 100644 (file)
@@ -34,6 +34,7 @@ obj-$(CONFIG_MDIO_GPIO)               += mdio-gpio.o
 obj-$(CONFIG_MDIO_HISI_FEMAC)  += mdio-hisi-femac.o
 obj-$(CONFIG_MDIO_I2C)         += mdio-i2c.o
 obj-$(CONFIG_MDIO_MOXART)      += mdio-moxart.o
+obj-$(CONFIG_MDIO_MSCC_MIIM)   += mdio-mscc-miim.o
 obj-$(CONFIG_MDIO_OCTEON)      += mdio-octeon.o
 obj-$(CONFIG_MDIO_SUN4I)       += mdio-sun4i.o
 obj-$(CONFIG_MDIO_THUNDER)     += mdio-thunder.o
@@ -58,6 +59,7 @@ obj-$(CONFIG_CORTINA_PHY)     += cortina.o
 obj-$(CONFIG_DAVICOM_PHY)      += davicom.o
 obj-$(CONFIG_DP83640_PHY)      += dp83640.o
 obj-$(CONFIG_DP83822_PHY)      += dp83822.o
+obj-$(CONFIG_DP83TC811_PHY)    += dp83tc811.o
 obj-$(CONFIG_DP83848_PHY)      += dp83848.o
 obj-$(CONFIG_DP83867_PHY)      += dp83867.o
 obj-$(CONFIG_FIXED_PHY)                += fixed_phy.o
@@ -71,6 +73,7 @@ obj-$(CONFIG_MESON_GXL_PHY)   += meson-gxl.o
 obj-$(CONFIG_MICREL_KS8995MA)  += spi_ks8995.o
 obj-$(CONFIG_MICREL_PHY)       += micrel.o
 obj-$(CONFIG_MICROCHIP_PHY)    += microchip.o
+obj-$(CONFIG_MICROCHIP_T1_PHY) += microchip_t1.o
 obj-$(CONFIG_MICROSEMI_PHY)    += mscc.o
 obj-$(CONFIG_NATIONAL_PHY)     += national.o
 obj-$(CONFIG_QSEMI_PHY)                += qsemi.o
index 5ad130c3da43c869b39dc8ec83ec6795aa82be7d..0876aec7328c9b437403d8f2ee6688ce963b2c01 100644 (file)
@@ -346,10 +346,6 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_get_strings);
 
-#ifndef UINT64_MAX
-#define UINT64_MAX              (u64)(~((u64)0))
-#endif
-
 /* Caller is supposed to provide appropriate storage for the library code to
  * access the shadow copy
  */
@@ -362,7 +358,7 @@ static u64 bcm_phy_get_stat(struct phy_device *phydev, u64 *shadow,
 
        val = phy_read(phydev, stat.reg);
        if (val < 0) {
-               ret = UINT64_MAX;
+               ret = U64_MAX;
        } else {
                val >>= stat.shift;
                val = val & ((1 << stat.bits) - 1);
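
The same UINT64_MAX removal repeats in the marvell, micrel and smsc hunks
below: the per-file fallback #define gives way to the kernel's own U64_MAX.
All-ones is the ethtool statistics convention for a counter that could not
be read, as this sketch of the shared pattern shows:

	#include <linux/kernel.h>

	/* Map a failed register read to the sentinel, else widen it. */
	static u64 stat_or_sentinel(int val)
	{
		return val < 0 ? U64_MAX : (u64)val;
	}
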
index 3bb6b66dc7bfb0d3eb6fd905c77feefc0138afac..f9c25912eb98c8713229405bc551a4c52816eb24 100644 (file)
@@ -720,6 +720,15 @@ static struct phy_driver broadcom_drivers[] = {
        .get_strings    = bcm_phy_get_strings,
        .get_stats      = bcm53xx_phy_get_stats,
        .probe          = bcm53xx_phy_probe,
+}, {
+       .phy_id         = PHY_ID_BCM89610,
+       .phy_id_mask    = 0xfffffff0,
+       .name           = "Broadcom BCM89610",
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_init    = bcm54xx_config_init,
+       .ack_interrupt  = bcm_phy_ack_intr,
+       .config_intr    = bcm_phy_config_intr,
 } };
 
 module_phy_driver(broadcom_drivers);
@@ -741,6 +750,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
        { PHY_ID_BCMAC131, 0xfffffff0 },
        { PHY_ID_BCM5241, 0xfffffff0 },
        { PHY_ID_BCM5395, 0xfffffff0 },
+       { PHY_ID_BCM89610, 0xfffffff0 },
        { }
 };
 
diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c
new file mode 100644 (file)
index 0000000..081d99a
--- /dev/null
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for the Texas Instruments DP83TC811 PHY
+ *
+ * Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+
+#define DP83TC811_PHY_ID       0x2000a253
+#define DP83811_DEVADDR                0x1f
+
+#define MII_DP83811_SGMII_CTRL 0x09
+#define MII_DP83811_INT_STAT1  0x12
+#define MII_DP83811_INT_STAT2  0x13
+#define MII_DP83811_RESET_CTRL 0x1f
+
+#define DP83811_HW_RESET       BIT(15)
+#define DP83811_SW_RESET       BIT(14)
+
+/* INT_STAT1 bits */
+#define DP83811_RX_ERR_HF_INT_EN       BIT(0)
+#define DP83811_MS_TRAINING_INT_EN     BIT(1)
+#define DP83811_ANEG_COMPLETE_INT_EN   BIT(2)
+#define DP83811_ESD_EVENT_INT_EN       BIT(3)
+#define DP83811_WOL_INT_EN             BIT(4)
+#define DP83811_LINK_STAT_INT_EN       BIT(5)
+#define DP83811_ENERGY_DET_INT_EN      BIT(6)
+#define DP83811_LINK_QUAL_INT_EN       BIT(7)
+
+/* INT_STAT2 bits */
+#define DP83811_JABBER_DET_INT_EN      BIT(0)
+#define DP83811_POLARITY_INT_EN                BIT(1)
+#define DP83811_SLEEP_MODE_INT_EN      BIT(2)
+#define DP83811_OVERTEMP_INT_EN                BIT(3)
+#define DP83811_OVERVOLTAGE_INT_EN     BIT(6)
+#define DP83811_UNDERVOLTAGE_INT_EN    BIT(7)
+
+#define MII_DP83811_RXSOP1     0x04a5
+#define MII_DP83811_RXSOP2     0x04a6
+#define MII_DP83811_RXSOP3     0x04a7
+
+/* WoL Registers */
+#define MII_DP83811_WOL_CFG    0x04a0
+#define MII_DP83811_WOL_STAT   0x04a1
+#define MII_DP83811_WOL_DA1    0x04a2
+#define MII_DP83811_WOL_DA2    0x04a3
+#define MII_DP83811_WOL_DA3    0x04a4
+
+/* WoL bits */
+#define DP83811_WOL_MAGIC_EN   BIT(0)
+#define DP83811_WOL_SECURE_ON  BIT(5)
+#define DP83811_WOL_EN         BIT(7)
+#define DP83811_WOL_INDICATION_SEL BIT(8)
+#define DP83811_WOL_CLR_INDICATION BIT(11)
+
+/* SGMII CTRL bits */
+#define DP83811_TDR_AUTO               BIT(8)
+#define DP83811_SGMII_EN               BIT(12)
+#define DP83811_SGMII_AUTO_NEG_EN      BIT(13)
+#define DP83811_SGMII_TX_ERR_DIS       BIT(14)
+#define DP83811_SGMII_SOFT_RESET       BIT(15)
+
+static int dp83811_ack_interrupt(struct phy_device *phydev)
+{
+       int err;
+
+       err = phy_read(phydev, MII_DP83811_INT_STAT1);
+       if (err < 0)
+               return err;
+
+       err = phy_read(phydev, MII_DP83811_INT_STAT2);
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
+static int dp83811_set_wol(struct phy_device *phydev,
+                          struct ethtool_wolinfo *wol)
+{
+       struct net_device *ndev = phydev->attached_dev;
+       const u8 *mac;
+       u16 value;
+
+       if (wol->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) {
+               mac = (const u8 *)ndev->dev_addr;
+
+               if (!is_valid_ether_addr(mac))
+                       return -EINVAL;
+
+               /* MAC addresses start with byte 5, but are stored in mac[0].
+                * 811 PHYs store bytes 4|5, 2|3, 0|1
+                */
+               phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_DA1,
+                             (mac[1] << 8) | mac[0]);
+               phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_DA2,
+                             (mac[3] << 8) | mac[2]);
+               phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_DA3,
+                             (mac[5] << 8) | mac[4]);
+
+               value = phy_read_mmd(phydev, DP83811_DEVADDR,
+                                    MII_DP83811_WOL_CFG);
+               if (wol->wolopts & WAKE_MAGIC)
+                       value |= DP83811_WOL_MAGIC_EN;
+               else
+                       value &= ~DP83811_WOL_MAGIC_EN;
+
+               if (wol->wolopts & WAKE_MAGICSECURE) {
+                       phy_write_mmd(phydev, DP83811_DEVADDR,
+                                     MII_DP83811_RXSOP1,
+                                     (wol->sopass[1] << 8) | wol->sopass[0]);
+                       phy_write_mmd(phydev, DP83811_DEVADDR,
+                                     MII_DP83811_RXSOP2,
+                                     (wol->sopass[3] << 8) | wol->sopass[2]);
+                       phy_write_mmd(phydev, DP83811_DEVADDR,
+                                     MII_DP83811_RXSOP3,
+                                     (wol->sopass[5] << 8) | wol->sopass[4]);
+                       value |= DP83811_WOL_SECURE_ON;
+               } else {
+                       value &= ~DP83811_WOL_SECURE_ON;
+               }
+
+               value |= (DP83811_WOL_EN | DP83811_WOL_INDICATION_SEL |
+                         DP83811_WOL_CLR_INDICATION);
+               phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
+                             value);
+       } else {
+               value = phy_read_mmd(phydev, DP83811_DEVADDR,
+                                    MII_DP83811_WOL_CFG);
+               value &= ~DP83811_WOL_EN;
+               phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
+                             value);
+       }
+
+       return 0;
+}
+
+static void dp83811_get_wol(struct phy_device *phydev,
+                           struct ethtool_wolinfo *wol)
+{
+       u16 sopass_val;
+       int value;
+
+       wol->supported = (WAKE_MAGIC | WAKE_MAGICSECURE);
+       wol->wolopts = 0;
+
+       value = phy_read_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG);
+
+       if (value & DP83811_WOL_MAGIC_EN)
+               wol->wolopts |= WAKE_MAGIC;
+
+       if (value & DP83811_WOL_SECURE_ON) {
+               sopass_val = phy_read_mmd(phydev, DP83811_DEVADDR,
+                                         MII_DP83811_RXSOP1);
+               wol->sopass[0] = (sopass_val & 0xff);
+               wol->sopass[1] = (sopass_val >> 8);
+
+               sopass_val = phy_read_mmd(phydev, DP83811_DEVADDR,
+                                         MII_DP83811_RXSOP2);
+               wol->sopass[2] = (sopass_val & 0xff);
+               wol->sopass[3] = (sopass_val >> 8);
+
+               sopass_val = phy_read_mmd(phydev, DP83811_DEVADDR,
+                                         MII_DP83811_RXSOP3);
+               wol->sopass[4] = (sopass_val & 0xff);
+               wol->sopass[5] = (sopass_val >> 8);
+
+               wol->wolopts |= WAKE_MAGICSECURE;
+       }
+
+       /* WoL is not enabled so set wolopts to 0 */
+       if (!(value & DP83811_WOL_EN))
+               wol->wolopts = 0;
+}
+
+static int dp83811_config_intr(struct phy_device *phydev)
+{
+       int misr_status, err;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               misr_status = phy_read(phydev, MII_DP83811_INT_STAT1);
+               if (misr_status < 0)
+                       return misr_status;
+
+               misr_status |= (DP83811_RX_ERR_HF_INT_EN |
+                               DP83811_MS_TRAINING_INT_EN |
+                               DP83811_ANEG_COMPLETE_INT_EN |
+                               DP83811_ESD_EVENT_INT_EN |
+                               DP83811_WOL_INT_EN |
+                               DP83811_LINK_STAT_INT_EN |
+                               DP83811_ENERGY_DET_INT_EN |
+                               DP83811_LINK_QUAL_INT_EN);
+
+               err = phy_write(phydev, MII_DP83811_INT_STAT1, misr_status);
+               if (err < 0)
+                       return err;
+
+               misr_status = phy_read(phydev, MII_DP83811_INT_STAT2);
+               if (misr_status < 0)
+                       return misr_status;
+
+               misr_status |= (DP83811_JABBER_DET_INT_EN |
+                               DP83811_POLARITY_INT_EN |
+                               DP83811_SLEEP_MODE_INT_EN |
+                               DP83811_OVERTEMP_INT_EN |
+                               DP83811_OVERVOLTAGE_INT_EN |
+                               DP83811_UNDERVOLTAGE_INT_EN);
+
+               err = phy_write(phydev, MII_DP83811_INT_STAT2, misr_status);
+
+       } else {
+               err = phy_write(phydev, MII_DP83811_INT_STAT1, 0);
+               if (err < 0)
+                       return err;
+
+               err = phy_write(phydev, MII_DP83811_INT_STAT2, 0);
+       }
+
+       return err;
+}
+
+static int dp83811_config_aneg(struct phy_device *phydev)
+{
+       int value, err;
+
+       if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+               value = phy_read(phydev, MII_DP83811_SGMII_CTRL);
+               if (phydev->autoneg == AUTONEG_ENABLE) {
+                       err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
+                                       (DP83811_SGMII_AUTO_NEG_EN | value));
+                       if (err < 0)
+                               return err;
+               } else {
+                       err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
+                                       (~DP83811_SGMII_AUTO_NEG_EN & value));
+                       if (err < 0)
+                               return err;
+               }
+       }
+
+       return genphy_config_aneg(phydev);
+}
+
+static int dp83811_config_init(struct phy_device *phydev)
+{
+       int value, err;
+
+       err = genphy_config_init(phydev);
+       if (err < 0)
+               return err;
+
+       if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+               value = phy_read(phydev, MII_DP83811_SGMII_CTRL);
+               if (!(value & DP83811_SGMII_EN)) {
+                       err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
+                                       (DP83811_SGMII_EN | value));
+                       if (err < 0)
+                               return err;
+               } else {
+                       err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
+                                       (~DP83811_SGMII_EN & value));
+                       if (err < 0)
+                               return err;
+               }
+       }
+
+       value = DP83811_WOL_MAGIC_EN | DP83811_WOL_SECURE_ON | DP83811_WOL_EN;
+
+       return phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
+             value);
+}
+
+static int dp83811_phy_reset(struct phy_device *phydev)
+{
+       int err;
+
+       err = phy_write(phydev, MII_DP83811_RESET_CTRL, DP83811_HW_RESET);
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
+static int dp83811_suspend(struct phy_device *phydev)
+{
+       int value;
+
+       value = phy_read_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG);
+
+       if (!(value & DP83811_WOL_EN))
+               genphy_suspend(phydev);
+
+       return 0;
+}
+
+static int dp83811_resume(struct phy_device *phydev)
+{
+       int value;
+
+       genphy_resume(phydev);
+
+       value = phy_read_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG);
+
+       phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG, value |
+                     DP83811_WOL_CLR_INDICATION);
+
+       return 0;
+}
+
+static struct phy_driver dp83811_driver[] = {
+       {
+               .phy_id = DP83TC811_PHY_ID,
+               .phy_id_mask = 0xfffffff0,
+               .name = "TI DP83TC811",
+               .features = PHY_BASIC_FEATURES,
+               .flags = PHY_HAS_INTERRUPT,
+               .config_init = dp83811_config_init,
+               .config_aneg = dp83811_config_aneg,
+               .soft_reset = dp83811_phy_reset,
+               .get_wol = dp83811_get_wol,
+               .set_wol = dp83811_set_wol,
+               .ack_interrupt = dp83811_ack_interrupt,
+               .config_intr = dp83811_config_intr,
+               .suspend = dp83811_suspend,
+               .resume = dp83811_resume,
+       },
+};
+module_phy_driver(dp83811_driver);
+
+static struct mdio_device_id __maybe_unused dp83811_tbl[] = {
+       { DP83TC811_PHY_ID, 0xfffffff0 },
+       { },
+};
+MODULE_DEVICE_TABLE(mdio, dp83811_tbl);
+
+MODULE_DESCRIPTION("Texas Instruments DP83TC811 PHY driver");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
+MODULE_LICENSE("GPL");
index c22e8e38324793024d05ce6df15fc9274a5d7fdc..b8f57e9b937901fd142413c4002f39205546c35a 100644 (file)
@@ -1393,6 +1393,15 @@ static int m88e1318_set_wol(struct phy_device *phydev,
                if (err < 0)
                        goto error;
 
+               /* Once a WOL event has happened, the LED[2] interrupt pin
+                * will not be cleared unless we read the interrupt status
+                * register. If interrupts are in use, the normal interrupt
+                * handling will clear the WOL event. Clear the WOL event
+                * before enabling it if !phy_interrupt_is_valid()
+                */
+               if (!phy_interrupt_is_valid(phydev))
+                       phy_read(phydev, MII_M1011_IEVENT);
+
                /* Enable the WOL interrupt */
                err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0,
                                   MII_88E1318S_PHY_CSIER_WOL_EIE);
@@ -1473,9 +1482,6 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data)
        }
 }
 
-#ifndef UINT64_MAX
-#define UINT64_MAX             (u64)(~((u64)0))
-#endif
 static u64 marvell_get_stat(struct phy_device *phydev, int i)
 {
        struct marvell_hw_stat stat = marvell_hw_stats[i];
@@ -1485,7 +1491,7 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i)
 
        val = phy_read_paged(phydev, stat.page, stat.reg);
        if (val < 0) {
-               ret = UINT64_MAX;
+               ret = U64_MAX;
        } else {
                val = val & ((1 << stat.bits) - 1);
                priv->stats[i] += val;
index b501221819e12e32a841d46f369d50744d42415a..4e4c8daf44c308285201ecc051bc4e535e03036e 100644 (file)
@@ -179,11 +179,7 @@ static int mdio_gpio_probe(struct platform_device *pdev)
        if (!new_bus)
                return -ENODEV;
 
-       if (pdev->dev.of_node)
-               ret = of_mdiobus_register(new_bus, pdev->dev.of_node);
-       else
-               ret = mdiobus_register(new_bus);
-
+       ret = of_mdiobus_register(new_bus, pdev->dev.of_node);
        if (ret)
                mdio_gpio_bus_deinit(&pdev->dev);
 
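
Like the davinci_mdio hunk earlier, this drops the of_node branch and relies
on of_mdiobus_register() falling back to plain mdiobus_register() when the
node is NULL (behavior assumed to be introduced by the same series). The
consolidated call, sketched:

	#include <linux/of_mdio.h>

	/* np may be NULL; the bus is then registered as a non-OF bus. */
	static int register_mdio_sketch(struct mii_bus *bus,
					struct device_node *np)
	{
		return of_mdiobus_register(bus, np);
	}
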
diff --git a/drivers/net/phy/mdio-mscc-miim.c b/drivers/net/phy/mdio-mscc-miim.c
new file mode 100644 (file)
index 0000000..badbc99
--- /dev/null
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Driver for the MDIO interface of Microsemi network switches.
+ *
+ * Author: Alexandre Belloni <alexandre.belloni@bootlin.com>
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of_mdio.h>
+
+#define MSCC_MIIM_REG_STATUS           0x0
+#define                MSCC_MIIM_STATUS_STAT_BUSY      BIT(3)
+#define MSCC_MIIM_REG_CMD              0x8
+#define                MSCC_MIIM_CMD_OPR_WRITE         BIT(1)
+#define                MSCC_MIIM_CMD_OPR_READ          BIT(2)
+#define                MSCC_MIIM_CMD_WRDATA_SHIFT      4
+#define                MSCC_MIIM_CMD_REGAD_SHIFT       20
+#define                MSCC_MIIM_CMD_PHYAD_SHIFT       25
+#define                MSCC_MIIM_CMD_VLD               BIT(31)
+#define MSCC_MIIM_REG_DATA             0xC
+#define                MSCC_MIIM_DATA_ERROR            (BIT(16) | BIT(17))
+
+#define MSCC_PHY_REG_PHY_CFG   0x0
+#define                PHY_CFG_PHY_ENA         (BIT(0) | BIT(1) | BIT(2) | BIT(3))
+#define                PHY_CFG_PHY_COMMON_RESET BIT(4)
+#define                PHY_CFG_PHY_RESET       (BIT(5) | BIT(6) | BIT(7) | BIT(8))
+#define MSCC_PHY_REG_PHY_STATUS        0x4
+
+struct mscc_miim_dev {
+       void __iomem *regs;
+       void __iomem *phy_regs;
+};
+
+static int mscc_miim_wait_ready(struct mii_bus *bus)
+{
+       struct mscc_miim_dev *miim = bus->priv;
+       u32 val;
+
+       readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
+                          !(val & MSCC_MIIM_STATUS_STAT_BUSY), 100, 250000);
+       if (val & MSCC_MIIM_STATUS_STAT_BUSY)
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+       struct mscc_miim_dev *miim = bus->priv;
+       u32 val;
+       int ret;
+
+       ret = mscc_miim_wait_ready(bus);
+       if (ret)
+               goto out;
+
+       writel(MSCC_MIIM_CMD_VLD | (mii_id << MSCC_MIIM_CMD_PHYAD_SHIFT) |
+              (regnum << MSCC_MIIM_CMD_REGAD_SHIFT) | MSCC_MIIM_CMD_OPR_READ,
+              miim->regs + MSCC_MIIM_REG_CMD);
+
+       ret = mscc_miim_wait_ready(bus);
+       if (ret)
+               goto out;
+
+       val = readl(miim->regs + MSCC_MIIM_REG_DATA);
+       if (val & MSCC_MIIM_DATA_ERROR) {
+               ret = -EIO;
+               goto out;
+       }
+
+       ret = val & 0xFFFF;
+out:
+       return ret;
+}
+
+static int mscc_miim_write(struct mii_bus *bus, int mii_id,
+                          int regnum, u16 value)
+{
+       struct mscc_miim_dev *miim = bus->priv;
+       int ret;
+
+       ret = mscc_miim_wait_ready(bus);
+       if (ret < 0)
+               goto out;
+
+       writel(MSCC_MIIM_CMD_VLD | (mii_id << MSCC_MIIM_CMD_PHYAD_SHIFT) |
+              (regnum << MSCC_MIIM_CMD_REGAD_SHIFT) |
+              (value << MSCC_MIIM_CMD_WRDATA_SHIFT) |
+              MSCC_MIIM_CMD_OPR_WRITE,
+              miim->regs + MSCC_MIIM_REG_CMD);
+
+out:
+       return ret;
+}
+
+static int mscc_miim_reset(struct mii_bus *bus)
+{
+       struct mscc_miim_dev *miim = bus->priv;
+
+       if (miim->phy_regs) {
+               writel(0, miim->phy_regs + MSCC_PHY_REG_PHY_CFG);
+               writel(0x1ff, miim->phy_regs + MSCC_PHY_REG_PHY_CFG);
+               mdelay(500);
+       }
+
+       return 0;
+}
+
+static int mscc_miim_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       struct mii_bus *bus;
+       struct mscc_miim_dev *dev;
+       int ret;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*dev));
+       if (!bus)
+               return -ENOMEM;
+
+       bus->name = "mscc_miim";
+       bus->read = mscc_miim_read;
+       bus->write = mscc_miim_write;
+       bus->reset = mscc_miim_reset;
+       snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev));
+       bus->parent = &pdev->dev;
+
+       dev = bus->priv;
+       dev->regs = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(dev->regs)) {
+               dev_err(&pdev->dev, "Unable to map MIIM registers\n");
+               return PTR_ERR(dev->regs);
+       }
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (res) {
+               dev->phy_regs = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(dev->phy_regs)) {
+                       dev_err(&pdev->dev, "Unable to map internal phy registers\n");
+                       return PTR_ERR(dev->phy_regs);
+               }
+       }
+
+       ret = of_mdiobus_register(bus, pdev->dev.of_node);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
+               return ret;
+       }
+
+       platform_set_drvdata(pdev, bus);
+
+       return 0;
+}
+
+static int mscc_miim_remove(struct platform_device *pdev)
+{
+       struct mii_bus *bus = platform_get_drvdata(pdev);
+
+       mdiobus_unregister(bus);
+
+       return 0;
+}
+
+static const struct of_device_id mscc_miim_match[] = {
+       { .compatible = "mscc,ocelot-miim" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, mscc_miim_match);
+
+static struct platform_driver mscc_miim_driver = {
+       .probe = mscc_miim_probe,
+       .remove = mscc_miim_remove,
+       .driver = {
+               .name = "mscc-miim",
+               .of_match_table = mscc_miim_match,
+       },
+};
+
+module_platform_driver(mscc_miim_driver);
+
+MODULE_DESCRIPTION("Microsemi MIIM driver");
+MODULE_AUTHOR("Alexandre Belloni <alexandre.belloni@bootlin.com>");
+MODULE_LICENSE("Dual MIT/GPL");
index f41b224a9cdbf49ccf82d72b5052686548c005a7..3db06b40580d319a34d94ae54baf6ee61bce3ef5 100644 (file)
@@ -573,9 +573,40 @@ static int ksz9031_config_init(struct phy_device *phydev)
                ksz9031_of_load_skew_values(phydev, of_node,
                                MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
                                tx_data_skews, 4);
+
+               /* Silicon Errata Sheet (DS80000691D or DS80000692D):
+                * When the device links in the 1000BASE-T slave mode only,
+                * the optional 125MHz reference output clock (CLK125_NDO)
+                * has wide duty cycle variation.
+                *
+                * The optional CLK125_NDO clock does not meet the RGMII
+                * 45/55 percent (min/max) duty cycle requirement and therefore
+                * cannot be used directly by the MAC side for clocking
+                * applications that have setup/hold time requirements on
+                * rising and falling clock edges.
+                *
+                * Workaround:
+                * Force the phy to be the master to receive a stable clock
+                * which meets the duty cycle requirement.
+                */
+               if (of_property_read_bool(of_node, "micrel,force-master")) {
+                       result = phy_read(phydev, MII_CTRL1000);
+                       if (result < 0)
+                               goto err_force_master;
+
+                       /* enable master mode, config & prefer master */
+                       result |= CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER;
+                       result = phy_write(phydev, MII_CTRL1000, result);
+                       if (result < 0)
+                               goto err_force_master;
+               }
        }
 
        return ksz9031_center_flp_timing(phydev);
+
+err_force_master:
+       phydev_err(phydev, "failed to force the phy to master mode\n");
+       return result;
 }
 
 #define KSZ8873MLL_GLOBAL_CONTROL_4    0x06
@@ -650,9 +681,6 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data)
        }
 }
 
-#ifndef UINT64_MAX
-#define UINT64_MAX              (u64)(~((u64)0))
-#endif
 static u64 kszphy_get_stat(struct phy_device *phydev, int i)
 {
        struct kszphy_hw_stat stat = kszphy_hw_stats[i];
@@ -662,7 +690,7 @@ static u64 kszphy_get_stat(struct phy_device *phydev, int i)
 
        val = phy_read(phydev, stat.reg);
        if (val < 0) {
-               ret = UINT64_MAX;
+               ret = U64_MAX;
        } else {
                val = val & ((1 << stat.bits) - 1);
                priv->stats[i] += val;
diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
new file mode 100644 (file)
index 0000000..b1917dd
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Microchip Technology
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+/* Interrupt Source Register */
+#define LAN87XX_INTERRUPT_SOURCE                (0x18)
+
+/* Interrupt Mask Register */
+#define LAN87XX_INTERRUPT_MASK                  (0x19)
+#define LAN87XX_MASK_LINK_UP                    (0x0004)
+#define LAN87XX_MASK_LINK_DOWN                  (0x0002)
+
+#define DRIVER_AUTHOR  "Nisar Sayed <nisar.sayed@microchip.com>"
+#define DRIVER_DESC    "Microchip LAN87XX T1 PHY driver"
+
+static int lan87xx_phy_config_intr(struct phy_device *phydev)
+{
+       int rc, val = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               /* unmask all sources and clear them before enabling */
+               rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, 0x7FFF);
+               rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE);
+               val = LAN87XX_MASK_LINK_UP | LAN87XX_MASK_LINK_DOWN;
+       }
+
+       rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val);
+
+       return rc < 0 ? rc : 0;
+}
+
+static int lan87xx_phy_ack_interrupt(struct phy_device *phydev)
+{
+       int rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE);
+
+       return rc < 0 ? rc : 0;
+}
+
+static struct phy_driver microchip_t1_phy_driver[] = {
+       {
+               .phy_id         = 0x0007c150,
+               .phy_id_mask    = 0xfffffff0,
+               .name           = "Microchip LAN87xx T1",
+
+               .features       = SUPPORTED_100baseT_Full,
+               .flags          = PHY_HAS_INTERRUPT,
+
+               .config_init    = genphy_config_init,
+               .config_aneg    = genphy_config_aneg,
+
+               .ack_interrupt  = lan87xx_phy_ack_interrupt,
+               .config_intr    = lan87xx_phy_config_intr,
+
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+       }
+};
+
+module_phy_driver(microchip_t1_phy_driver);
+
+static struct mdio_device_id __maybe_unused microchip_t1_tbl[] = {
+       { 0x0007c150, 0xfffffff0 },
+       { }
+};
+
+MODULE_DEVICE_TABLE(mdio, microchip_t1_tbl);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
index ac23322a32e1ce57e5fac4d90699dcbe48219d26..9e4ba8e80a18af1a655fd189191cfc242c0a9722 100644 (file)
@@ -535,8 +535,17 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id,
 
        /* Grab the bits from PHYIR1, and put them in the upper half */
        phy_reg = mdiobus_read(bus, addr, MII_PHYSID1);
-       if (phy_reg < 0)
+       if (phy_reg < 0) {
+               /* if there is no device, return without an error so scanning
+                * the bus works properly
+                */
+               if (phy_reg == -EIO || phy_reg == -ENODEV) {
+                       *phy_id = 0xffffffff;
+                       return 0;
+               }
+
                return -EIO;
+       }
 
        *phy_id = (phy_reg & 0xffff) << 16;
 
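
The change leans on an existing convention: an all-ones PHY ID means
"nothing at this MDIO address", which the scanning code skips rather than
treating as fatal. The consumer side, sketched under that assumption:

	#include <linux/types.h>

	/* An empty MDIO address reads back as all ones. */
	static bool phy_id_is_empty(u32 phy_id)
	{
		return phy_id == 0xffffffff;
	}
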
index c582b2d7546c3359e858e826ddac2dab68b481c7..af4dc4425be28101dd3e8e53b3327c3ef41f725f 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/phylink.h>
 #include <linux/rtnetlink.h>
 #include <linux/spinlock.h>
+#include <linux/timer.h>
 #include <linux/workqueue.h>
 
 #include "sfp.h"
@@ -54,6 +55,7 @@ struct phylink {
        /* The link configuration settings */
        struct phylink_link_state link_config;
        struct gpio_desc *link_gpio;
+       struct timer_list link_poll;
        void (*get_fixed_state)(struct net_device *dev,
                                struct phylink_link_state *s);
 
@@ -360,7 +362,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat
        if (pl->get_fixed_state)
                pl->get_fixed_state(pl->netdev, state);
        else if (pl->link_gpio)
-               state->link = !!gpiod_get_value(pl->link_gpio);
+               state->link = !!gpiod_get_value_cansleep(pl->link_gpio);
 }
 
 /* Flow control is resolved according to our and the link partners
@@ -500,6 +502,15 @@ static void phylink_run_resolve(struct phylink *pl)
                queue_work(system_power_efficient_wq, &pl->resolve);
 }
 
+static void phylink_fixed_poll(struct timer_list *t)
+{
+       struct phylink *pl = container_of(t, struct phylink, link_poll);
+
+       mod_timer(t, jiffies + HZ);
+
+       phylink_run_resolve(pl);
+}
+
 static const struct sfp_upstream_ops sfp_phylink_ops;
 
 static int phylink_register_sfp(struct phylink *pl,
@@ -572,6 +583,7 @@ struct phylink *phylink_create(struct net_device *ndev,
        pl->link_config.an_enabled = true;
        pl->ops = ops;
        __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+       timer_setup(&pl->link_poll, phylink_fixed_poll, 0);
 
        bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
        linkmode_copy(pl->link_config.advertising, pl->supported);
@@ -612,6 +624,8 @@ void phylink_destroy(struct phylink *pl)
 {
        if (pl->sfp_bus)
                sfp_unregister_upstream(pl->sfp_bus);
+       if (!IS_ERR_OR_NULL(pl->link_gpio))
+               gpiod_put(pl->link_gpio);
 
        cancel_work_sync(&pl->resolve);
        kfree(pl);
@@ -903,6 +917,8 @@ void phylink_start(struct phylink *pl)
        clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
        phylink_run_resolve(pl);
 
+       if (pl->link_an_mode == MLO_AN_FIXED && !IS_ERR(pl->link_gpio))
+               mod_timer(&pl->link_poll, jiffies + HZ);
        if (pl->sfp_bus)
                sfp_upstream_start(pl->sfp_bus);
        if (pl->phydev)
@@ -927,6 +943,8 @@ void phylink_stop(struct phylink *pl)
                phy_stop(pl->phydev);
        if (pl->sfp_bus)
                sfp_upstream_stop(pl->sfp_bus);
+       if (pl->link_an_mode == MLO_AN_FIXED && !IS_ERR(pl->link_gpio))
+               del_timer_sync(&pl->link_poll);
 
        set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
        queue_work(system_power_efficient_wq, &pl->resolve);
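
The phylink change polls fixed links whose GPIO link indicator may sleep,
using a 1 Hz self-rearming timer instead of reading the GPIO in atomic
context. The rearm-then-work shape of the callback, sketched with
from_timer(), equivalent to the container_of() in the hunk:

	#include <linux/jiffies.h>
	#include <linux/timer.h>

	struct poll_sketch {
		struct timer_list link_poll;
	};

	static void link_poll_cb(struct timer_list *t)
	{
		struct poll_sketch *p = from_timer(p, t, link_poll);

		mod_timer(&p->link_poll, jiffies + HZ); /* rearm first */
		/* ...then kick the sleepable resolve work for p. */
	}
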
index 0381da78d228dc2fe7f87fabb8260cc257a3fc66..d437f4f5ed5291d21236a71ef3e36089344f9201 100644 (file)
@@ -125,13 +125,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        if (id->base.br_nominal) {
                if (id->base.br_nominal != 255) {
                        br_nom = id->base.br_nominal * 100;
-                       br_min = br_nom + id->base.br_nominal * id->ext.br_min;
+                       br_min = br_nom - id->base.br_nominal * id->ext.br_min;
                        br_max = br_nom + id->base.br_nominal * id->ext.br_max;
                } else if (id->ext.br_max) {
                        br_nom = 250 * id->ext.br_max;
                        br_max = br_nom + br_nom * id->ext.br_min / 100;
                        br_min = br_nom - br_nom * id->ext.br_min / 100;
                }
+
+               /* When using passive cables, in case neither BR,min nor BR,max
+                * are specified, set br_min to 0 as the nominal value is then
+                * used as the maximum.
+                */
+               if (br_min == br_max && id->base.sfp_ct_passive)
+                       br_min = 0;
        }
 
        /* Set ethtool support from the compliance fields. */
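
Worked numbers (assumed EEPROM values) show why the sign flip matters: with
id->base.br_nominal = 103 and id->ext.br_min = 10 (a 10% margin),
br_nom = 103 * 100 = 10300, and the corrected br_min = 10300 - 103 * 10 =
9270, below nominal as a minimum must be. The old expression yielded
10300 + 1030 = 11330, a "minimum" above the nominal rate.
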
index be399d645224bcbc74d090b924b30cd30c82cb2e..c328208388da7eafda9bf87219469bf75733fce4 100644 (file)
@@ -168,9 +168,6 @@ static void smsc_get_strings(struct phy_device *phydev, u8 *data)
        }
 }
 
-#ifndef UINT64_MAX
-#define UINT64_MAX              (u64)(~((u64)0))
-#endif
 static u64 smsc_get_stat(struct phy_device *phydev, int i)
 {
        struct smsc_hw_stat stat = smsc_hw_stats[i];
@@ -179,7 +176,7 @@ static u64 smsc_get_stat(struct phy_device *phydev, int i)
 
        val = phy_read(phydev, stat.reg);
        if (val < 0)
-               ret = UINT64_MAX;
+               ret = U64_MAX;
        else
                ret = val;
 
index 1483bc7b01e1a019e81ea98b06804f5a4361a3af..7df07337d69c90a1f1fe1ddd6611645d3ce751ee 100644 (file)
@@ -620,6 +620,10 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
        lock_sock(sk);
 
        error = -EINVAL;
+
+       if (sockaddr_len != sizeof(struct sockaddr_pppox))
+               goto end;
+
        if (sp->sa_protocol != PX_PROTO_OE)
                goto end;
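
The added length check closes a hole where a short sockaddr from userspace was dereferenced as a full struct sockaddr_pppox: sp->sa_protocol (and the address fields used later) lie beyond the generic sockaddr header, so they must not be read until the supplied length is known to be sufficient. A sketch of the pattern for any connect() handler (example_connect is a hypothetical name):

    static int example_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
    {
            struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uaddr;

            if (addr_len != sizeof(struct sockaddr_pppox))
                    return -EINVAL;         /* reject short or oversized addresses */

            if (sp->sa_protocol != PX_PROTO_OE)
                    return -EINVAL;         /* only now is this field readable */
            /* ... */
            return 0;
    }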
 
index 5c77fa445d2d80f46f05d4d90d0b8dac27936b5e..9dbd390ace340811e356539fa988705cf49c8fd9 100644 (file)
@@ -1072,14 +1072,11 @@ static void team_port_leave(struct team *team, struct team_port *port)
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static int team_port_enable_netpoll(struct team *team, struct team_port *port)
+static int __team_port_enable_netpoll(struct team_port *port)
 {
        struct netpoll *np;
        int err;
 
-       if (!team->dev->npinfo)
-               return 0;
-
        np = kzalloc(sizeof(*np), GFP_KERNEL);
        if (!np)
                return -ENOMEM;
@@ -1093,6 +1090,14 @@ static int team_port_enable_netpoll(struct team *team, struct team_port *port)
        return err;
 }
 
+static int team_port_enable_netpoll(struct team_port *port)
+{
+       if (!port->team->dev->npinfo)
+               return 0;
+
+       return __team_port_enable_netpoll(port);
+}
+
 static void team_port_disable_netpoll(struct team_port *port)
 {
        struct netpoll *np = port->np;
@@ -1107,7 +1112,7 @@ static void team_port_disable_netpoll(struct team_port *port)
        kfree(np);
 }
 #else
-static int team_port_enable_netpoll(struct team *team, struct team_port *port)
+static int team_port_enable_netpoll(struct team_port *port)
 {
        return 0;
 }
@@ -1221,7 +1226,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
                goto err_vids_add;
        }
 
-       err = team_port_enable_netpoll(team, port);
+       err = team_port_enable_netpoll(port);
        if (err) {
                netdev_err(dev, "Failed to enable netpoll on device %s\n",
                           portname);
@@ -1918,7 +1923,7 @@ static int team_netpoll_setup(struct net_device *dev,
 
        mutex_lock(&team->lock);
        list_for_each_entry(port, &team->port_list, list) {
-               err = team_port_enable_netpoll(team, port);
+               err = __team_port_enable_netpoll(port);
                if (err) {
                        __team_netpoll_cleanup(team);
                        break;
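
The team refactor follows the usual kernel double-underscore convention: team_port_enable_netpoll() keeps the "is netpoll wanted?" precondition, while __team_port_enable_netpoll() does the work unconditionally. That matters in team_netpoll_setup() above, which runs while dev->npinfo is still being established, so the checked variant would wrongly bail out. Schematically (do_x and __do_x are placeholder names):

    static int __do_x(struct team_port *port)
    {
            /* unconditional work; the caller has verified or is in the
             * middle of establishing the precondition itself */
            return 0;
    }

    static int do_x(struct team_port *port)
    {
            if (!port->team->dev->npinfo)   /* precondition for normal callers */
                    return 0;
            return __do_x(port);
    }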
index d3c04ab9752a3fd1245d4f6103bc8ee1309c29fd..99bf3cee134525ae285aae1bd93ed9b6f69d3c7e 100644 (file)
@@ -675,15 +675,6 @@ static void tun_queue_purge(struct tun_file *tfile)
        skb_queue_purge(&tfile->sk.sk_error_queue);
 }
 
-static void tun_cleanup_tx_ring(struct tun_file *tfile)
-{
-       if (tfile->tx_ring.queue) {
-               ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
-               xdp_rxq_info_unreg(&tfile->xdp_rxq);
-               memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
-       }
-}
-
 static void __tun_detach(struct tun_file *tfile, bool clean)
 {
        struct tun_file *ntfile;
@@ -730,7 +721,9 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                            tun->dev->reg_state == NETREG_REGISTERED)
                                unregister_netdevice(tun->dev);
                }
-               tun_cleanup_tx_ring(tfile);
+               if (tun)
+                       xdp_rxq_info_unreg(&tfile->xdp_rxq);
+               ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
                sock_put(&tfile->sk);
        }
 }
@@ -777,14 +770,14 @@ static void tun_detach_all(struct net_device *dev)
                tun_napi_del(tun, tfile);
                /* Drop read queue */
                tun_queue_purge(tfile);
+               xdp_rxq_info_unreg(&tfile->xdp_rxq);
                sock_put(&tfile->sk);
-               tun_cleanup_tx_ring(tfile);
        }
        list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
                tun_enable_queue(tfile);
                tun_queue_purge(tfile);
+               xdp_rxq_info_unreg(&tfile->xdp_rxq);
                sock_put(&tfile->sk);
-               tun_cleanup_tx_ring(tfile);
        }
        BUG_ON(tun->numdisabled != 0);
 
@@ -828,7 +821,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
        }
 
        if (!tfile->detached &&
-           ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
+           ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
+                           GFP_KERNEL, tun_ptr_free)) {
                err = -ENOMEM;
                goto out;
        }
@@ -2850,10 +2844,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg, int ifreq_len)
 {
        struct tun_file *tfile = file->private_data;
+       struct net *net = sock_net(&tfile->sk);
        struct tun_struct *tun;
        void __user* argp = (void __user*)arg;
        struct ifreq ifr;
-       struct net *net;
        kuid_t owner;
        kgid_t group;
        int sndbuf;
@@ -2877,14 +2871,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                 */
                return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
                                (unsigned int __user*)argp);
-       } else if (cmd == TUNSETQUEUE)
+       } else if (cmd == TUNSETQUEUE) {
                return tun_set_queue(file, &ifr);
+       } else if (cmd == SIOCGSKNS) {
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+                       return -EPERM;
+               return open_related_ns(&net->ns, get_net_ns);
+       }
 
        ret = 0;
        rtnl_lock();
 
        tun = tun_get(tfile);
-       net = sock_net(&tfile->sk);
        if (cmd == TUNSETIFF) {
                ret = -EEXIST;
                if (tun)
@@ -2914,14 +2912,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                tfile->ifindex = ifindex;
                goto unlock;
        }
-       if (cmd == SIOCGSKNS) {
-               ret = -EPERM;
-               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-                       goto unlock;
-
-               ret = open_related_ns(&net->ns, get_net_ns);
-               goto unlock;
-       }
 
        ret = -EBADFD;
        if (!tun)
@@ -3225,6 +3215,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
                                            &tun_proto, 0);
        if (!tfile)
                return -ENOMEM;
+       if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
+               sk_free(&tfile->sk);
+               return -ENOMEM;
+       }
+
        RCU_INIT_POINTER(tfile->tun, NULL);
        tfile->flags = 0;
        tfile->ifindex = 0;
@@ -3245,8 +3240,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 
        sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
 
-       memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
-
        return 0;
 }
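
Taken together, the tun hunks move tx_ring to a three-stage lifecycle so the ring is valid for the whole life of the file: a zero-sized ptr_ring_init() at open (it allocates no slots but leaves the structure usable), ptr_ring_resize() at attach with tun_ptr_free as the destructor for anything already queued, and an unconditional ptr_ring_cleanup() at final detach. In outline, using the ptr_ring API from linux/ptr_ring.h:

    #include <linux/ptr_ring.h>

    /* open: an always-valid, empty ring */
    if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL))
            return -ENOMEM;

    /* attach: grow to the device queue length; displaced entries,
     * if any, go through the destructor */
    if (ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
                        GFP_KERNEL, tun_ptr_free))
            return -ENOMEM;

    /* final detach: free whatever is still queued */
    ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);

This is why tun_cleanup_tx_ring() and its "is the ring initialised?" memset dance could be deleted outright.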
 
index f28bd74ac275a039a000cc01b0448d8866f14d1f..418b0904cecb92bd29f6c91429b7e83056901b43 100644 (file)
@@ -111,6 +111,7 @@ config USB_LAN78XX
        select MII
        select PHYLIB
        select MICROCHIP_PHY
+       select FIXED_PHY
        help
          This option adds support for Microchip LAN78XX based USB 2
          & USB 3 10/100/1000 Ethernet adapters.
index c59f8afd0d73c13a8867532f95a56bb19a2b9af2..8dff87ec6d99c5dca122dcdb5d3697157564cfa2 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/irq.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/microchipphy.h>
-#include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include "lan78xx.h"
@@ -44,7 +44,6 @@
 #define DRIVER_AUTHOR  "WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC    "LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME    "lan78xx"
-#define DRIVER_VERSION "1.0.6"
 
 #define TX_TIMEOUT_JIFFIES             (5 * HZ)
 #define THROTTLE_JIFFIES               (HZ / 8)
@@ -1503,7 +1502,6 @@ static void lan78xx_get_drvinfo(struct net_device *net,
        struct lan78xx_net *dev = netdev_priv(net);
 
        strncpy(info->driver, DRIVER_NAME, sizeof(info->driver));
-       strncpy(info->version, DRIVER_VERSION, sizeof(info->version));
        usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
 }
 
@@ -1845,12 +1843,9 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
        }
 
        node = of_get_child_by_name(dev->udev->dev.of_node, "mdio");
-       if (node) {
-               ret = of_mdiobus_register(dev->mdiobus, node);
+       ret = of_mdiobus_register(dev->mdiobus, node);
+       if (node)
                of_node_put(node);
-       } else {
-               ret = mdiobus_register(dev->mdiobus);
-       }
        if (ret) {
                netdev_err(dev->net, "can't register MDIO bus\n");
                goto exit1;
@@ -2063,52 +2058,91 @@ static int ksz9031rnx_fixup(struct phy_device *phydev)
        return 1;
 }
 
-static int lan78xx_phy_init(struct lan78xx_net *dev)
+static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev)
 {
+       u32 buf;
        int ret;
-       u32 mii_adv;
+       struct fixed_phy_status fphy_status = {
+               .link = 1,
+               .speed = SPEED_1000,
+               .duplex = DUPLEX_FULL,
+       };
        struct phy_device *phydev;
 
        phydev = phy_find_first(dev->mdiobus);
        if (!phydev) {
-               netdev_err(dev->net, "no PHY found\n");
-               return -EIO;
-       }
-
-       if ((dev->chipid == ID_REV_CHIP_ID_7800_) ||
-           (dev->chipid == ID_REV_CHIP_ID_7850_)) {
-               phydev->is_internal = true;
-               dev->interface = PHY_INTERFACE_MODE_GMII;
-
-       } else if (dev->chipid == ID_REV_CHIP_ID_7801_) {
+               netdev_dbg(dev->net, "PHY not found, registering fixed PHY\n");
+               phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1,
+                                           NULL);
+               if (IS_ERR(phydev)) {
+                       netdev_err(dev->net, "No PHY/fixed_PHY found\n");
+                       return NULL;
+               }
+               netdev_dbg(dev->net, "Registered fixed PHY\n");
+               dev->interface = PHY_INTERFACE_MODE_RGMII;
+               ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
+                                       MAC_RGMII_ID_TXC_DELAY_EN_);
+               ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+               ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+               buf |= HW_CFG_CLK125_EN_;
+               buf |= HW_CFG_REFCLK25_EN_;
+               ret = lan78xx_write_reg(dev, HW_CFG, buf);
+       } else {
                if (!phydev->drv) {
                        netdev_err(dev->net, "no PHY driver found\n");
-                       return -EIO;
+                       return NULL;
                }
-
                dev->interface = PHY_INTERFACE_MODE_RGMII;
-
                /* external PHY fixup for KSZ9031RNX */
                ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0,
                                                 ksz9031rnx_fixup);
                if (ret < 0) {
-                       netdev_err(dev->net, "fail to register fixup\n");
-                       return ret;
+                       netdev_err(dev->net, "Failed to register fixup for PHY_KSZ9031RNX\n");
+                       return NULL;
                }
                /* external PHY fixup for LAN8835 */
                ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0,
                                                 lan8835_fixup);
                if (ret < 0) {
-                       netdev_err(dev->net, "fail to register fixup\n");
-                       return ret;
+                       netdev_err(dev->net, "Failed to register fixup for PHY_LAN8835\n");
+                       return NULL;
                }
                /* add more external PHY fixup here if needed */
 
                phydev->is_internal = false;
-       } else {
-               netdev_err(dev->net, "unknown ID found\n");
-               ret = -EIO;
-               goto error;
+       }
+       return phydev;
+}
+
+static int lan78xx_phy_init(struct lan78xx_net *dev)
+{
+       int ret;
+       u32 mii_adv;
+       struct phy_device *phydev;
+
+       switch (dev->chipid) {
+       case ID_REV_CHIP_ID_7801_:
+               phydev = lan7801_phy_init(dev);
+               if (!phydev) {
+                       netdev_err(dev->net, "lan7801: PHY init failed\n");
+                       return -EIO;
+               }
+               break;
+
+       case ID_REV_CHIP_ID_7800_:
+       case ID_REV_CHIP_ID_7850_:
+               phydev = phy_find_first(dev->mdiobus);
+               if (!phydev) {
+                       netdev_err(dev->net, "no PHY found\n");
+                       return -EIO;
+               }
+               phydev->is_internal = true;
+               dev->interface = PHY_INTERFACE_MODE_GMII;
+               break;
+
+       default:
+               netdev_err(dev->net, "Unknown CHIP ID found\n");
+               return -EIO;
        }
 
        /* if phyirq is not set, use polling mode in phylib */
@@ -2127,6 +2161,16 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
        if (ret) {
                netdev_err(dev->net, "can't attach PHY to %s\n",
                           dev->mdiobus->id);
+               if (dev->chipid == ID_REV_CHIP_ID_7801_) {
+                       if (phy_is_pseudo_fixed_link(phydev)) {
+                               fixed_phy_unregister(phydev);
+                       } else {
+                               phy_unregister_fixup_for_uid(PHY_KSZ9031RNX,
+                                                            0xfffffff0);
+                               phy_unregister_fixup_for_uid(PHY_LAN8835,
+                                                            0xfffffff0);
+                       }
+               }
                return -EIO;
        }
 
@@ -2166,12 +2210,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
        dev->fc_autoneg = phydev->autoneg;
 
        return 0;
-
-error:
-       phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
-       phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
-
-       return ret;
 }
 
 static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
@@ -3569,6 +3607,7 @@ static void lan78xx_disconnect(struct usb_interface *intf)
        struct lan78xx_net              *dev;
        struct usb_device               *udev;
        struct net_device               *net;
+       struct phy_device               *phydev;
 
        dev = usb_get_intfdata(intf);
        usb_set_intfdata(intf, NULL);
@@ -3577,12 +3616,16 @@ static void lan78xx_disconnect(struct usb_interface *intf)
 
        udev = interface_to_usbdev(intf);
        net = dev->net;
+       phydev = net->phydev;
 
        phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
        phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
 
        phy_disconnect(net->phydev);
 
+       if (phy_is_pseudo_fixed_link(phydev))
+               fixed_phy_unregister(phydev);
+
        unregister_netdev(net);
 
        cancel_delayed_work_sync(&dev->wq);
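
The LAN7801 path now falls back to a software "fixed" PHY when the MDIO scan finds nothing, which is why the Kconfig hunk selects FIXED_PHY and the include switches to linux/phy_fixed.h. Registration and the matching teardown, condensed from the code above (error handling elided):

    #include <linux/phy_fixed.h>

    struct fixed_phy_status fphy_status = {
            .link   = 1,
            .speed  = SPEED_1000,
            .duplex = DUPLEX_FULL,
    };
    struct phy_device *phydev;

    /* PHY_POLL: no interrupt line; -1: no link GPIO; NULL: no DT node */
    phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);

    /* teardown must distinguish the two cases */
    if (phy_is_pseudo_fixed_link(phydev))
            fixed_phy_unregister(phydev);
    else
            phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);

The same phy_is_pseudo_fixed_link() test appears in both the attach error path and lan78xx_disconnect(), so the pseudo PHY is unregistered exactly once on every exit route.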
index c853e7410f5aae3ba1447a81bc42fd525bc4dd43..42565dd33aa66a09d73b0e397f3c9872587a6843 100644 (file)
@@ -1098,6 +1098,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x05c6, 0x9080, 8)},
        {QMI_FIXED_INTF(0x05c6, 0x9083, 3)},
        {QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)},    /* ublox R410M */
        {QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
        {QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
        {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
@@ -1343,6 +1344,18 @@ static int qmi_wwan_probe(struct usb_interface *intf,
                id->driver_info = (unsigned long)&qmi_wwan_info;
        }
 
+       /* There are devices where the same interface number can be
+        * configured as different functions. We should only bind to
+        * vendor specific functions when matching on interface number
+        */
+       if (id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER &&
+           desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) {
+               dev_dbg(&intf->dev,
+                       "Rejecting interface number match for class %02x\n",
+                       desc->bInterfaceClass);
+               return -ENODEV;
+       }
+
        /* Quectel EC20 quirk where we've QMI on interface 4 instead of 0 */
        if (quectel_ec20_detected(intf) && desc->bInterfaceNumber == 0) {
                dev_dbg(&intf->dev, "Quectel EC20 quirk, skipping interface 0\n");
index 3b5991734118ac4834e160a41c50894ba702cc49..f34794a76c4df71767edf5428d81d6187929d953 100644 (file)
@@ -765,7 +765,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        /* recalculate len if xdp.data or xdp.data_end were
                         * adjusted
                         */
-                       len = xdp.data_end - xdp.data;
+                       len = xdp.data_end - xdp.data + vi->hdr_len;
                        /* We can only create skb based on xdp_page. */
                        if (unlikely(xdp_page != page)) {
                                rcu_read_unlock();
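
An XDP program may move xdp.data (headroom adjustment via bpf_xdp_adjust_head()) or xdp.data_end, so len has to be recomputed, but page_to_skb() further down expects len to include the virtio-net header that still precedes the packet in the buffer (it subtracts hdr_len itself). As a worked example: hdr_len = 12 and a 1000-byte frame left untouched by the program give data_end - data = 1000, while the correct value is 1012; without the added vi->hdr_len the resulting skb came out hdr_len bytes short.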
index 9ebe2a689966d056a1b9a94e5ea557f0d0f9a55e..e454dfc9ad8f2151ddeaf2a2621033ccdce0e1c6 100644 (file)
@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 
        gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
        while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
+               /* Prevent any &gdesc->tcd field from being (speculatively)
+                * read before (&gdesc->tcd)->gen is read.
+                */
+               dma_rmb();
+
                completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
                                               &gdesc->tcd), tq, adapter->pdev,
                                               adapter);
@@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                gdesc->txd.tci = skb_vlan_tag_get(skb);
        }
 
+       /* Ensure that the write to (&gdesc->txd)->gen will be observed after
+        * all other writes to &gdesc->txd.
+        */
+       dma_wmb();
+
        /* finally flips the GEN bit of the SOP desc. */
        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
                                                  VMXNET3_TXD_GEN);
@@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                         */
                        break;
                }
+
+               /* Prevent any rcd field from being (speculatively) read before
+                * rcd->gen is read.
+                */
+               dma_rmb();
+
                BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
                       rcd->rqID != rq->dataRingQid);
                idx = rcd->rxdIdx;
@@ -1528,6 +1544,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                ring->next2comp = idx;
                num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
                ring = rq->rx_ring + ring_idx;
+
+               /* Ensure that the writes to rxd->gen bits will be observed
+                * after all other writes to rxd objects.
+                */
+               dma_wmb();
+
                while (num_to_alloc) {
                        vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
                                          &rxCmdDesc);
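
All three vmxnet3 barrier hunks protect the same gen-bit handshake: a descriptor is owned once its gen field matches the ring's current generation, and the gen flip must be the last write the producer makes and the first read the consumer trusts. On weakly ordered CPUs, and against speculative reads, that ordering needs explicit DMA barriers. The shape of it, with illustrative names (desc, ring, payload):

    /* consumer (completion rings) */
    while (desc->gen == ring->gen) {
            dma_rmb();              /* gen first, payload fields after */
            /* ... consume desc->payload, advance the ring ... */
    }

    /* producer (tx/rx fill) */
    desc->payload = value;          /* payload first ...               */
    dma_wmb();
    desc->gen = ring->gen;          /* ... gen flip publishes it       */

dma_rmb()/dma_wmb() order accesses to DMA-coherent memory and are cheaper than full rmb()/wmb() on most architectures, which is why they are preferred here.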
@@ -2688,7 +2710,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
 /* ==================== initialization and cleanup routines ============ */
 
 static int
-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
+vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
 {
        int err;
        unsigned long mmio_start, mmio_len;
@@ -2700,30 +2722,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
                return err;
        }
 
-       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
-               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
-                       dev_err(&pdev->dev,
-                               "pci_set_consistent_dma_mask failed\n");
-                       err = -EIO;
-                       goto err_set_mask;
-               }
-               *dma64 = true;
-       } else {
-               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
-                       dev_err(&pdev->dev,
-                               "pci_set_dma_mask failed\n");
-                       err = -EIO;
-                       goto err_set_mask;
-               }
-               *dma64 = false;
-       }
-
        err = pci_request_selected_regions(pdev, (1 << 2) - 1,
                                           vmxnet3_driver_name);
        if (err) {
                dev_err(&pdev->dev,
                        "Failed to request region for adapter: error %d\n", err);
-               goto err_set_mask;
+               goto err_enable_device;
        }
 
        pci_set_master(pdev);
@@ -2751,7 +2755,7 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
        iounmap(adapter->hw_addr0);
 err_ioremap:
        pci_release_selected_regions(pdev, (1 << 2) - 1);
-err_set_mask:
+err_enable_device:
        pci_disable_device(pdev);
        return err;
 }
@@ -2945,7 +2949,7 @@ vmxnet3_close(struct net_device *netdev)
         * completion.
         */
        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
-               msleep(1);
+               usleep_range(1000, 2000);
 
        vmxnet3_quiesce_dev(adapter);
 
@@ -2995,7 +2999,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
         * completion.
         */
        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
-               msleep(1);
+               usleep_range(1000, 2000);
 
        if (netif_running(netdev)) {
                vmxnet3_quiesce_dev(adapter);
@@ -3254,7 +3258,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 #endif
        };
        int err;
-       bool dma64 = false; /* stupid gcc */
+       bool dma64;
        u32 ver;
        struct net_device *netdev;
        struct vmxnet3_adapter *adapter;
@@ -3300,6 +3304,24 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
        adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
 
+       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+                       dev_err(&pdev->dev,
+                               "pci_set_consistent_dma_mask failed\n");
+                       err = -EIO;
+                       goto err_set_mask;
+               }
+               dma64 = true;
+       } else {
+               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+                       dev_err(&pdev->dev,
+                               "pci_set_dma_mask failed\n");
+                       err = -EIO;
+                       goto err_set_mask;
+               }
+               dma64 = false;
+       }
+
        spin_lock_init(&adapter->cmd_lock);
        adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
                                             sizeof(struct vmxnet3_adapter),
@@ -3307,7 +3329,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
                dev_err(&pdev->dev, "Failed to map dma\n");
                err = -EFAULT;
-               goto err_dma_map;
+               goto err_set_mask;
        }
        adapter->shared = dma_alloc_coherent(
                                &adapter->pdev->dev,
@@ -3358,7 +3380,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        }
 #endif /* VMXNET3_RSS */
 
-       err = vmxnet3_alloc_pci_resources(adapter, &dma64);
+       err = vmxnet3_alloc_pci_resources(adapter);
        if (err < 0)
                goto err_alloc_pci;
 
@@ -3504,7 +3526,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 err_alloc_shared:
        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
-err_dma_map:
+err_set_mask:
        free_netdev(netdev);
        return err;
 }
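
Hoisting pci_set_dma_mask()/pci_set_consistent_dma_mask() out of vmxnet3_alloc_pci_resources() and into the probe path fixes an ordering problem: probe calls dma_map_single() on the adapter structure before it allocates the PCI resources, so previously that first mapping ran before any DMA mask had been configured. The rule the patch restores, in outline:

    if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0)
            dma64 = true;                   /* 64-bit capable  */
    else if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) == 0)
            dma64 = false;                  /* 32-bit fallback */
    else
            return -EIO;                    /* no usable mask  */

    /* only after the mask is set: */
    pa = dma_map_single(&pdev->dev, adapter, sizeof(*adapter),
                        PCI_DMA_TODEVICE);

As a side effect dma64 is computed directly in probe, so the out-parameter and its "stupid gcc" initialiser go away, and the error labels are renamed to match the new unwind order.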
@@ -3567,7 +3589,7 @@ static void vmxnet3_shutdown_device(struct pci_dev *pdev)
         * completion.
         */
        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
-               msleep(1);
+               usleep_range(1000, 2000);
 
        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED,
                             &adapter->state)) {
index 2ff27314e04739034cef408b59aed6a77cd98911..559db051a500f4f339a20ac8d9094379c31eb805 100644 (file)
@@ -600,7 +600,7 @@ vmxnet3_set_ringparam(struct net_device *netdev,
         * completion.
         */
        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
-               msleep(1);
+               usleep_range(1000, 2000);
 
        if (netif_running(netdev)) {
                vmxnet3_quiesce_dev(adapter);
index a3326463b71f1e26cd74b33b4480044f57ec4f31..a2c554f8a61bc3262823d9ffd551af6ccedfc299 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.14.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.16.0-k"
 
-/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040e00
+/* Each byte of this 32-bit integer encodes a version number in
+ * VMXNET3_DRIVER_VERSION_STRING.
+ */
+#define VMXNET3_DRIVER_VERSION_NUM      0x01041000
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
index db95f85751e3448302b3a6afe103396ff1b951d2..808fb30be9ad1299d0412fc4e424188b6244e267 100644 (file)
@@ -426,7 +426,7 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr,
 {
        u8 *ies = NULL, *wpa_ie = NULL, *pos;
        size_t ies_len = 0;
-       struct station_info sinfo;
+       struct station_info *sinfo;
 
        ath6kl_dbg(ATH6KL_DBG_TRC, "new station %pM aid=%d\n", mac_addr, aid);
 
@@ -482,16 +482,20 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr,
                           keymgmt, ucipher, auth, apsd_info);
 
        /* send event to application */
-       memset(&sinfo, 0, sizeof(sinfo));
+       sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+       if (!sinfo)
+               return;
 
        /* TODO: sinfo.generation */
 
-       sinfo.assoc_req_ies = ies;
-       sinfo.assoc_req_ies_len = ies_len;
+       sinfo->assoc_req_ies = ies;
+       sinfo->assoc_req_ies_len = ies_len;
 
-       cfg80211_new_sta(vif->ndev, mac_addr, &sinfo, GFP_KERNEL);
+       cfg80211_new_sta(vif->ndev, mac_addr, sinfo, GFP_KERNEL);
 
        netif_wake_queue(vif->ndev);
+
+       kfree(sinfo);
 }
 
 void disconnect_timer_handler(struct timer_list *t)
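
struct station_info carries a few hundred bytes of rate, signal and statistics fields and keeps growing, enough to trip stack-frame size warnings, so this hunk and the wil6210/brcmfmac hunks below convert the on-stack instance to a heap allocation. The conversion is mechanical; cfg80211_new_sta() builds its netlink message from the struct before returning, so freeing immediately afterwards is safe:

    struct station_info *sinfo;

    sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
    if (!sinfo)
            return;                 /* or -ENOMEM where a code is expected */

    sinfo->assoc_req_ies = ies;
    sinfo->assoc_req_ies_len = ies_len;
    cfg80211_new_sta(vif->ndev, mac_addr, sinfo, GFP_KERNEL);
    kfree(sinfo);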
index 524a7d6898335ee0f805af7eb7d20dc1151d8874..ebfdff4d328cbc66217222d1c1df8329e2586b52 100644 (file)
@@ -1202,8 +1202,12 @@ static const struct file_operations fops_freq = {
 static int wil_link_debugfs_show(struct seq_file *s, void *data)
 {
        struct wil6210_priv *wil = s->private;
-       struct station_info sinfo;
-       int i, rc;
+       struct station_info *sinfo;
+       int i, rc = 0;
+
+       sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+       if (!sinfo)
+               return -ENOMEM;
 
        for (i = 0; i < ARRAY_SIZE(wil->sta); i++) {
                struct wil_sta_info *p = &wil->sta[i];
@@ -1231,19 +1235,21 @@ static int wil_link_debugfs_show(struct seq_file *s, void *data)
 
                vif = (mid < wil->max_vifs) ? wil->vifs[mid] : NULL;
                if (vif) {
-                       rc = wil_cid_fill_sinfo(vif, i, &sinfo);
+                       rc = wil_cid_fill_sinfo(vif, i, sinfo);
                        if (rc)
-                               return rc;
+                               goto out;
 
-                       seq_printf(s, "  Tx_mcs = %d\n", sinfo.txrate.mcs);
-                       seq_printf(s, "  Rx_mcs = %d\n", sinfo.rxrate.mcs);
-                       seq_printf(s, "  SQ     = %d\n", sinfo.signal);
+                       seq_printf(s, "  Tx_mcs = %d\n", sinfo->txrate.mcs);
+                       seq_printf(s, "  Rx_mcs = %d\n", sinfo->rxrate.mcs);
+                       seq_printf(s, "  SQ     = %d\n", sinfo->signal);
                } else {
                        seq_puts(s, "  INVALID MID\n");
                }
        }
 
-       return 0;
+out:
+       kfree(sinfo);
+       return rc;
 }
 
 static int wil_link_seq_open(struct inode *inode, struct file *file)
index 8a9bbd6bcea83b084e96e5bd5581ead6f277ade1..5d991243cdb59c341d5637dc20a82cbbc9f9f389 100644 (file)
@@ -824,7 +824,7 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)
        struct wireless_dev *wdev = vif_to_wdev(vif);
        struct wmi_connect_event *evt = d;
        int ch; /* channel number */
-       struct station_info sinfo;
+       struct station_info *sinfo;
        u8 *assoc_req_ie, *assoc_resp_ie;
        size_t assoc_req_ielen, assoc_resp_ielen;
        /* capinfo(u16) + listen_interval(u16) + IEs */
@@ -940,6 +940,7 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)
                vif->bss = NULL;
        } else if ((wdev->iftype == NL80211_IFTYPE_AP) ||
                   (wdev->iftype == NL80211_IFTYPE_P2P_GO)) {
+
                if (rc) {
                        if (disable_ap_sme)
                                /* notify new_sta has failed */
@@ -947,16 +948,22 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)
                        goto out;
                }
 
-               memset(&sinfo, 0, sizeof(sinfo));
+               sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+               if (!sinfo) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
 
-               sinfo.generation = wil->sinfo_gen++;
+               sinfo->generation = wil->sinfo_gen++;
 
                if (assoc_req_ie) {
-                       sinfo.assoc_req_ies = assoc_req_ie;
-                       sinfo.assoc_req_ies_len = assoc_req_ielen;
+                       sinfo->assoc_req_ies = assoc_req_ie;
+                       sinfo->assoc_req_ies_len = assoc_req_ielen;
                }
 
-               cfg80211_new_sta(ndev, evt->bssid, &sinfo, GFP_KERNEL);
+               cfg80211_new_sta(ndev, evt->bssid, sinfo, GFP_KERNEL);
+
+               kfree(sinfo);
        } else {
                wil_err(wil, "unhandled iftype %d for CID %d\n", wdev->iftype,
                        evt->cid);
index 6837064908bed38f05c4d98642c50c82d9cf984d..6b0e1ec346cb60aacd8076600033cf9ee554c462 100644 (file)
@@ -1484,7 +1484,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
        int slot, firstused;
        bool frame_succeed;
        int skip;
-       static u8 err_out1, err_out2;
+       static u8 err_out1;
 
        ring = parse_cookie(dev, status->cookie, &slot);
        if (unlikely(!ring))
@@ -1518,13 +1518,13 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
                        }
                } else {
                        /* More than a single header/data pair were missed.
-                        * Report this error once.
+                        * Report this error, and reset the controller to
+                        * revive operation.
                         */
-                       if (!err_out2)
-                               b43dbg(dev->wl,
-                                      "Out of order TX status report on DMA ring %d. Expected %d, but got %d\n",
-                                      ring->index, firstused, slot);
-                       err_out2 = 1;
+                       b43dbg(dev->wl,
+                              "Out of order TX status report on DMA ring %d. Expected %d, but got %d\n",
+                              ring->index, firstused, slot);
+                       b43_controller_restart(dev, "Out of order TX");
                        return;
                }
        }
index cfa617ddb2f12174ebccd68e19c27cb1fbe7257e..2f0c64cef65f36d2da7cd93bb17f709faeb44320 100644 (file)
@@ -1064,7 +1064,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring,
        meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);
        /* create a bounce buffer in zone_dma on mapping failure. */
        if (b43legacy_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) {
-               bounce_skb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
+               bounce_skb = alloc_skb(skb->len, GFP_KERNEL | GFP_DMA);
                if (!bounce_skb) {
                        ring->current_slot = old_top_slot;
                        ring->used_slots = old_used_slots;
index 0b68240ec7b4c789ac8fab69b7c6adbad02b4281..a1915411c280b32a1ed7778cfb153fe90f8026f8 100644 (file)
@@ -963,6 +963,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43340),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
+       BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430),
index 89b86251910ead25ce8f115b12f4de90cb329055..f5b405c98047ea01bcfdbbd85f85a9c6b3476922 100644 (file)
@@ -2728,9 +2728,8 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg,
                                   struct brcmf_bss_info_le *bi)
 {
        struct wiphy *wiphy = cfg_to_wiphy(cfg);
-       struct ieee80211_channel *notify_channel;
        struct cfg80211_bss *bss;
-       struct ieee80211_supported_band *band;
+       enum nl80211_band band;
        struct brcmu_chan ch;
        u16 channel;
        u32 freq;
@@ -2738,7 +2737,7 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg,
        u16 notify_interval;
        u8 *notify_ie;
        size_t notify_ielen;
-       s32 notify_signal;
+       struct cfg80211_inform_bss bss_data = {};
 
        if (le32_to_cpu(bi->length) > WL_BSS_INFO_MAX) {
                brcmf_err("Bss info is larger than buffer. Discarding\n");
@@ -2753,32 +2752,33 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg,
        channel = bi->ctl_ch;
 
        if (channel <= CH_MAX_2G_CHANNEL)
-               band = wiphy->bands[NL80211_BAND_2GHZ];
+               band = NL80211_BAND_2GHZ;
        else
-               band = wiphy->bands[NL80211_BAND_5GHZ];
+               band = NL80211_BAND_5GHZ;
 
-       freq = ieee80211_channel_to_frequency(channel, band->band);
-       notify_channel = ieee80211_get_channel(wiphy, freq);
+       freq = ieee80211_channel_to_frequency(channel, band);
+       bss_data.chan = ieee80211_get_channel(wiphy, freq);
+       bss_data.scan_width = NL80211_BSS_CHAN_WIDTH_20;
+       bss_data.boottime_ns = ktime_to_ns(ktime_get_boottime());
 
        notify_capability = le16_to_cpu(bi->capability);
        notify_interval = le16_to_cpu(bi->beacon_period);
        notify_ie = (u8 *)bi + le16_to_cpu(bi->ie_offset);
        notify_ielen = le32_to_cpu(bi->ie_length);
-       notify_signal = (s16)le16_to_cpu(bi->RSSI) * 100;
+       bss_data.signal = (s16)le16_to_cpu(bi->RSSI) * 100;
 
        brcmf_dbg(CONN, "bssid: %pM\n", bi->BSSID);
        brcmf_dbg(CONN, "Channel: %d(%d)\n", channel, freq);
        brcmf_dbg(CONN, "Capability: %X\n", notify_capability);
        brcmf_dbg(CONN, "Beacon interval: %d\n", notify_interval);
-       brcmf_dbg(CONN, "Signal: %d\n", notify_signal);
+       brcmf_dbg(CONN, "Signal: %d\n", bss_data.signal);
 
-       bss = cfg80211_inform_bss(wiphy, notify_channel,
-                                 CFG80211_BSS_FTYPE_UNKNOWN,
-                                 (const u8 *)bi->BSSID,
-                                 0, notify_capability,
-                                 notify_interval, notify_ie,
-                                 notify_ielen, notify_signal,
-                                 GFP_KERNEL);
+       bss = cfg80211_inform_bss_data(wiphy, &bss_data,
+                                      CFG80211_BSS_FTYPE_UNKNOWN,
+                                      (const u8 *)bi->BSSID,
+                                      0, notify_capability,
+                                      notify_interval, notify_ie,
+                                      notify_ielen, GFP_KERNEL);
 
        if (!bss)
                return -ENOMEM;
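
Switching from cfg80211_inform_bss() to cfg80211_inform_bss_data() bundles channel, scan width and signal (in mBm, hence the *100 on the dBm RSSI) into one struct cfg80211_inform_bss and, crucially, adds boottime_ns: a CLOCK_BOOTTIME receive timestamp that lets cfg80211 and userspace age scan results correctly instead of treating every reported BSS as freshly seen.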
@@ -5498,7 +5498,7 @@ brcmf_notify_connect_status_ap(struct brcmf_cfg80211_info *cfg,
        static int generation;
        u32 event = e->event_code;
        u32 reason = e->reason;
-       struct station_info sinfo;
+       struct station_info *sinfo;
 
        brcmf_dbg(CONN, "event %s (%u), reason %d\n",
                  brcmf_fweh_event_name(event), event, reason);
@@ -5511,16 +5511,22 @@ brcmf_notify_connect_status_ap(struct brcmf_cfg80211_info *cfg,
 
        if (((event == BRCMF_E_ASSOC_IND) || (event == BRCMF_E_REASSOC_IND)) &&
            (reason == BRCMF_E_STATUS_SUCCESS)) {
-               memset(&sinfo, 0, sizeof(sinfo));
                if (!data) {
                        brcmf_err("No IEs present in ASSOC/REASSOC_IND");
                        return -EINVAL;
                }
-               sinfo.assoc_req_ies = data;
-               sinfo.assoc_req_ies_len = e->datalen;
+
+               sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+               if (!sinfo)
+                       return -ENOMEM;
+
+               sinfo->assoc_req_ies = data;
+               sinfo->assoc_req_ies_len = e->datalen;
                generation++;
-               sinfo.generation = generation;
-               cfg80211_new_sta(ndev, e->addr, &sinfo, GFP_KERNEL);
+               sinfo->generation = generation;
+               cfg80211_new_sta(ndev, e->addr, sinfo, GFP_KERNEL);
+
+               kfree(sinfo);
        } else if ((event == BRCMF_E_DISASSOC_IND) ||
                   (event == BRCMF_E_DEAUTH_IND) ||
                   (event == BRCMF_E_DEAUTH)) {
@@ -6512,6 +6518,7 @@ static int brcmf_setup_wiphy(struct wiphy *wiphy, struct brcmf_if *ifp)
 
        wiphy->flags |= WIPHY_FLAG_NETNS_OK |
                        WIPHY_FLAG_PS_ON_BY_DEFAULT |
+                       WIPHY_FLAG_HAVE_AP_SME |
                        WIPHY_FLAG_OFFCHAN_TX |
                        WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
        if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_TDLS))
index 9277f4c2bfebfbf5aaf3ff77fe512c1535ef9c23..9095b830ae4d7a8146d9d77b4f818c825d541635 100644 (file)
@@ -459,7 +459,7 @@ static void brcmf_fw_free_request(struct brcmf_fw_request *req)
        kfree(req);
 }
 
-static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
+static int brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
 {
        struct brcmf_fw *fwctx = ctx;
        struct brcmf_fw_item *cur;
@@ -498,13 +498,10 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
        brcmf_dbg(TRACE, "nvram %p len %d\n", nvram, nvram_length);
        cur->nv_data.data = nvram;
        cur->nv_data.len = nvram_length;
-       return;
+       return 0;
 
 fail:
-       brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
-       fwctx->done(fwctx->dev, -ENOENT, NULL);
-       brcmf_fw_free_request(fwctx->req);
-       kfree(fwctx);
+       return -ENOENT;
 }
 
 static int brcmf_fw_request_next_item(struct brcmf_fw *fwctx, bool async)
@@ -553,20 +550,27 @@ static void brcmf_fw_request_done(const struct firmware *fw, void *ctx)
        brcmf_dbg(TRACE, "enter: firmware %s %sfound\n", cur->path,
                  fw ? "" : "not ");
 
-       if (fw) {
-               if (cur->type == BRCMF_FW_TYPE_BINARY)
-                       cur->binary = fw;
-               else if (cur->type == BRCMF_FW_TYPE_NVRAM)
-                       brcmf_fw_request_nvram_done(fw, fwctx);
-               else
-                       release_firmware(fw);
-       } else if (cur->type == BRCMF_FW_TYPE_NVRAM) {
-               brcmf_fw_request_nvram_done(NULL, fwctx);
-       } else if (!(cur->flags & BRCMF_FW_REQF_OPTIONAL)) {
+       if (!fw)
                ret = -ENOENT;
+
+       switch (cur->type) {
+       case BRCMF_FW_TYPE_NVRAM:
+               ret = brcmf_fw_request_nvram_done(fw, fwctx);
+               break;
+       case BRCMF_FW_TYPE_BINARY:
+               cur->binary = fw;
+               break;
+       default:
+               /* something fishy here so bail out early */
+               brcmf_err("unknown fw type: %d\n", cur->type);
+               release_firmware(fw);
+               ret = -EINVAL;
                goto fail;
        }
 
+       if (ret < 0 && !(cur->flags & BRCMF_FW_REQF_OPTIONAL))
+               goto fail;
+
        do {
                if (++fwctx->curpos == fwctx->req->n_items) {
                        ret = 0;
@@ -630,7 +634,7 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
 
 struct brcmf_fw_request *
 brcmf_fw_alloc_request(u32 chip, u32 chiprev,
-                      struct brcmf_firmware_mapping mapping_table[],
+                      const struct brcmf_firmware_mapping mapping_table[],
                       u32 table_size, struct brcmf_fw_name *fwnames,
                       u32 n_fwnames)
 {
index 79a21095c349dca59cf523d7be0fada2bfc61083..2893e56910f02ead0d910c8d71bd0d4a71ecd2c7 100644 (file)
@@ -80,7 +80,7 @@ struct brcmf_fw_name {
 
 struct brcmf_fw_request *
 brcmf_fw_alloc_request(u32 chip, u32 chiprev,
-                      struct brcmf_firmware_mapping mapping_table[],
+                      const struct brcmf_firmware_mapping mapping_table[],
                       u32 table_size, struct brcmf_fw_name *fwnames,
                       u32 n_fwnames);
 
index f93ba6be1ef877ff780476f00b04fd59e4ef3674..692235d252779935ad33f266a842de7c5f933fe6 100644 (file)
 #define BRCMF_H2D_MSGRING_CONTROL_SUBMIT_ITEMSIZE      40
 #define BRCMF_H2D_MSGRING_RXPOST_SUBMIT_ITEMSIZE       32
 #define BRCMF_D2H_MSGRING_CONTROL_COMPLETE_ITEMSIZE    24
-#define BRCMF_D2H_MSGRING_TX_COMPLETE_ITEMSIZE         16
-#define BRCMF_D2H_MSGRING_RX_COMPLETE_ITEMSIZE         32
+#define BRCMF_D2H_MSGRING_TX_COMPLETE_ITEMSIZE_PRE_V7  16
+#define BRCMF_D2H_MSGRING_TX_COMPLETE_ITEMSIZE         24
+#define BRCMF_D2H_MSGRING_RX_COMPLETE_ITEMSIZE_PRE_V7  32
+#define BRCMF_D2H_MSGRING_RX_COMPLETE_ITEMSIZE         40
 #define BRCMF_H2D_TXFLOWRING_ITEMSIZE                  48
 
 struct msgbuf_buf_addr {
index bcef208a81a512dd8f19918731721e20ed1dcc2e..4b2149b483626074816caaeeaf76be79a0741278 100644 (file)
@@ -2073,6 +2073,13 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p,
        }
 
        pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp;
+
+       /* firmware requires unique mac address for p2pdev interface */
+       if (addr && ether_addr_equal(addr, pri_ifp->mac_addr)) {
+               brcmf_err("discovery vif must be different from primary interface\n");
+               return ERR_PTR(-EINVAL);
+       }
+
        brcmf_p2p_generate_bss_mac(p2p, addr);
        brcmf_p2p_set_firmware(pri_ifp, p2p->dev_addr);
 
index 6e25ff94dd07a33926bd7c60820877a02024778a..f0797aeada67f9c44c0ac3d80d32ee83f599d555 100644 (file)
@@ -59,7 +59,7 @@ BRCMF_FW_DEF(4366B, "brcmfmac4366b-pcie");
 BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie");
 BRCMF_FW_DEF(4371, "brcmfmac4371-pcie");
 
-static struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
+static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
        BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602),
        BRCMF_FW_ENTRY(BRCM_CC_43465_CHIP_ID, 0xFFFFFFF0, 4366C),
        BRCMF_FW_ENTRY(BRCM_CC_4350_CHIP_ID, 0x000000FF, 4350C),
@@ -105,7 +105,8 @@ static struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
 #define BRCMF_PCIE_PCIE2REG_MAILBOXMASK                0x4C
 #define BRCMF_PCIE_PCIE2REG_CONFIGADDR         0x120
 #define BRCMF_PCIE_PCIE2REG_CONFIGDATA         0x124
-#define BRCMF_PCIE_PCIE2REG_H2D_MAILBOX                0x140
+#define BRCMF_PCIE_PCIE2REG_H2D_MAILBOX_0      0x140
+#define BRCMF_PCIE_PCIE2REG_H2D_MAILBOX_1      0x144
 
 #define BRCMF_PCIE2_INTA                       0x01
 #define BRCMF_PCIE2_INTB                       0x02
@@ -135,11 +136,13 @@ static struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
                                                 BRCMF_PCIE_MB_INT_D2H3_DB0 | \
                                                 BRCMF_PCIE_MB_INT_D2H3_DB1)
 
+#define BRCMF_PCIE_SHARED_VERSION_7            7
 #define BRCMF_PCIE_MIN_SHARED_VERSION          5
-#define BRCMF_PCIE_MAX_SHARED_VERSION          6
+#define BRCMF_PCIE_MAX_SHARED_VERSION          BRCMF_PCIE_SHARED_VERSION_7
 #define BRCMF_PCIE_SHARED_VERSION_MASK         0x00FF
 #define BRCMF_PCIE_SHARED_DMA_INDEX            0x10000
 #define BRCMF_PCIE_SHARED_DMA_2B_IDX           0x100000
+#define BRCMF_PCIE_SHARED_HOSTRDY_DB1          0x10000000
 
 #define BRCMF_PCIE_FLAGS_HTOD_SPLIT            0x4000
 #define BRCMF_PCIE_FLAGS_DTOH_SPLIT            0x8000
@@ -316,6 +319,14 @@ static const u32 brcmf_ring_max_item[BRCMF_NROF_COMMON_MSGRINGS] = {
        BRCMF_D2H_MSGRING_RX_COMPLETE_MAX_ITEM
 };
 
+static const u32 brcmf_ring_itemsize_pre_v7[BRCMF_NROF_COMMON_MSGRINGS] = {
+       BRCMF_H2D_MSGRING_CONTROL_SUBMIT_ITEMSIZE,
+       BRCMF_H2D_MSGRING_RXPOST_SUBMIT_ITEMSIZE,
+       BRCMF_D2H_MSGRING_CONTROL_COMPLETE_ITEMSIZE,
+       BRCMF_D2H_MSGRING_TX_COMPLETE_ITEMSIZE_PRE_V7,
+       BRCMF_D2H_MSGRING_RX_COMPLETE_ITEMSIZE_PRE_V7
+};
+
 static const u32 brcmf_ring_itemsize[BRCMF_NROF_COMMON_MSGRINGS] = {
        BRCMF_H2D_MSGRING_CONTROL_SUBMIT_ITEMSIZE,
        BRCMF_H2D_MSGRING_RXPOST_SUBMIT_ITEMSIZE,
@@ -782,6 +793,12 @@ static void brcmf_pcie_intr_enable(struct brcmf_pciedev_info *devinfo)
                               BRCMF_PCIE_MB_INT_FN0_1);
 }
 
+static void brcmf_pcie_hostready(struct brcmf_pciedev_info *devinfo)
+{
+       if (devinfo->shared.flags & BRCMF_PCIE_SHARED_HOSTRDY_DB1)
+               brcmf_pcie_write_reg32(devinfo,
+                                      BRCMF_PCIE_PCIE2REG_H2D_MAILBOX_1, 1);
+}
 
 static irqreturn_t brcmf_pcie_quick_check_isr(int irq, void *arg)
 {
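
brcmf_pcie_hostready() implements a "host ready" handshake: firmware that sets BRCMF_PCIE_SHARED_HOSTRDY_DB1 in the shared flags waits for a write to H2D doorbell 1 before proceeding, so the driver rings it right after enabling interrupts, both at initial setup and when resuming from D3 (both call sites appear below). Gating the write on the flag keeps older firmware, which only knows doorbell 0, unaffected; doorbell 0 keeps its old ring-bell role under the new _0-suffixed register name.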
@@ -924,7 +941,7 @@ static int brcmf_pcie_ring_mb_ring_bell(void *ctx)
 
        brcmf_dbg(PCIE, "RING !\n");
        /* Any arbitrary value will do, lets use 1 */
-       brcmf_pcie_write_reg32(devinfo, BRCMF_PCIE_PCIE2REG_H2D_MAILBOX, 1);
+       brcmf_pcie_write_reg32(devinfo, BRCMF_PCIE_PCIE2REG_H2D_MAILBOX_0, 1);
 
        return 0;
 }
@@ -999,8 +1016,14 @@ brcmf_pcie_alloc_dma_and_ring(struct brcmf_pciedev_info *devinfo, u32 ring_id,
        struct brcmf_pcie_ringbuf *ring;
        u32 size;
        u32 addr;
+       const u32 *ring_itemsize_array;
+
+       if (devinfo->shared.version < BRCMF_PCIE_SHARED_VERSION_7)
+               ring_itemsize_array = brcmf_ring_itemsize_pre_v7;
+       else
+               ring_itemsize_array = brcmf_ring_itemsize;
 
-       size = brcmf_ring_max_item[ring_id] * brcmf_ring_itemsize[ring_id];
+       size = brcmf_ring_max_item[ring_id] * ring_itemsize_array[ring_id];
        dma_buf = brcmf_pcie_init_dmabuffer_for_device(devinfo, size,
                        tcm_ring_phys_addr + BRCMF_RING_MEM_BASE_ADDR_OFFSET,
                        &dma_handle);
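
Shared-structure revision 7 grew the D2H completion descriptors (TX: 16 to 24 bytes, RX: 32 to 40 bytes, per the msgbuf header hunk above), so the ring geometry can no longer come from a single compile-time table: the item-size array is chosen from devinfo->shared.version, which the driver reads out of device memory before any common ring is allocated, and the selected array then feeds both the TCM ring descriptor written here and brcmf_commonring_config() below.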
@@ -1010,7 +1033,7 @@ brcmf_pcie_alloc_dma_and_ring(struct brcmf_pciedev_info *devinfo, u32 ring_id,
        addr = tcm_ring_phys_addr + BRCMF_RING_MAX_ITEM_OFFSET;
        brcmf_pcie_write_tcm16(devinfo, addr, brcmf_ring_max_item[ring_id]);
        addr = tcm_ring_phys_addr + BRCMF_RING_LEN_ITEMS_OFFSET;
-       brcmf_pcie_write_tcm16(devinfo, addr, brcmf_ring_itemsize[ring_id]);
+       brcmf_pcie_write_tcm16(devinfo, addr, ring_itemsize_array[ring_id]);
 
        ring = kzalloc(sizeof(*ring), GFP_KERNEL);
        if (!ring) {
@@ -1019,7 +1042,7 @@ brcmf_pcie_alloc_dma_and_ring(struct brcmf_pciedev_info *devinfo, u32 ring_id,
                return NULL;
        }
        brcmf_commonring_config(&ring->commonring, brcmf_ring_max_item[ring_id],
-                               brcmf_ring_itemsize[ring_id], dma_buf);
+                               ring_itemsize_array[ring_id], dma_buf);
        ring->dma_handle = dma_handle;
        ring->devinfo = devinfo;
        brcmf_commonring_register_cb(&ring->commonring,
@@ -1728,6 +1751,7 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
        init_waitqueue_head(&devinfo->mbdata_resp_wait);
 
        brcmf_pcie_intr_enable(devinfo);
+       brcmf_pcie_hostready(devinfo);
        if (brcmf_attach(&devinfo->pdev->dev, devinfo->settings) == 0)
                return;
 
@@ -1950,6 +1974,7 @@ static int brcmf_pcie_pm_leave_D3(struct device *dev)
                brcmf_pcie_select_core(devinfo, BCMA_CORE_PCIE2);
                brcmf_bus_change_state(bus, BRCMF_BUS_UP);
                brcmf_pcie_intr_enable(devinfo);
+               brcmf_pcie_hostready(devinfo);
                return 0;
        }
 
index 1037df7297bb6e9f1a19801b1c35555739c1e772..412a05b9a2b2720d619f89af5a9c3ddf42be735a 100644 (file)
@@ -619,7 +619,7 @@ BRCMF_FW_DEF(4354, "brcmfmac4354-sdio");
 BRCMF_FW_DEF(4356, "brcmfmac4356-sdio");
 BRCMF_FW_DEF(4373, "brcmfmac4373-sdio");
 
-static struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
+static const struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
        BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
        BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0x0000001F, 43241B0),
        BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0x00000020, 43241B4),
index a0873adcc01c89243939e2dda810f09111990137..a4308c6e72d79559a43b1ddad868072c4f8c25df 100644 (file)
@@ -52,7 +52,7 @@ BRCMF_FW_DEF(43242A, "brcmfmac43242a");
 BRCMF_FW_DEF(43569, "brcmfmac43569");
 BRCMF_FW_DEF(4373, "brcmfmac4373");
 
-static struct brcmf_firmware_mapping brcmf_usb_fwnames[] = {
+static const struct brcmf_firmware_mapping brcmf_usb_fwnames[] = {
        BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
        BRCMF_FW_ENTRY(BRCM_CC_43235_CHIP_ID, 0x00000008, 43236B),
        BRCMF_FW_ENTRY(BRCM_CC_43236_CHIP_ID, 0x00000008, 43236B),
index 236b524235062f13697b8e030ac01ba7a67b7426..7c4f550a1475d24dd87a6c075b8ff423a250243b 100644 (file)
@@ -3732,7 +3732,7 @@ IPW2100_ORD(STAT_TX_HOST_REQUESTS, "requested Host Tx's (MSDU)"),
            IPW2100_ORD(ASSOCIATED_AP_PTR,
                                "0 if not associated, else pointer to AP table entry"),
            IPW2100_ORD(AVAILABLE_AP_CNT,
-                               "AP's decsribed in the AP table"),
+                               "AP's described in the AP table"),
            IPW2100_ORD(AP_LIST_PTR, "Ptr to list of available APs"),
            IPW2100_ORD(STAT_AP_ASSNS, "associations"),
            IPW2100_ORD(STAT_ASSN_FAIL, "association failures"),
index 193947865efd0623034cee61cf1ebbfba5e86dd5..ce3e35f6b60f485faec5b0a9ba2ba9b35a435f8c 100644 (file)
@@ -1009,7 +1009,7 @@ typedef enum _ORDINAL_TABLE_1 {   // NS - means Not Supported by FW
        IPW_ORD_STAT_PERCENT_RETRIES,   // current calculation of % missed tx retries
        IPW_ORD_ASSOCIATED_AP_PTR,      // If associated, this is ptr to the associated
        // AP table entry. set to 0 if not associated
-       IPW_ORD_AVAILABLE_AP_CNT,       // # of AP's decsribed in the AP table
+       IPW_ORD_AVAILABLE_AP_CNT,       // # of AP's described in the AP table
        IPW_ORD_AP_LIST_PTR,    // Ptr to list of available APs
        IPW_ORD_STAT_AP_ASSNS,  // # of associations
        IPW_ORD_STAT_ASSN_FAIL, // # of association failures
index 87a5e414c2f7d863d206a949ba0a4dba75d7ddb7..ba3fb1d2ddb43b003cf0829cced21d9500d36e83 100644 (file)
@@ -12012,7 +12012,7 @@ MODULE_PARM_DESC(rtap_iface, "create the rtap interface (1 - create, default 0)"
 
 #ifdef CONFIG_IPW2200_QOS
 module_param(qos_enable, int, 0444);
-MODULE_PARM_DESC(qos_enable, "enable all QoS functionalitis");
+MODULE_PARM_DESC(qos_enable, "enable all QoS functionalities");
 
 module_param(qos_burst_enable, int, 0444);
 MODULE_PARM_DESC(qos_burst_enable, "enable QoS burst mode");
index b2573b1d15065b088de26863baa1a3d453008507..5916879849621dc079a1cb0cd61848201057527d 100644 (file)
@@ -1,6 +1,7 @@
 /******************************************************************************
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -27,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include "iwl-config.h"
-#include "iwl-csr.h"
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
@@ -91,7 +91,8 @@ static const struct iwl_eeprom_params iwl1000_eeprom_params = {
        .base_params = &iwl1000_base_params,                    \
        .eeprom_params = &iwl1000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl1000_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 1000 BGN",
@@ -117,7 +118,8 @@ const struct iwl_cfg iwl1000_bg_cfg = {
        .eeprom_params = &iwl1000_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
        .rx_with_siso_diversity = true,                         \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl100_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 100 BGN",
index 1b32ad413b9e04ef2c980e7b741fcb27f8b57007..a63ca8820568ac6daf8d0bfbdf23c5e44d77b85b 100644 (file)
@@ -1,6 +1,7 @@
 /******************************************************************************
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -115,7 +116,8 @@ static const struct iwl_eeprom_params iwl20x0_eeprom_params = {
        .base_params = &iwl2000_base_params,                    \
        .eeprom_params = &iwl20x0_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 
 const struct iwl_cfg iwl2000_2bgn_cfg = {
@@ -142,7 +144,8 @@ const struct iwl_cfg iwl2000_2bgn_d_cfg = {
        .base_params = &iwl2030_base_params,                    \
        .eeprom_params = &iwl20x0_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl2030_2bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 2230 BGN",
@@ -163,7 +166,8 @@ const struct iwl_cfg iwl2030_2bgn_cfg = {
        .eeprom_params = &iwl20x0_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
        .rx_with_siso_diversity = true,                         \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl105_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 105 BGN",
@@ -190,7 +194,8 @@ const struct iwl_cfg iwl105_bgn_d_cfg = {
        .eeprom_params = &iwl20x0_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
        .rx_with_siso_diversity = true,                         \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl135_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 135 BGN",
index f2b6e0e8d787e6edb1dd41745ccbfefb5e743bec..d4ba66aecdc94ada5b66545afcde6ff88a8756b0 100644 (file)
@@ -54,7 +54,6 @@
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include "iwl-config.h"
-#include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
 #define IWL_22000_UCODE_API_MAX        38
@@ -115,8 +114,6 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
        .ucode_api_max = IWL_22000_UCODE_API_MAX,                       \
        .ucode_api_min = IWL_22000_UCODE_API_MIN,                       \
        .device_family = IWL_DEVICE_FAMILY_22000,                       \
-       .max_inst_size = IWL60_RTC_INST_SIZE,                           \
-       .max_data_size = IWL60_RTC_DATA_SIZE,                           \
        .base_params = &iwl_22000_base_params,                          \
        .led_mode = IWL_LED_RF_STATE,                                   \
        .nvm_hw_section_num = NVM_HW_SECTION_NUM_FAMILY_22000,          \
@@ -143,6 +140,7 @@ const struct iwl_cfg iwl22000_2ac_cfg_hr = {
        .name = "Intel(R) Dual Band Wireless AC 22000",
        .fw_name_pre = IWL_22000_HR_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
@@ -153,6 +151,7 @@ const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb = {
        .name = "Intel(R) Dual Band Wireless AC 22000",
        .fw_name_pre = IWL_22000_HR_CDB_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
@@ -164,6 +163,7 @@ const struct iwl_cfg iwl22000_2ac_cfg_jf = {
        .name = "Intel(R) Dual Band Wireless AC 22000",
        .fw_name_pre = IWL_22000_JF_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
@@ -174,6 +174,7 @@ const struct iwl_cfg iwl22000_2ax_cfg_hr = {
        .name = "Intel(R) Dual Band Wireless AX 22000",
        .fw_name_pre = IWL_22000_HR_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
@@ -184,6 +185,7 @@ const struct iwl_cfg iwl22000_2ax_cfg_qnj_hr_f0 = {
        .name = "Intel(R) Dual Band Wireless AX 22000",
        .fw_name_pre = IWL_22000_HR_F0_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
@@ -194,6 +196,7 @@ const struct iwl_cfg iwl22000_2ax_cfg_qnj_jf_b0 = {
        .name = "Intel(R) Dual Band Wireless AX 22000",
        .fw_name_pre = IWL_22000_JF_B0_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
@@ -204,6 +207,7 @@ const struct iwl_cfg iwl22000_2ax_cfg_qnj_hr_a0 = {
        .name = "Intel(R) Dual Band Wireless AX 22000",
        .fw_name_pre = IWL_22000_HR_A0_FW_PRE,
        IWL_DEVICE_22000,
+       .csr = &iwl_csr_v1,
        .ht_params = &iwl_22000_ht_params,
        .nvm_ver = IWL_22000_NVM_VERSION,
        .nvm_calib_ver = IWL_22000_TX_POWER_VERSION,
index 4aa8f0a05c8aea14ec98392cd9e0c6091687b458..a224f1be1ec2cd614d947a26d8e35ed1fa917e27 100644 (file)
@@ -1,6 +1,7 @@
 /******************************************************************************
  *
  * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -28,7 +29,6 @@
 #include <linux/stringify.h>
 #include "iwl-config.h"
 #include "iwl-agn-hw.h"
-#include "iwl-csr.h"
 
 /* Highest firmware API version supported */
 #define IWL5000_UCODE_API_MAX 5
@@ -89,7 +89,8 @@ static const struct iwl_eeprom_params iwl5000_eeprom_params = {
        .base_params = &iwl5000_base_params,                    \
        .eeprom_params = &iwl5000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl5300_agn_cfg = {
        .name = "Intel(R) Ultimate N WiFi Link 5300 AGN",
@@ -153,7 +154,8 @@ const struct iwl_cfg iwl5350_agn_cfg = {
        .eeprom_params = &iwl5000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
        .internal_wimax_coex = true,                            \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl5150_agn_cfg = {
        .name = "Intel(R) WiMAX/WiFi Link 5150 AGN",
index 39335b7b0c165c69a4d1aa004069c70184ed9675..51cec0bb75fc760c82bb427026ef535bcba77126 100644 (file)
@@ -1,6 +1,7 @@
 /******************************************************************************
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -135,7 +136,8 @@ static const struct iwl_eeprom_params iwl6000_eeprom_params = {
        .base_params = &iwl6000_g2_base_params,                 \
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl6005_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6205 AGN",
@@ -189,7 +191,8 @@ const struct iwl_cfg iwl6005_2agn_mow2_cfg = {
        .base_params = &iwl6000_g2_base_params,                 \
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl6030_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6230 AGN",
@@ -225,7 +228,8 @@ const struct iwl_cfg iwl6030_2bg_cfg = {
        .base_params = &iwl6000_g2_base_params,                 \
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl6035_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6235 AGN",
@@ -280,7 +284,8 @@ const struct iwl_cfg iwl130_bg_cfg = {
        .base_params = &iwl6000_base_params,                    \
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl6000i_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6200 AGN",
@@ -313,7 +318,8 @@ const struct iwl_cfg iwl6000i_2bg_cfg = {
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
        .internal_wimax_coex = true,                            \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl6050_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N + WiMAX 6250 AGN",
@@ -339,7 +345,8 @@ const struct iwl_cfg iwl6050_2abg_cfg = {
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
        .internal_wimax_coex = true,                            \
-       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl6150_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BGN",
index ce741beec1fcf7c779cc993c4f6b240151257028..69bfa827e82adf6512f0e5b868150840ba636481 100644 (file)
@@ -7,7 +7,8 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015        Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -34,7 +35,8 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015        Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -68,7 +70,6 @@
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include "iwl-config.h"
-#include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
 #define IWL7260_UCODE_API_MAX  17
@@ -160,14 +161,13 @@ static const struct iwl_ht_params iwl7000_ht_params = {
 
 #define IWL_DEVICE_7000_COMMON                                 \
        .device_family = IWL_DEVICE_FAMILY_7000,                \
-       .max_inst_size = IWL60_RTC_INST_SIZE,                   \
-       .max_data_size = IWL60_RTC_DATA_SIZE,                   \
        .base_params = &iwl7000_base_params,                    \
        .led_mode = IWL_LED_RF_STATE,                           \
        .nvm_hw_section_num = NVM_HW_SECTION_NUM_FAMILY_7000,   \
        .non_shared_ant = ANT_A,                                \
        .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,    \
-       .dccm_offset = IWL7000_DCCM_OFFSET
+       .dccm_offset = IWL7000_DCCM_OFFSET,                     \
+       .csr = &iwl_csr_v1
 
 #define IWL_DEVICE_7000                                                \
        IWL_DEVICE_7000_COMMON,                                 \
index 3f4d9bac9f73a03d5c8b814876c03bf998b7f148..7262e973e0d6483f03f60b0d321693654f669040 100644 (file)
@@ -7,7 +7,8 @@
  *
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright(c) 2016        Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -34,6 +35,7 @@
  *
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,7 +69,6 @@
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include "iwl-config.h"
-#include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
 #define IWL8000_UCODE_API_MAX  36
@@ -140,8 +141,6 @@ static const struct iwl_tt_params iwl8000_tt_params = {
 
 #define IWL_DEVICE_8000_COMMON                                         \
        .device_family = IWL_DEVICE_FAMILY_8000,                        \
-       .max_inst_size = IWL60_RTC_INST_SIZE,                           \
-       .max_data_size = IWL60_RTC_DATA_SIZE,                           \
        .base_params = &iwl8000_base_params,                            \
        .led_mode = IWL_LED_RF_STATE,                                   \
        .nvm_hw_section_num = NVM_HW_SECTION_NUM_FAMILY_8000,           \
@@ -158,7 +157,8 @@ static const struct iwl_tt_params iwl8000_tt_params = {
        .apmg_not_supported = true,                                     \
        .nvm_type = IWL_NVM_EXT,                                        \
        .dbgc_supported = true,                                         \
-       .min_umac_error_event_table = 0x800000
+       .min_umac_error_event_table = 0x800000,                         \
+       .csr = &iwl_csr_v1
 
 #define IWL_DEVICE_8000                                                        \
        IWL_DEVICE_8000_COMMON,                                         \
index e1c869a1f8cc9b49d0f7267e7d864da15fe8b272..9706624911f8c988c26dcf90aff22737e9de3099 100644 (file)
@@ -54,7 +54,6 @@
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include "iwl-config.h"
-#include "iwl-agn-hw.h"
 #include "fw/file.h"
 
 /* Highest firmware API version supported */
@@ -135,8 +134,6 @@ static const struct iwl_tt_params iwl9000_tt_params = {
        .ucode_api_max = IWL9000_UCODE_API_MAX,                         \
        .ucode_api_min = IWL9000_UCODE_API_MIN,                         \
        .device_family = IWL_DEVICE_FAMILY_9000,                        \
-       .max_inst_size = IWL60_RTC_INST_SIZE,                           \
-       .max_data_size = IWL60_RTC_DATA_SIZE,                           \
        .base_params = &iwl9000_base_params,                            \
        .led_mode = IWL_LED_RF_STATE,                                   \
        .nvm_hw_section_num = NVM_HW_SECTION_NUM_FAMILY_9000,           \
@@ -156,7 +153,8 @@ static const struct iwl_tt_params iwl9000_tt_params = {
        .rf_id = true,                                                  \
        .nvm_type = IWL_NVM_EXT,                                        \
        .dbgc_supported = true,                                         \
-       .min_umac_error_event_table = 0x800000
+       .min_umac_error_event_table = 0x800000,                         \
+       .csr = &iwl_csr_v1
 
 const struct iwl_cfg iwl9160_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 9160",
index 7af3a0f51b77d951444a0a5990f913fa3d83d4f2..a17c4a79b8d468460fa3d62911e568e5e2f05668 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -30,7 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -749,13 +750,9 @@ struct iwl_scan_req_umac {
 } __packed;
 
 #define IWL_SCAN_REQ_UMAC_SIZE_V8 sizeof(struct iwl_scan_req_umac)
-#define IWL_SCAN_REQ_UMAC_SIZE_V7 (sizeof(struct iwl_scan_req_umac) - \
-                                        4 * sizeof(u8))
-#define IWL_SCAN_REQ_UMAC_SIZE_V6 (sizeof(struct iwl_scan_req_umac) - \
-                                  2 * sizeof(u8) - sizeof(__le16))
-#define IWL_SCAN_REQ_UMAC_SIZE_V1 (sizeof(struct iwl_scan_req_umac) - \
-                                  2 * sizeof(__le32) - 2 * sizeof(u8) - \
-                                  sizeof(__le16))
+#define IWL_SCAN_REQ_UMAC_SIZE_V7 48
+#define IWL_SCAN_REQ_UMAC_SIZE_V6 44
+#define IWL_SCAN_REQ_UMAC_SIZE_V1 36
 
 /**
  * struct iwl_umac_scan_abort
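
The size macros above are frozen as literals (48/44/36) rather than derived from sizeof(): once the struct grows for a new command version, subtraction from the current layout would silently skew the sizes used for older firmware. A hedged sketch of how such frozen sizes can be kept honest at compile time (the struct layout below is a stand-in, not the real command):

#include <stdint.h>

/* Stand-in layout; only the size relationships matter here. */
struct scan_req_umac_v8 {
	uint8_t common[36];	/* fields shared since V1 */
	uint8_t v6_extra[8];
	uint8_t v7_extra[4];
	uint8_t v8_extra[4];
};

#define SCAN_REQ_UMAC_SIZE_V8 sizeof(struct scan_req_umac_v8)
#define SCAN_REQ_UMAC_SIZE_V7 48
#define SCAN_REQ_UMAC_SIZE_V6 44
#define SCAN_REQ_UMAC_SIZE_V1 36

/* If the struct grows or shrinks unexpectedly, the build fails
 * instead of a malformed command reaching the firmware. */
_Static_assert(SCAN_REQ_UMAC_SIZE_V8 == 52, "V8 layout changed");
_Static_assert(SCAN_REQ_UMAC_SIZE_V7 < SCAN_REQ_UMAC_SIZE_V8,
	       "frozen sizes must stay below the current version");
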
index 9b2805e1e3b14d16761390a17a45f0fbac072bf5..9d939cbaf6c6ef84751b2a5aece20ea4b2069b22 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -143,6 +145,7 @@ enum iwl_ucode_tlv_type {
        IWL_UCODE_TLV_FW_DBG_TRIGGER    = 40,
        IWL_UCODE_TLV_FW_GSCAN_CAPA     = 50,
        IWL_UCODE_TLV_FW_MEM_SEG        = 51,
+       IWL_UCODE_TLV_IML               = 52,
 };
 
 struct iwl_ucode_tlv {
index b23ffe12ad84d1b747bd812c0e0e601127e6ae71..f4912382b6af39418eb84a7033ccc537b7f6e0a8 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016        Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016        Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -241,6 +243,8 @@ enum iwl_fw_type {
  * @ucode_ver: ucode version from the ucode file
  * @fw_version: firmware version string
  * @img: ucode image like ucode_rt, ucode_init, ucode_wowlan.
+ * @iml_len: length of the image loader (IML) image
+ * @iml: the image loader firmware image
  * @ucode_capa: capabilities parsed from the ucode file.
  * @enhance_sensitivity_table: device can do enhanced sensitivity.
  * @init_evtlog_ptr: event log offset for init ucode.
@@ -267,6 +271,8 @@ struct iwl_fw {
 
        /* ucode images */
        struct fw_img img[IWL_UCODE_TYPE_MAX];
+       size_t iml_len;
+       u8 *iml;
 
        struct iwl_ucode_capabilities ucode_capa;
        bool enhance_sensitivity_table;
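
The new @iml/@iml_len pair owns a private copy of the image-loader TLV: the parser duplicates the blob with kmemdup() and iwl_dealloc_ucode() frees it, as the drv.c hunks further down show. A minimal userspace sketch of that ownership pattern (names invented for illustration):

#include <stdlib.h>
#include <string.h>

struct fw_blob { unsigned char *iml; size_t iml_len; };

/* kmemdup() equivalent: take an owned copy of the TLV payload. */
static int store_iml(struct fw_blob *fw, const void *data, size_t len)
{
	fw->iml = malloc(len);
	if (!fw->iml)
		return -1;	/* -ENOMEM in the driver */
	memcpy(fw->iml, data, len);
	fw->iml_len = len;
	return 0;
}

/* Teardown frees unconditionally; free(NULL), like kfree(NULL), is a
 * no-op, so a firmware file without an IML TLV needs no special case. */
static void release_fw(struct fw_blob *fw)
{
	free(fw->iml);
	fw->iml = NULL;
	fw->iml_len = 0;
}
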
index 1fec8e3a6b35cea23eeccf7f1c1c9b9b1f4bcdba..9b8dd7fe7112c3856057e4632a513ddc2fdc3b7e 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -30,6 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -163,7 +165,7 @@ static int iwl_alloc_fw_paging_mem(struct iwl_fw_runtime *fwrt,
 static int iwl_fill_paging_mem(struct iwl_fw_runtime *fwrt,
                               const struct fw_img *image)
 {
-       int sec_idx, idx;
+       int sec_idx, idx, ret;
        u32 offset = 0;
 
        /*
@@ -190,17 +192,23 @@ static int iwl_fill_paging_mem(struct iwl_fw_runtime *fwrt,
         */
        if (sec_idx >= image->num_sec - 1) {
                IWL_ERR(fwrt, "Paging: Missing CSS and/or paging sections\n");
-               iwl_free_fw_paging(fwrt);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto err;
        }
 
        /* copy the CSS block to the dram */
        IWL_DEBUG_FW(fwrt, "Paging: load paging CSS to FW, sec = %d\n",
                     sec_idx);
 
+       if (image->sec[sec_idx].len > fwrt->fw_paging_db[0].fw_paging_size) {
+               IWL_ERR(fwrt, "CSS block is larger than paging size\n");
+               ret = -EINVAL;
+               goto err;
+       }
+
        memcpy(page_address(fwrt->fw_paging_db[0].fw_paging_block),
               image->sec[sec_idx].data,
-              fwrt->fw_paging_db[0].fw_paging_size);
+              image->sec[sec_idx].len);
        dma_sync_single_for_device(fwrt->trans->dev,
                                   fwrt->fw_paging_db[0].fw_paging_phys,
                                   fwrt->fw_paging_db[0].fw_paging_size,
@@ -213,17 +221,39 @@ static int iwl_fill_paging_mem(struct iwl_fw_runtime *fwrt,
        sec_idx++;
 
        /*
-        * copy the paging blocks to the dram
-        * loop index start from 1 since that CSS block already copied to dram
-        * and CSS index is 0.
-        * loop stop at num_of_paging_blk since that last block is not full.
+        * Copy the paging blocks to DRAM.  The loop index starts
+        * from 1 since the CSS block (index 0) was already copied to
+        * DRAM.  We use num_of_paging_blk + 1 to account for that.
         */
-       for (idx = 1; idx < fwrt->num_of_paging_blk; idx++) {
+       for (idx = 1; idx < fwrt->num_of_paging_blk + 1; idx++) {
                struct iwl_fw_paging *block = &fwrt->fw_paging_db[idx];
+               int remaining = image->sec[sec_idx].len - offset;
+               int len = block->fw_paging_size;
+
+               /*
+                * For the last block, copy all that remains; for all
+                * other blocks, copy fw_paging_size at a time.
+                */
+               if (idx == fwrt->num_of_paging_blk) {
+                       len = remaining;
+                       if (remaining !=
+                           fwrt->num_of_pages_in_last_blk * FW_PAGING_SIZE) {
+                               IWL_ERR(fwrt,
+                                       "Paging: unexpected amount of data in the last block: %d\n",
+                                       remaining);
+                               ret = -EINVAL;
+                               goto err;
+                       }
+               } else if (block->fw_paging_size > remaining) {
+                       IWL_ERR(fwrt,
+                               "Paging: not enough data in block %d (%d)\n",
+                               idx, remaining);
+                       ret = -EINVAL;
+                       goto err;
+               }
 
                memcpy(page_address(block->fw_paging_block),
-                      image->sec[sec_idx].data + offset,
-                      block->fw_paging_size);
+                      image->sec[sec_idx].data + offset, len);
                dma_sync_single_for_device(fwrt->trans->dev,
                                           block->fw_paging_phys,
                                           block->fw_paging_size,
@@ -231,30 +261,16 @@ static int iwl_fill_paging_mem(struct iwl_fw_runtime *fwrt,
 
                IWL_DEBUG_FW(fwrt,
                             "Paging: copied %d paging bytes to block %d\n",
-                            fwrt->fw_paging_db[idx].fw_paging_size,
-                            idx);
-
-               offset += fwrt->fw_paging_db[idx].fw_paging_size;
-       }
-
-       /* copy the last paging block */
-       if (fwrt->num_of_pages_in_last_blk > 0) {
-               struct iwl_fw_paging *block = &fwrt->fw_paging_db[idx];
+                            len, idx);
 
-               memcpy(page_address(block->fw_paging_block),
-                      image->sec[sec_idx].data + offset,
-                      FW_PAGING_SIZE * fwrt->num_of_pages_in_last_blk);
-               dma_sync_single_for_device(fwrt->trans->dev,
-                                          block->fw_paging_phys,
-                                          block->fw_paging_size,
-                                          DMA_BIDIRECTIONAL);
-
-               IWL_DEBUG_FW(fwrt,
-                            "Paging: copied %d pages in the last block %d\n",
-                            fwrt->num_of_pages_in_last_blk, idx);
+               offset += block->fw_paging_size;
        }
 
        return 0;
+
+err:
+       iwl_free_fw_paging(fwrt);
+       return ret;
 }
 
 static int iwl_save_fw_paging(struct iwl_fw_runtime *fwrt,
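
The rewritten loop above folds the old last-block special case into the main loop and validates the section as it copies: every block but the last must have at least fw_paging_size bytes remaining, and the last must hold exactly num_of_pages_in_last_blk pages. In aggregate those checks pin the section length, as this hedged sketch shows (helper and parameter names are illustrative, not the driver's):

#include <stdbool.h>
#include <stddef.h>

#define FW_PAGING_SIZE 4096	/* one firmware page */

/* Aggregate form of the per-block checks: paging blocks 1..n-1 are
 * full (block_size bytes each) and block n carries exactly
 * pages_in_last pages.  The CSS block is checked separately. */
static bool paging_section_len_ok(size_t sec_len, size_t block_size,
				  int num_paging_blocks, int pages_in_last)
{
	size_t full = (size_t)(num_paging_blocks - 1) * block_size;

	return sec_len == full + (size_t)pages_in_last * FW_PAGING_SIZE;
}
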
index 14e69e7a22873b59ceede9f7873b382d2acebd13..c503b26793f6dd77fb63da69c7a838c059eb12be 100644 (file)
@@ -7,6 +7,7 @@
  *
  * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
  * Copyright (C) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright (C) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -69,6 +71,7 @@
 #include <linux/netdevice.h>
 #include <linux/ieee80211.h>
 #include <linux/nl80211.h>
+#include "iwl-csr.h"
 
 enum iwl_device_family {
        IWL_DEVICE_FAMILY_UNDEFINED,
@@ -284,6 +287,52 @@ struct iwl_pwr_tx_backoff {
        u32 backoff;
 };
 
+/**
+ * struct iwl_csr_params - CSR flags and addresses that vary across devices
+ *
+ * @flag_sw_reset: reset the device
+ * @flag_mac_clock_ready:
+ *     Indicates MAC (ucode processor, etc.) is powered up and can run.
+ *     Internal resources are accessible.
+ *     NOTE:  This does not indicate that the processor is actually running.
+ *     NOTE:  This does not indicate that the device has completed
+ *            init or post-power-down restore of internal SRAM memory.
+ *            Use CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP as an indication that
+ *            SRAM is restored and uCode is in normal operation mode.
+ *            This note is relevant only for pre-5xxx devices.
+ *     NOTE:  After device reset, this bit remains "0" until the host
+ *            sets INIT_DONE
+ * @flag_init_done: Host sets this to put device into fully operational
+ *     D0 power mode. Host resets this after SW_RESET to put device into
+ *     low power mode.
+ * @flag_mac_access_req: Host sets this to request and maintain MAC wakeup,
+ *     to allow host access to device-internal resources. Host must wait for
+ *     mac_clock_ready (and !GOING_TO_SLEEP) before accessing non-CSR device
+ *     registers.
+ * @flag_val_mac_access_en: mac access is enabled
+ * @flag_master_dis: disable master
+ * @flag_stop_master: stop master
+ * @addr_sw_reset: address for resetting the device
+ * @mac_addr0_otp: first part of MAC address from OTP
+ * @mac_addr1_otp: second part of MAC address from OTP
+ * @mac_addr0_strap: first part of MAC address from strap
+ * @mac_addr1_strap: second part of MAC address from strap
+ */
+struct iwl_csr_params {
+       u8 flag_sw_reset;
+       u8 flag_mac_clock_ready;
+       u8 flag_init_done;
+       u8 flag_mac_access_req;
+       u8 flag_val_mac_access_en;
+       u8 flag_master_dis;
+       u8 flag_stop_master;
+       u8 addr_sw_reset;
+       u32 mac_addr0_otp;
+       u32 mac_addr1_otp;
+       u32 mac_addr0_strap;
+       u32 mac_addr1_strap;
+};
+
 /**
  * struct iwl_cfg
  * @name: Official name of the device
@@ -296,8 +345,8 @@ struct iwl_pwr_tx_backoff {
  *     next step. Supported only in integrated solutions.
  * @ucode_api_max: Highest version of uCode API supported by driver.
  * @ucode_api_min: Lowest version of uCode API supported by driver.
- * @max_inst_size: The maximal length of the fw inst section
- * @max_data_size: The maximal length of the fw data section
+ * @max_inst_size: The maximal length of the fw inst section (only DVM)
+ * @max_data_size: The maximal length of the fw data section (only DVM)
  * @valid_tx_ant: valid transmit antenna
  * @valid_rx_ant: valid receive antenna
  * @non_shared_ant: the antenna that is for WiFi only
@@ -316,6 +365,7 @@ struct iwl_pwr_tx_backoff {
  * @mac_addr_from_csr: read HW address from CSR registers
  * @features: hw features, any combination of feature_whitelist
  * @pwr_tx_backoffs: translation table between power limits and backoffs
+ * @csr: csr flags and addresses that are different across devices
  * @max_rx_agg_size: max RX aggregation size of the ADDBA request/response
  * @max_tx_agg_size: max TX aggregation size of the ADDBA request/response
  * @max_ht_ampdu_factor: the exponent of the max length of A-MPDU that the
@@ -354,6 +404,7 @@ struct iwl_cfg {
        const struct iwl_pwr_tx_backoff *pwr_tx_backoffs;
        const char *default_nvm_file_C_step;
        const struct iwl_tt_params *thermal_params;
+       const struct iwl_csr_params *csr;
        enum iwl_device_family device_family;
        enum iwl_led_mode led_mode;
        enum iwl_nvm_type nvm_type;
@@ -369,7 +420,7 @@ struct iwl_cfg {
        u32 soc_latency;
        u16 nvm_ver;
        u16 nvm_calib_ver;
-       u16 rx_with_siso_diversity:1,
+       u32 rx_with_siso_diversity:1,
            bt_shared_single_ant:1,
            internal_wimax_coex:1,
            host_interrupt_operation_mode:1,
@@ -400,6 +451,36 @@ struct iwl_cfg {
        u32 extra_phy_cfg_flags;
 };
 
+static const struct iwl_csr_params iwl_csr_v1 = {
+       .flag_mac_clock_ready = 0,
+       .flag_val_mac_access_en = 0,
+       .flag_init_done = 2,
+       .flag_mac_access_req = 3,
+       .flag_sw_reset = 7,
+       .flag_master_dis = 8,
+       .flag_stop_master = 9,
+       .addr_sw_reset = (CSR_BASE + 0x020),
+       .mac_addr0_otp = 0x380,
+       .mac_addr1_otp = 0x384,
+       .mac_addr0_strap = 0x388,
+       .mac_addr1_strap = 0x38C
+};
+
+static const struct iwl_csr_params iwl_csr_v2 = {
+       .flag_init_done = 6,
+       .flag_mac_clock_ready = 20,
+       .flag_val_mac_access_en = 20,
+       .flag_mac_access_req = 21,
+       .flag_master_dis = 28,
+       .flag_stop_master = 29,
+       .flag_sw_reset = 31,
+       .addr_sw_reset = (CSR_BASE + 0x024),
+       .mac_addr0_otp = 0x30,
+       .mac_addr1_otp = 0x34,
+       .mac_addr0_strap = 0x38,
+       .mac_addr1_strap = 0x3C
+};
+
 /*
  * This list declares the config structures for all devices.
  */
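
These two tables replace per-register #defines with data: transport code derives its masks from bit positions stored in the per-device config, so one routine serves devices whose CSR layout moved (note flag_init_done, bit 2 in v1, matches the old CSR_GP_CNTRL_REG_FLAG_INIT_DONE value of 0x00000004 removed below). A minimal sketch of the lookup, with simplified stand-in types:

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

struct csr_params { uint8_t flag_init_done; };
struct cfg { const struct csr_params *csr; };

static const struct csr_params csr_v1 = { .flag_init_done = 2 };
static const struct csr_params csr_v2 = { .flag_init_done = 6 };

/* was: iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE) */
static uint32_t init_done_mask(const struct cfg *cfg)
{
	return BIT(cfg->csr->flag_init_done);
}

int main(void)
{
	const struct cfg a = { &csr_v1 }, b = { &csr_v2 };

	printf("v1: 0x%08x  v2: 0x%08x\n",
	       init_done_mask(&a), init_done_mask(&b));
	return 0;
}
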
index 4f0d070eda54816dfdfd7ec8f67c6a2917942a4a..ba971d3946e2cce79ef6dd0630ac554a5fd635f6 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright(c) 2016        Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -34,6 +35,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 /* RESET */
 #define CSR_RESET_REG_FLAG_NEVO_RESET                (0x00000001)
 #define CSR_RESET_REG_FLAG_FORCE_NMI                 (0x00000002)
-#define CSR_RESET_REG_FLAG_SW_RESET                  (0x00000080)
 #define CSR_RESET_REG_FLAG_MASTER_DISABLED           (0x00000100)
 #define CSR_RESET_REG_FLAG_STOP_MASTER               (0x00000200)
 #define CSR_RESET_LINK_PWR_MGMT_DISABLED             (0x80000000)
  *     4:  GOING_TO_SLEEP
  *         Indicates MAC is entering a power-saving sleep power-down.
  *         Not a good time to access device-internal resources.
- *     3:  MAC_ACCESS_REQ
- *         Host sets this to request and maintain MAC wakeup, to allow host
- *         access to device-internal resources.  Host must wait for
- *         MAC_CLOCK_READY (and !GOING_TO_SLEEP) before accessing non-CSR
- *         device registers.
- *     2:  INIT_DONE
- *         Host sets this to put device into fully operational D0 power mode.
- *         Host resets this after SW_RESET to put device into low power mode.
- *     0:  MAC_CLOCK_READY
- *         Indicates MAC (ucode processor, etc.) is powered up and can run.
- *         Internal resources are accessible.
- *         NOTE:  This does not indicate that the processor is actually running.
- *         NOTE:  This does not indicate that device has completed
- *                init or post-power-down restore of internal SRAM memory.
- *                Use CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP as indication that
- *                SRAM is restored and uCode is in normal operation mode.
- *                Later devices (5xxx/6xxx/1xxx) use non-volatile SRAM, and
- *                do not need to save/restore it.
- *         NOTE:  After device reset, this bit remains "0" until host sets
- *                INIT_DONE
  */
-#define CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY        (0x00000001)
-#define CSR_GP_CNTRL_REG_FLAG_INIT_DONE              (0x00000004)
-#define CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ         (0x00000008)
 #define CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP         (0x00000010)
 #define CSR_GP_CNTRL_REG_FLAG_XTAL_ON               (0x00000400)
 
-#define CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN           (0x00000001)
-
 #define CSR_GP_CNTRL_REG_MSK_POWER_SAVE_TYPE         (0x07000000)
 #define CSR_GP_CNTRL_REG_FLAG_RFKILL_WAKE_L1A_EN     (0x04000000)
 #define CSR_GP_CNTRL_REG_FLAG_HW_RF_KILL_SW          (0x08000000)
index aa2d5c14e202d6a72f04ca17725a3de47f1699de..c59ce4f8a5edbdc2451d28f392dfbd38fba1a248 100644 (file)
@@ -179,6 +179,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv)
        for (i = 0; i < ARRAY_SIZE(drv->fw.dbg_trigger_tlv); i++)
                kfree(drv->fw.dbg_trigger_tlv[i]);
        kfree(drv->fw.dbg_mem_tlv);
+       kfree(drv->fw.iml);
 
        for (i = 0; i < IWL_UCODE_TYPE_MAX; i++)
                iwl_free_fw_img(drv, drv->fw.img + i);
@@ -1126,6 +1127,13 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv,
                        pieces->n_dbg_mem_tlv++;
                        break;
                        }
+               case IWL_UCODE_TLV_IML: {
+                       drv->fw.iml_len = tlv_len;
+                       drv->fw.iml = kmemdup(tlv_data, tlv_len, GFP_KERNEL);
+                       if (!drv->fw.iml)
+                               return -ENOMEM;
+                       break;
+                       }
                default:
                        IWL_DEBUG_INFO(drv, "unknown TLV: %d\n", tlv_type);
                        break;
@@ -1842,3 +1850,9 @@ MODULE_PARM_DESC(d0i3_timeout, "Timeout to D0i3 entry when idle (ms)");
 
 module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
 MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");
+
+module_param_named(remove_when_gone,
+                  iwlwifi_mod_params.remove_when_gone, bool,
+                  0444);
+MODULE_PARM_DESC(remove_when_gone,
+                "Remove dev from PCIe bus if it is deemed inaccessible (default: false)");
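
remove_when_gone is declared as an ordinary read-only (0444) boolean module parameter, so it can be set at load time with "modprobe iwlwifi remove_when_gone=1", on the kernel command line as iwlwifi.remove_when_gone=1, and read back under /sys/module/iwlwifi/parameters/. A minimal sketch of the same declaration pattern (the parameter name here is invented):

#include <linux/module.h>

static bool example_flag;
module_param_named(example_flag, example_flag, bool, 0444);
MODULE_PARM_DESC(example_flag,
		 "example read-only boolean parameter (default: false)");
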
index f2cea1c7befcb2582197369ddc110891b42a1008..ac965c34a2f89b44cee3575129babcd6239db1f8 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -199,12 +201,12 @@ static int iwl_init_otp_access(struct iwl_trans *trans)
        /* Enable 40MHz radio clock */
        iwl_write32(trans, CSR_GP_CNTRL,
                    iwl_read32(trans, CSR_GP_CNTRL) |
-                   CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+                   BIT(trans->cfg->csr->flag_init_done));
 
        /* wait for clock to be ready */
        ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
                           25000);
        if (ret < 0) {
                IWL_ERR(trans, "Time out access OTP\n");
index a41c46e63eb1cdaef7e3759c880d962a4cc9fb32..a7dd8a8cddf92c0fa039fdd53466666222de61ea 100644 (file)
@@ -122,6 +122,7 @@ enum iwl_uapsd_disable {
  * @lar_disable: disable LAR (regulatory), default = 0
  * @fw_monitor: allow to use firmware monitor
  * @disable_11ac: disable VHT capabilities, default = false.
+ * @remove_when_gone: remove an inaccessible device from the PCIe bus.
  */
 struct iwl_mod_params {
        int swcrypto;
@@ -143,6 +144,7 @@ struct iwl_mod_params {
        bool lar_disable;
        bool fw_monitor;
        bool disable_11ac;
+       bool remove_when_gone;
 };
 
 #endif /* #__iwl_modparams_h__ */
index 6d33c14579d9204cb46ee8c724efad836f9fdb0e..0f9d56420c42447431bbb5dfe74099536b8e8a60 100644 (file)
@@ -579,8 +579,12 @@ static void iwl_flip_hw_address(__le32 mac_addr0, __le32 mac_addr1, u8 *dest)
 static void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
                                        struct iwl_nvm_data *data)
 {
-       __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP));
-       __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP));
+       __le32 mac_addr0 =
+               cpu_to_le32(iwl_read32(trans,
+                                      trans->cfg->csr->mac_addr0_strap));
+       __le32 mac_addr1 =
+               cpu_to_le32(iwl_read32(trans,
+                                      trans->cfg->csr->mac_addr1_strap));
 
        iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
        /*
@@ -590,8 +594,10 @@ static void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
        if (is_valid_ether_addr(data->hw_addr))
                return;
 
-       mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP));
-       mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP));
+       mac_addr0 = cpu_to_le32(iwl_read32(trans,
+                                          trans->cfg->csr->mac_addr0_otp));
+       mac_addr1 = cpu_to_le32(iwl_read32(trans,
+                                          trans->cfg->csr->mac_addr1_otp));
 
        iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
 }
@@ -855,21 +861,31 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan,
        return flags;
 }
 
+struct regdb_ptrs {
+       struct ieee80211_wmm_rule *rule;
+       u32 token;
+};
+
 struct ieee80211_regdomain *
 iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
-                      int num_of_ch, __le32 *channels, u16 fw_mcc)
+                      int num_of_ch, __le32 *channels, u16 fw_mcc,
+                      u16 geo_info)
 {
        int ch_idx;
        u16 ch_flags;
        u32 reg_rule_flags, prev_reg_rule_flags = 0;
        const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ?
                             iwl_ext_nvm_channels : iwl_nvm_channels;
-       struct ieee80211_regdomain *regd;
-       int size_of_regd;
+       struct ieee80211_regdomain *regd, *copy_rd;
+       int size_of_regd, regd_to_copy, wmms_to_copy;
+       int size_of_wmms = 0;
        struct ieee80211_reg_rule *rule;
+       struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm;
+       struct regdb_ptrs *regdb_ptrs;
        enum nl80211_band band;
        int center_freq, prev_center_freq = 0;
-       int valid_rules = 0;
+       int valid_rules = 0, n_wmms = 0;
+       int i;
        bool new_rule;
        int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ?
                         IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS;
@@ -888,10 +904,26 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
                sizeof(struct ieee80211_regdomain) +
                num_of_ch * sizeof(struct ieee80211_reg_rule);
 
-       regd = kzalloc(size_of_regd, GFP_KERNEL);
+       if (geo_info & GEO_WMM_ETSI_5GHZ_INFO)
+               size_of_wmms =
+                       num_of_ch * sizeof(struct ieee80211_wmm_rule);
+
+       regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
        if (!regd)
                return ERR_PTR(-ENOMEM);
 
+       regdb_ptrs = kcalloc(num_of_ch, sizeof(*regdb_ptrs), GFP_KERNEL);
+       if (!regdb_ptrs) {
+               copy_rd = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       /* set alpha2 from FW. */
+       regd->alpha2[0] = fw_mcc >> 8;
+       regd->alpha2[1] = fw_mcc & 0xff;
+
+       wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
+
        for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
                ch_flags = (u16)__le32_to_cpup(channels + ch_idx);
                band = (ch_idx < NUM_2GHZ_CHANNELS) ?
@@ -940,15 +972,67 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 
                iwl_nvm_print_channel_flags(dev, IWL_DL_LAR,
                                            nvm_chan[ch_idx], ch_flags);
+
+               if (!(geo_info & GEO_WMM_ETSI_5GHZ_INFO) ||
+                   band == NL80211_BAND_2GHZ)
+                       continue;
+
+               if (!reg_query_regdb_wmm(regd->alpha2, center_freq,
+                                        &regdb_ptrs[n_wmms].token, wmm_rule)) {
+                       /* Add only new rules */
+                       for (i = 0; i < n_wmms; i++) {
+                               if (regdb_ptrs[i].token ==
+                                   regdb_ptrs[n_wmms].token) {
+                                       rule->wmm_rule = regdb_ptrs[i].rule;
+                                       break;
+                               }
+                       }
+                       if (i == n_wmms) {
+                               rule->wmm_rule = wmm_rule;
+                               regdb_ptrs[n_wmms++].rule = wmm_rule;
+                               wmm_rule++;
+                       }
+               }
        }
 
        regd->n_reg_rules = valid_rules;
+       regd->n_wmm_rules = n_wmms;
 
-       /* set alpha2 from FW. */
-       regd->alpha2[0] = fw_mcc >> 8;
-       regd->alpha2[1] = fw_mcc & 0xff;
+       /*
+        * Narrow the regdomain down to the valid rules only, so that no
+        * gap is left between the reg rules and the wmm rules.
+        */
+       regd_to_copy = sizeof(struct ieee80211_regdomain) +
+               valid_rules * sizeof(struct ieee80211_reg_rule);
+
+       wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms;
+
+       copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL);
+       if (!copy_rd) {
+               copy_rd = ERR_PTR(-ENOMEM);
+               goto out;
+       }
 
-       return regd;
+       memcpy(copy_rd, regd, regd_to_copy);
+       memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd,
+              wmms_to_copy);
+
+       d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy);
+       s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
+
+       for (i = 0; i < regd->n_reg_rules; i++) {
+               if (!regd->reg_rules[i].wmm_rule)
+                       continue;
+
+               /* pointer subtraction already yields an element count */
+               copy_rd->reg_rules[i].wmm_rule = d_wmm +
+                       (regd->reg_rules[i].wmm_rule - s_wmm);
+       }
+
+out:
+       kfree(regdb_ptrs);
+       kfree(regd);
+       return copy_rd;
 }
 IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info);
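
The copy at the end of iwl_parse_nvm_mcc_info() shrinks the oversized scratch regdomain to the rules actually used, and because each reg rule points into the WMM tail of its own buffer, those internal pointers must be re-based as new_base + (old_ptr - old_base). A self-contained sketch of the idiom (types and names invented; pointer subtraction already yields an element count, so no division by sizeof is involved):

#include <stdlib.h>
#include <string.h>

struct wmm_rule { int ac[4]; };
struct reg_rule { struct wmm_rule *wmm; };

/* Copy n_rules rules plus n_wmm wmm entries into one tight buffer,
 * re-basing every internal pointer into the new allocation. */
static struct reg_rule *shrink_copy(const struct reg_rule *rules,
				    const struct wmm_rule *old_wmm,
				    int n_rules, int n_wmm)
{
	size_t rsz = (size_t)n_rules * sizeof(struct reg_rule);
	size_t wsz = (size_t)n_wmm * sizeof(struct wmm_rule);
	char *buf = malloc(rsz + wsz);
	struct reg_rule *new_rules;
	struct wmm_rule *new_wmm;
	int i;

	if (!buf)
		return NULL;
	new_rules = (struct reg_rule *)buf;
	new_wmm = (struct wmm_rule *)(buf + rsz);
	memcpy(new_rules, rules, rsz);
	memcpy(new_wmm, old_wmm, wsz);
	for (i = 0; i < n_rules; i++)
		if (rules[i].wmm)	/* re-base, don't copy raw */
			new_rules[i].wmm = new_wmm + (rules[i].wmm - old_wmm);
	return new_rules;
}
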
 
index 60c7586f8342dae08e5fb7dda47aaa9b1367c2d6..234d1009a9de40a81a660cfeb8bf77d5753ab0fd 100644 (file)
@@ -100,13 +100,15 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
  *
  * This function parses the regulatory channel data received as a
  * MCC_UPDATE_CMD command. It returns a newly allocated regulatory domain,
- * to be fed into the regulatory core. An ERR_PTR is returned on error.
+ * to be fed into the regulatory core. If geo_info is set, it is handled
+ * accordingly. An ERR_PTR is returned on error.
  * If not given to the regulatory core, the user is responsible for freeing
  * the regdomain returned here with kfree.
  */
 struct ieee80211_regdomain *
 iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
-                      int num_of_ch, __le32 *channels, u16 fw_mcc);
+                      int num_of_ch, __le32 *channels, u16 fw_mcc,
+                      u16 geo_info);
 
 /**
  * struct iwl_nvm_section - describes an NVM section in memory.
index a928327116838ff16d98603712631a24bd154324..1b9c627ee34d9ca2ba96a7d217d4fe5fdae8e3bd 100644 (file)
@@ -691,6 +691,8 @@ enum iwl_plat_pm_mode {
  * @wide_cmd_header: true when ucode supports wide command header format
  * @num_rx_queues: number of RX queues allocated by the transport;
  *     the transport must set this before calling iwl_drv_start()
+ * @iml_len: the length of the image loader
+ * @iml: a pointer to the image loader itself
  * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only.
  *     The user should use iwl_trans_{alloc,free}_tx_cmd.
  * @rx_mpdu_cmd: MPDU RX command ID, must be assigned by opmode before
@@ -735,6 +737,9 @@ struct iwl_trans {
 
        u8 num_rx_queues;
 
+       size_t iml_len;
+       u8 *iml;
+
        /* The following fields are internal only */
        struct kmem_cache *dev_cmd_pool;
        char dev_cmd_pool_name[50];
index 80a9a7cb83bebc26c62be6fc388192e5e3e7f587..3fcf489f3120f19db48f38d4eaf39baddd17ca70 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
  * The full GNU General Public License is included in this distribution
  * in the file called COPYING.
  *
@@ -35,6 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -693,6 +690,14 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
                                    IWL_WOWLAN_WAKEUP_LINK_CHANGE);
        }
 
+       if (wowlan->any) {
+               wowlan_config_cmd->wakeup_filter |=
+                       cpu_to_le32(IWL_WOWLAN_WAKEUP_BEACON_MISS |
+                                   IWL_WOWLAN_WAKEUP_LINK_CHANGE |
+                                   IWL_WOWLAN_WAKEUP_RX_FRAME |
+                                   IWL_WOWLAN_WAKEUP_BCN_FILTERING);
+       }
+
        return 0;
 }
 
index 08318bdaaf2ecebd174caa0b3ffe3129b8035d8a..5f0701c992a40192b2e89a3eb57b6c7f1401775f 100644 (file)
@@ -311,7 +311,8 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy,
        regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg,
                                      __le32_to_cpu(resp->n_channels),
                                      resp->channels,
-                                     __le16_to_cpu(resp->mcc));
+                                     __le16_to_cpu(resp->mcc),
+                                     __le16_to_cpu(resp->geo_info));
        /* Store the return source id */
        src_id = resp->source_id;
        kfree(resp);
index de46e6258a5cdb7e0f0f5efca8b4b3706d1297cc..ff1e518096c511297c107ccf83d685d9ea81bf0e 100644 (file)
@@ -739,6 +739,9 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
               sizeof(trans->dbg_conf_tlv));
        trans->dbg_trigger_tlv = mvm->fw->dbg_trigger_tlv;
 
+       trans->iml = mvm->fw->iml;
+       trans->iml_len = mvm->fw->iml_len;
+
        /* set up notification wait support */
        iwl_notification_wait_init(&mvm->notif_wait);
 
index 34791628cfb3985a43479fb26d48341e99fab764..bb63e75a9b7f37911f89cf6320bd2987fa7d02a5 100644 (file)
@@ -151,17 +151,9 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr,
        unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
        if (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_PAD) {
+               len -= 2;
                pad_len = 2;
-
-               /*
-                * If the device inserted padding it means that (it thought)
-                * the 802.11 header wasn't a multiple of 4 bytes long. In
-                * this case, reserve two bytes at the start of the SKB to
-                * align the payload properly in case we end up copying it.
-                */
-               skb_reserve(skb, pad_len);
        }
-       len -= pad_len;
 
        /* If frame is small enough to fit in skb->head, pull it completely.
         * If not, only pull ieee80211_hdr (including crypto if present, and
@@ -866,6 +858,16 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
                return;
        }
 
+       if (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_PAD) {
+               /*
+                * If the device inserted padding it means that (it thought)
+                * the 802.11 header wasn't a multiple of 4 bytes long. In
+                * this case, reserve two bytes at the start of the SKB to
+                * align the payload properly in case we end up copying it.
+                */
+               skb_reserve(skb, 2);
+       }
+
        rx_status = IEEE80211_SKB_RXCB(skb);
 
        if (iwl_mvm_rx_crypto(mvm, hdr, rx_status, desc,
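
For context on the two padding hunks above: the device inserts two bytes between header and payload only when the 802.11 header length is not a multiple of four (for example, a 26-byte QoS data header), so that the payload lands 4-byte aligned. The fix drops those two bytes from the reported length where the skb is built and reserves two bytes of headroom in the caller so a copied payload stays aligned. A tiny worked example of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int hdrlen = 26;		/* QoS data header */
	unsigned int pad = (4 - (hdrlen & 3)) & 3;

	/* prints "pad = 2": the two bytes the device inserted and the
	 * driver must skip (len -= 2) and re-align (skb_reserve(skb, 2)) */
	printf("pad = %u\n", pad);
	return 0;
}
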
index d0916f2552e2b6c00552dbaf5a21c9a544c0d994..df4c60496f72f908df7cebf21da649e3e3c777f2 100644 (file)
@@ -803,7 +803,6 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
                return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
 
        if (iwl_mvm_vif_low_latency(iwl_mvm_vif_from_mac80211(mvmsta->vif)) ||
-           tid_to_mac80211_ac[tid] < IEEE80211_AC_BE ||
            !(mvmsta->amsdu_enabled & BIT(tid)))
                return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
 
index 0497c7a44def82b47b762e2fc9231de079b3c25e..b002a7afb5f591d8434b0858c37610392a2ec5ea 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -520,15 +522,15 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
 
                /* set INIT_DONE flag */
                iwl_set_bit(trans, CSR_GP_CNTRL,
-                           CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+                           BIT(trans->cfg->csr->flag_init_done));
 
                /* and wait for clock stabilization */
                if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
                        udelay(2);
 
                err = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                                  CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                                  CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
+                                  BIT(trans->cfg->csr->flag_mac_clock_ready),
+                                  BIT(trans->cfg->csr->flag_mac_clock_ready),
                                   25000);
                if (err < 0) {
                        IWL_DEBUG_INFO(trans,
index cda66340d357ec80f07637ae4a740550f32c001f..45ea32796cdaa7b030f55267f434f0084d1d8c44 100644 (file)
@@ -383,6 +383,8 @@ struct iwl_self_init_dram {
  * @hw_init_mask: initial unmasked hw causes
  * @fh_mask: current unmasked fh causes
  * @hw_mask: current unmasked hw causes
+ * @in_rescan: true if we have triggered a device rescan
+ * @scheduled_for_removal: true if we have scheduled a device removal
  */
 struct iwl_trans_pcie {
        struct iwl_rxq *rxq;
@@ -464,6 +466,9 @@ struct iwl_trans_pcie {
        u32 fh_mask;
        u32 hw_mask;
        cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
+       u16 tx_cmd_queue_size;
+       bool in_rescan;
+       bool scheduled_for_removal;
 };
 
 static inline struct iwl_trans_pcie *
index f25ce3a1ea50347678c5662e5868a3e37b9d139e..f772d70a65e43742ae8d3d7ab9e6b78818e91e6c 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * Portions of this file are derived from the ipw3945 project, as well
  * as portions of the ieee80211 subsystem header files.
@@ -201,7 +202,7 @@ static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans,
                        IWL_DEBUG_INFO(trans, "Rx queue requesting wakeup, GP1 = 0x%x\n",
                                       reg);
                        iwl_set_bit(trans, CSR_GP_CNTRL,
-                                   CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                   BIT(trans->cfg->csr->flag_mac_access_req));
                        rxq->need_update = true;
                        return;
                }
index cb4012541f450200a5dd2fbc52d17178bb25df87..b8e8dac2895de3d2e513f13ca5cac9d30f1fa963 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -19,6 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -92,7 +94,8 @@ static int iwl_pcie_gen2_apm_init(struct iwl_trans *trans)
         * Set "initialization complete" bit to move adapter from
         * D0U* --> D0A* (powered-up active) state.
         */
-       iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       iwl_set_bit(trans, CSR_GP_CNTRL,
+                   BIT(trans->cfg->csr->flag_init_done));
 
        /*
         * Wait for clock stabilization; once stabilized, access to
@@ -100,8 +103,9 @@ static int iwl_pcie_gen2_apm_init(struct iwl_trans *trans)
         * and accesses to uCode SRAM.
         */
        ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000);
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          25000);
        if (ret < 0) {
                IWL_DEBUG_INFO(trans, "Failed to init the card\n");
                return ret;
@@ -143,7 +147,8 @@ static void iwl_pcie_gen2_apm_stop(struct iwl_trans *trans, bool op_mode_leave)
         * Clear "initialization complete" bit to move adapter from
         * D0A* (powered-up Active) --> D0U* (Uninitialized) state.
         */
-       iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       iwl_clear_bit(trans, CSR_GP_CNTRL,
+                     BIT(trans->cfg->csr->flag_init_done));
 }
 
 void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans, bool low_power)
@@ -187,7 +192,7 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans, bool low_power)
 
        /* Make sure (redundant) we've released our request to stay awake */
        iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                     BIT(trans->cfg->csr->flag_mac_access_req));
 
        /* Stop the device, and put it in low power state */
        iwl_pcie_gen2_apm_stop(trans, false);
index f8a0234d332c2abeb90db93b616c491f4e886cae..6e9a9ecfb11ce039eb98ae713728c342eb1d0d27 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
  * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -73,6 +75,7 @@
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/pm_runtime.h>
+#include <linux/module.h>
 
 #include "iwl-drv.h"
 #include "iwl-trans.h"
@@ -179,7 +182,8 @@ static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans)
 static void iwl_trans_pcie_sw_reset(struct iwl_trans *trans)
 {
        /* Reset entire device - do controller reset (results in SHRD_HW_RST) */
-       iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET);
+       iwl_set_bit(trans, trans->cfg->csr->addr_sw_reset,
+                   BIT(trans->cfg->csr->flag_sw_reset));
        usleep_range(5000, 6000);
 }
 
@@ -372,7 +376,8 @@ static int iwl_pcie_apm_init(struct iwl_trans *trans)
         * Set "initialization complete" bit to move adapter from
         * D0U* --> D0A* (powered-up active) state.
         */
-       iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       iwl_set_bit(trans, CSR_GP_CNTRL,
+                   BIT(trans->cfg->csr->flag_init_done));
 
        /*
         * Wait for clock stabilization; once stabilized, access to
@@ -380,8 +385,9 @@ static int iwl_pcie_apm_init(struct iwl_trans *trans)
         * and accesses to uCode SRAM.
         */
        ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000);
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          25000);
        if (ret < 0) {
                IWL_ERR(trans, "Failed to init the card\n");
                return ret;
@@ -459,15 +465,16 @@ static void iwl_pcie_apm_lp_xtal_enable(struct iwl_trans *trans)
         * Set "initialization complete" bit to move adapter from
         * D0U* --> D0A* (powered-up active) state.
         */
-       iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       iwl_set_bit(trans, CSR_GP_CNTRL,
+                   BIT(trans->cfg->csr->flag_init_done));
 
        /*
         * Wait for clock stabilization; once stabilized, access to
         * device-internal resources is possible.
         */
        ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
                           25000);
        if (WARN_ON(ret < 0)) {
                IWL_ERR(trans, "Access time out - failed to enable LP XTAL\n");
@@ -519,7 +526,7 @@ static void iwl_pcie_apm_lp_xtal_enable(struct iwl_trans *trans)
         * D0A* (powered-up Active) --> D0U* (Uninitialized) state.
         */
        iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+                     BIT(trans->cfg->csr->flag_init_done));
 
        /* Activates XTAL resources monitor */
        __iwl_trans_pcie_set_bit(trans, CSR_MONITOR_CFG_REG,
@@ -541,11 +548,12 @@ void iwl_pcie_apm_stop_master(struct iwl_trans *trans)
        int ret;
 
        /* stop device's busmaster DMA activity */
-       iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER);
+       iwl_set_bit(trans, trans->cfg->csr->addr_sw_reset,
+                   BIT(trans->cfg->csr->flag_stop_master));
 
-       ret = iwl_poll_bit(trans, CSR_RESET,
-                          CSR_RESET_REG_FLAG_MASTER_DISABLED,
-                          CSR_RESET_REG_FLAG_MASTER_DISABLED, 100);
+       ret = iwl_poll_bit(trans, trans->cfg->csr->addr_sw_reset,
+                          BIT(trans->cfg->csr->flag_master_dis),
+                          BIT(trans->cfg->csr->flag_master_dis), 100);
        if (ret < 0)
                IWL_WARN(trans, "Master Disable Timed Out, 100 usec\n");
 
@@ -594,7 +602,7 @@ static void iwl_pcie_apm_stop(struct iwl_trans *trans, bool op_mode_leave)
         * D0A* (powered-up Active) --> D0U* (Uninitialized) state.
         */
        iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+                     BIT(trans->cfg->csr->flag_init_done));
 }
 
 static int iwl_pcie_nic_init(struct iwl_trans *trans)
@@ -1267,7 +1275,7 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 
        /* Make sure (redundant) we've released our request to stay awake */
        iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                     BIT(trans->cfg->csr->flag_mac_access_req));
 
        /* Stop the device, and put it in low power state */
        iwl_pcie_apm_stop(trans, false);
@@ -1497,9 +1505,9 @@ static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test,
        iwl_pcie_synchronize_irqs(trans);
 
        iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                     BIT(trans->cfg->csr->flag_mac_access_req));
        iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+                     BIT(trans->cfg->csr->flag_init_done));
 
        iwl_pcie_enable_rx_wake(trans, false);
 
@@ -1543,15 +1551,17 @@ static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans,
                iwl_pcie_reset_ict(trans);
        iwl_enable_interrupts(trans);
 
-       iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-       iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       iwl_set_bit(trans, CSR_GP_CNTRL,
+                   BIT(trans->cfg->csr->flag_mac_access_req));
+       iwl_set_bit(trans, CSR_GP_CNTRL,
+                   BIT(trans->cfg->csr->flag_init_done));
 
        if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_8000)
                udelay(2);
 
        ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
+                          BIT(trans->cfg->csr->flag_mac_clock_ready),
                           25000);
        if (ret < 0) {
                IWL_ERR(trans, "Failed to resume the device (mac ready)\n");
@@ -1562,7 +1572,7 @@ static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans,
 
        if (!reset) {
                iwl_clear_bit(trans, CSR_GP_CNTRL,
-                             CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                             BIT(trans->cfg->csr->flag_mac_access_req));
        } else {
                iwl_trans_pcie_tx_reset(trans);
 
@@ -1926,6 +1936,29 @@ static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state)
                clear_bit(STATUS_TPOWER_PMI, &trans->status);
 }
 
+struct iwl_trans_pcie_removal {
+       struct pci_dev *pdev;
+       struct work_struct work;
+};
+
+static void iwl_trans_pcie_removal_wk(struct work_struct *wk)
+{
+       struct iwl_trans_pcie_removal *removal =
+               container_of(wk, struct iwl_trans_pcie_removal, work);
+       struct pci_dev *pdev = removal->pdev;
+       char *prop[] = {"EVENT=INACCESSIBLE", NULL};
+
+       dev_err(&pdev->dev, "Device gone - attempting removal\n");
+       kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, prop);
+       pci_lock_rescan_remove();
+       pci_dev_put(pdev);
+       pci_stop_and_remove_bus_device(pdev);
+       pci_unlock_rescan_remove();
+
+       kfree(removal);
+       module_put(THIS_MODULE);
+}
+
 static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
                                           unsigned long *flags)
 {
@@ -1939,7 +1972,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
 
        /* this bit wakes up the NIC */
        __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
-                                CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                BIT(trans->cfg->csr->flag_mac_access_req));
        if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_8000)
                udelay(2);
 
@@ -1964,15 +1997,59 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
         * and do not save/restore SRAM when power cycling.
         */
        ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
-                          (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
+                          BIT(trans->cfg->csr->flag_val_mac_access_en),
+                          (BIT(trans->cfg->csr->flag_mac_clock_ready) |
                            CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
        if (unlikely(ret < 0)) {
-               iwl_trans_pcie_dump_regs(trans);
-               iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
+               u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);
+
                WARN_ONCE(1,
                          "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
-                         iwl_read32(trans, CSR_GP_CNTRL));
+                         cntrl);
+
+               iwl_trans_pcie_dump_regs(trans);
+
+               if (iwlwifi_mod_params.remove_when_gone && cntrl == ~0U) {
+                       struct iwl_trans_pcie_removal *removal;
+
+                       if (trans_pcie->scheduled_for_removal)
+                               goto err;
+
+                       IWL_ERR(trans, "Device gone - scheduling removal!\n");
+
+                       /*
+                        * take a module reference so that the work is
+                        * never scheduled while the module is being
+                        * unloaded, and its code cannot disappear
+                        * before the work has run.
+                        */
+                       if (!try_module_get(THIS_MODULE)) {
+                               IWL_ERR(trans,
+                                       "Module is being unloaded - abort\n");
+                               goto err;
+                       }
+
+                       removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
+                       if (!removal) {
+                               module_put(THIS_MODULE);
+                               goto err;
+                       }
+                       /*
+                        * we don't need to clear this flag, because
+                        * the trans will be freed and reallocated.
+                        */
+                       trans_pcie->scheduled_for_removal = true;
+
+                       removal->pdev = to_pci_dev(trans->dev);
+                       INIT_WORK(&removal->work, iwl_trans_pcie_removal_wk);
+                       pci_dev_get(removal->pdev);
+                       schedule_work(&removal->work);
+               } else {
+                       iwl_write32(trans, CSR_RESET,
+                                   CSR_RESET_REG_FLAG_FORCE_NMI);
+               }
+
+err:
                spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags);
                return false;
        }
@@ -2003,7 +2080,7 @@ static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans,
                goto out;
 
        __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
-                                  CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                  BIT(trans->cfg->csr->flag_mac_access_req));
        /*
         * Above we read the CSR_GP_CNTRL register, which will flush
         * any previous writes, but we need the write that clears the
@@ -3232,12 +3309,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
                 * id located at the AUX bus MISC address space.
                 */
                iwl_set_bit(trans, CSR_GP_CNTRL,
-                           CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+                           BIT(trans->cfg->csr->flag_init_done));
                udelay(2);
 
                ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                                  CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                                  CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
+                                  BIT(trans->cfg->csr->flag_mac_clock_ready),
+                                  BIT(trans->cfg->csr->flag_mac_clock_ready),
                                   25000);
                if (ret < 0) {
                        IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n");
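
With iwlwifi_mod_params.remove_when_gone set, iwl_trans_pcie_grab_nic_access() now treats an all-ones CSR read (cntrl == ~0U) as a dead device and defers the PCI remove to process context, pinning the module so the queued work cannot outlive its code. A kernel-style sketch of that defer-and-pin idiom; the struct and function names are illustrative, not the driver's:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct removal_work {
	struct work_struct work;
	/* resources the deferred handler needs, e.g. a device pointer */
};

static void removal_wk(struct work_struct *wk)
{
	struct removal_work *r = container_of(wk, struct removal_work, work);

	/* ... tear the device down in sleepable process context ... */

	kfree(r);
	module_put(THIS_MODULE);	/* drop the pin taken at schedule time */
}

static bool schedule_removal(void)
{
	struct removal_work *r;

	/* Pin the module: the work must not run after unload. */
	if (!try_module_get(THIS_MODULE))
		return false;

	r = kzalloc(sizeof(*r), GFP_ATOMIC);	/* caller may hold a spinlock */
	if (!r) {
		module_put(THIS_MODULE);
		return false;
	}

	INIT_WORK(&r->work, removal_wk);
	schedule_work(&r->work);
	return true;
}

The scheduled_for_removal flag in the hunk plays the role of a one-shot guard so the work is queued at most once per transport lifetime.
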
index cf468b9f5a82fc4351ef34b0dcd9de2c7e048575..473fe7ccb07c40fe70ebe966bd8dd5d22c2e2d9f 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * Portions of this file are derived from the ipw3945 project, as well
  * as portions of the ieee80211 subsystem header files.
@@ -273,7 +274,7 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
                        IWL_DEBUG_INFO(trans, "Tx queue %d requesting wakeup, GP1 = 0x%x\n",
                                       txq_id, reg);
                        iwl_set_bit(trans, CSR_GP_CNTRL,
-                                   CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                   BIT(trans->cfg->csr->flag_mac_access_req));
                        txq->need_update = true;
                        return;
                }
@@ -611,7 +612,7 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
 
        trans_pcie->cmd_hold_nic_awake = false;
        __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
-                                  CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                  BIT(trans->cfg->csr->flag_mac_access_req));
 }
 
 /*
@@ -1171,6 +1172,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
                                      const struct iwl_host_cmd *cmd)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       const struct iwl_cfg *cfg = trans->cfg;
        int ret;
 
        lockdep_assert_held(&trans_pcie->reg_lock);
@@ -1188,19 +1190,19 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
         * returned. This needs to be done only on NICs that have
         * apmg_wake_up_wa set.
         */
-       if (trans->cfg->base_params->apmg_wake_up_wa &&
+       if (cfg->base_params->apmg_wake_up_wa &&
            !trans_pcie->cmd_hold_nic_awake) {
                __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
-                                        CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                        BIT(cfg->csr->flag_mac_access_req));
 
                ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                                  CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
-                                  (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
+                                  BIT(cfg->csr->flag_val_mac_access_en),
+                                  (BIT(cfg->csr->flag_mac_clock_ready) |
                                    CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP),
                                   15000);
                if (ret < 0) {
                        __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
-                                       CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+                                       BIT(cfg->csr->flag_mac_access_req));
                        IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
                        return -EIO;
                }
@@ -2395,7 +2397,13 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
                goto out_err;
        iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, false);
 
-       if (amsdu) {
+       /*
+        * If gso_size wasn't set, don't give the frame "amsdu treatment"
+        * (adding subframes, etc.).
+        * This can happen in some testing flows when the amsdu was already
+        * pre-built, and we just need to send the resulting skb.
+        */
+       if (amsdu && skb_shinfo(skb)->gso_size) {
                if (unlikely(iwl_fill_data_tbs_amsdu(trans, skb, txq, hdr_len,
                                                     out_meta, dev_cmd,
                                                     tb1_len)))
index 96d26cfae90bd2acf55854f4818f11d4b14f2f5a..4a017a0d71ea47ae270119593f4c9e1db3632513 100644 (file)
@@ -3236,6 +3236,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
                        GENL_SET_ERR_MSG(info,"MAC is no valid source addr");
                        NL_SET_BAD_ATTR(info->extack,
                                        info->attrs[HWSIM_ATTR_PERM_ADDR]);
+                       kfree(hwname);
                        return -EINVAL;
                }
 
index 4857b75e54a75e9f99cb5d6b94b946f1d3f2f3e0..54a2297010d2cab5501a6c2f94e447df0e8a2671 100644 (file)
@@ -929,7 +929,7 @@ mwifiex_init_new_priv_params(struct mwifiex_private *priv,
        adapter->rx_locked = false;
        spin_unlock_irqrestore(&adapter->rx_proc_lock, flags);
 
-       mwifiex_set_mac_address(priv, dev);
+       mwifiex_set_mac_address(priv, dev, false, NULL);
 
        return 0;
 }
@@ -2979,7 +2979,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
        priv->netdev = dev;
 
        if (!adapter->mfg_mode) {
-               mwifiex_set_mac_address(priv, dev);
+               mwifiex_set_mac_address(priv, dev, false, NULL);
 
                ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE,
                                       HostCmd_ACT_GEN_SET, 0, NULL, true);
index 7014f440e6f8e86c9d4e26b46edfaff1eee86f82..9cfcdf6bec52c1b24fe67001b77a369a5f7a7ad2 100644 (file)
@@ -25,7 +25,6 @@
 #include "main.h"
 #include "wmm.h"
 #include "11n.h"
-#include "11ac.h"
 
 static void mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter);
 
index b6484582845a61c9dbb1b57381287cb67fd1a7fa..510f6b8e717d7f52eb2cdbb6e334e062f49053af 100644 (file)
@@ -858,7 +858,7 @@ mwifiex_clone_skb_for_tx_status(struct mwifiex_private *priv,
 /*
  * CFG802.11 network device handler for data transmission.
  */
-static int
+static netdev_tx_t
 mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
@@ -940,28 +940,32 @@ mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 int mwifiex_set_mac_address(struct mwifiex_private *priv,
-                           struct net_device *dev)
+                           struct net_device *dev, bool external,
+                           u8 *new_mac)
 {
        int ret;
        u64 mac_addr, old_mac_addr;
 
-       if (priv->bss_type == MWIFIEX_BSS_TYPE_ANY)
-               return -ENOTSUPP;
+       old_mac_addr = ether_addr_to_u64(priv->curr_addr);
 
-       mac_addr = ether_addr_to_u64(priv->curr_addr);
-       old_mac_addr = mac_addr;
+       if (external) {
+               mac_addr = ether_addr_to_u64(new_mac);
+       } else {
+               /* Internal mac address change */
+               if (priv->bss_type == MWIFIEX_BSS_TYPE_ANY)
+                       return -ENOTSUPP;
 
-       if (priv->bss_type == MWIFIEX_BSS_TYPE_P2P)
-               mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+               mac_addr = old_mac_addr;
 
-       if (mwifiex_get_intf_num(priv->adapter, priv->bss_type) > 1) {
-               /* Set mac address based on bss_type/bss_num */
-               mac_addr ^= BIT_ULL(priv->bss_type + 8);
-               mac_addr += priv->bss_num;
-       }
+               if (priv->bss_type == MWIFIEX_BSS_TYPE_P2P)
+                       mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
 
-       if (mac_addr == old_mac_addr)
-               goto done;
+               if (mwifiex_get_intf_num(priv->adapter, priv->bss_type) > 1) {
+                       /* Set mac address based on bss_type/bss_num */
+                       mac_addr ^= BIT_ULL(priv->bss_type + 8);
+                       mac_addr += priv->bss_num;
+               }
+       }
 
        u64_to_ether_addr(mac_addr, priv->curr_addr);
 
@@ -976,7 +980,6 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
                return ret;
        }
 
-done:
        ether_addr_copy(dev->dev_addr, priv->curr_addr);
        return 0;
 }
@@ -989,8 +992,7 @@ mwifiex_ndo_set_mac_address(struct net_device *dev, void *addr)
        struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
        struct sockaddr *hw_addr = addr;
 
-       memcpy(priv->curr_addr, hw_addr->sa_data, ETH_ALEN);
-       return mwifiex_set_mac_address(priv, dev);
+       return mwifiex_set_mac_address(priv, dev, true, hw_addr->sa_data);
 }
 
 /*
@@ -1331,7 +1333,10 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv,
        priv->assocresp_idx = MWIFIEX_AUTO_IDX_MASK;
        priv->gen_idx = MWIFIEX_AUTO_IDX_MASK;
        priv->num_tx_timeout = 0;
-       ether_addr_copy(priv->curr_addr, priv->adapter->perm_addr);
+       if (is_valid_ether_addr(dev->dev_addr))
+               ether_addr_copy(priv->curr_addr, dev->dev_addr);
+       else
+               ether_addr_copy(priv->curr_addr, priv->adapter->perm_addr);
 
        if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_STA ||
            GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP) {
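
mwifiex_set_mac_address() now distinguishes an externally requested address, taken verbatim from userspace, from the internally derived one, which perturbs the base address per interface: set the locally administered bit for P2P, flip one bit keyed on the BSS type, then add the BSS number. A small user-space sketch of that derivation; the bit position 41 for the locally administered flag and the sample values are assumptions for illustration:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n) (1ULL << (n))

/* Derive a per-interface MAC from the base address (illustrative). */
static uint64_t derive_mac(uint64_t base, unsigned int bss_type,
			   unsigned int bss_num, int p2p)
{
	uint64_t mac = base;

	if (p2p)
		mac |= BIT_ULL(41);	/* locally administered bit (assumed) */

	mac ^= BIT_ULL(bss_type + 8);	/* keep interface types apart */
	mac += bss_num;			/* and instances within a type */
	return mac;
}

int main(void)
{
	uint64_t base = 0x001122334455ULL;

	printf("%012" PRIx64 "\n", derive_mac(base, 1, 0, 0));
	printf("%012" PRIx64 "\n", derive_mac(base, 1, 1, 0));
	return 0;
}
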
index 9bde181700dc25de9a6303f749b91383454fa41d..8ae74ed78805f8b367fd05f398e8a11964436d94 100644 (file)
@@ -84,8 +84,8 @@ enum {
 #define MWIFIEX_TIMER_10S                      10000
 #define MWIFIEX_TIMER_1S                       1000
 
-#define MAX_TX_PENDING      100
-#define LOW_TX_PENDING      80
+#define MAX_TX_PENDING      400
+#define LOW_TX_PENDING      380
 
 #define HIGH_RX_PENDING     50
 #define LOW_RX_PENDING      20
@@ -1709,7 +1709,8 @@ void mwifiex_process_multi_chan_event(struct mwifiex_private *priv,
                                      struct sk_buff *event_skb);
 void mwifiex_multi_chan_resync(struct mwifiex_adapter *adapter);
 int mwifiex_set_mac_address(struct mwifiex_private *priv,
-                           struct net_device *dev);
+                           struct net_device *dev,
+                           bool external, u8 *new_mac);
 void mwifiex_devdump_tmo_func(unsigned long function_context);
 
 #ifdef CONFIG_DEBUG_FS
index e8c8728db15aa37cf62a63abef0264edb3894ba1..e86217a6b9ca193a49764824540ba2672a5d6749 100644 (file)
@@ -108,7 +108,7 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv)
        struct mwifiex_adapter *adapter = priv->adapter;
        int len, i;
        u32 eventcause = adapter->event_cause;
-       struct station_info sinfo;
+       struct station_info *sinfo;
        struct mwifiex_assoc_event *event;
        struct mwifiex_sta_node *node;
        u8 *deauth_mac;
@@ -117,7 +117,10 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv)
 
        switch (eventcause) {
        case EVENT_UAP_STA_ASSOC:
-               memset(&sinfo, 0, sizeof(sinfo));
+               sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+               if (!sinfo)
+                       return -ENOMEM;
+
                event = (struct mwifiex_assoc_event *)
                        (adapter->event_body + MWIFIEX_UAP_EVENT_EXTRA_HEADER);
                if (le16_to_cpu(event->type) == TLV_TYPE_UAP_MGMT_FRAME) {
@@ -132,28 +135,31 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv)
                                len = ETH_ALEN;
 
                        if (len != -1) {
-                               sinfo.assoc_req_ies = &event->data[len];
-                               len = (u8 *)sinfo.assoc_req_ies -
+                               sinfo->assoc_req_ies = &event->data[len];
+                               len = (u8 *)sinfo->assoc_req_ies -
                                      (u8 *)&event->frame_control;
-                               sinfo.assoc_req_ies_len =
+                               sinfo->assoc_req_ies_len =
                                        le16_to_cpu(event->len) - (u16)len;
                        }
                }
-               cfg80211_new_sta(priv->netdev, event->sta_addr, &sinfo,
+               cfg80211_new_sta(priv->netdev, event->sta_addr, sinfo,
                                 GFP_KERNEL);
 
                node = mwifiex_add_sta_entry(priv, event->sta_addr);
                if (!node) {
                        mwifiex_dbg(adapter, ERROR,
                                    "could not create station entry!\n");
+                       kfree(sinfo);
                        return -1;
                }
 
-               if (!priv->ap_11n_enabled)
+               if (!priv->ap_11n_enabled) {
+                       kfree(sinfo);
                        break;
+               }
 
-               mwifiex_set_sta_ht_cap(priv, sinfo.assoc_req_ies,
-                                      sinfo.assoc_req_ies_len, node);
+               mwifiex_set_sta_ht_cap(priv, sinfo->assoc_req_ies,
+                                      sinfo->assoc_req_ies_len, node);
 
                for (i = 0; i < MAX_NUM_TID; i++) {
                        if (node->is_11n_enabled)
@@ -163,6 +169,7 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv)
                                node->ampdu_sta[i] = BA_STREAM_NOT_ALLOWED;
                }
                memset(node->rx_seq, 0xff, sizeof(node->rx_seq));
+               kfree(sinfo);
                break;
        case EVENT_UAP_STA_DEAUTH:
                deauth_mac = adapter->event_body +
index dbf4057d2d3e163357c6d21d17e266dda1282890..b67acc6189bf391f7bc6241fa19d2b775fe26bf3 100644 (file)
@@ -103,6 +103,7 @@ mt76_rx_aggr_reorder_work(struct work_struct *work)
        __skb_queue_head_init(&frames);
 
        local_bh_disable();
+       rcu_read_lock();
 
        spin_lock(&tid->lock);
        mt76_rx_aggr_check_release(tid, &frames);
@@ -114,6 +115,7 @@ mt76_rx_aggr_reorder_work(struct work_struct *work)
                                             REORDER_TIMEOUT);
        mt76_rx_complete(dev, &frames, -1);
 
+       rcu_read_unlock();
        local_bh_enable();
 }
 
@@ -266,6 +268,8 @@ static void mt76_rx_aggr_shutdown(struct mt76_dev *dev, struct mt76_rx_tid *tid)
        u8 size = tid->size;
        int i;
 
+       cancel_delayed_work(&tid->reorder_work);
+
        spin_lock_bh(&tid->lock);
 
        tid->stopped = true;
@@ -280,8 +284,6 @@ static void mt76_rx_aggr_shutdown(struct mt76_dev *dev, struct mt76_rx_tid *tid)
        }
 
        spin_unlock_bh(&tid->lock);
-
-       cancel_delayed_work(&tid->reorder_work);
 }
 
 void mt76_rx_aggr_stop(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tidno)
index eb49e0a6758c063106eb775d405d0818970d23ba..915e617331313a77b3ac37e5e6b164d4678b41f9 100644 (file)
@@ -561,6 +561,7 @@ void mt76_rx_complete(struct mt76_dev *dev, struct sk_buff_head *frames,
        if (queue >= 0)
            napi = &dev->napi[queue];
 
+       spin_lock(&dev->rx_lock);
        while ((skb = __skb_dequeue(frames)) != NULL) {
                if (mt76_check_ccmp_pn(skb)) {
                        dev_kfree_skb(skb);
@@ -570,6 +571,7 @@ void mt76_rx_complete(struct mt76_dev *dev, struct sk_buff_head *frames,
                sta = mt76_rx_convert(skb);
                ieee80211_rx_napi(dev->hw, sta, skb, napi);
        }
+       spin_unlock(&dev->rx_lock);
 }
 
 void mt76_rx_poll_complete(struct mt76_dev *dev, enum mt76_rxq_id q)
index 065ff78059c38948a389d8c9434354b53dcb7e5c..a74e6eef51e97088400bebfa84648f3d469a87a2 100644 (file)
@@ -241,6 +241,7 @@ struct mt76_dev {
        struct device *dev;
 
        struct net_device napi_dev;
+       spinlock_t rx_lock;
        struct napi_struct napi[__MT_RXQ_MAX];
        struct sk_buff_head rx_skb[__MT_RXQ_MAX];
 
index 783b8122ec3c9b0121029bf5f294ae45f794ac70..a5d1255e4b9c8319d583043ea67a756fd3843d8f 100644 (file)
@@ -117,7 +117,6 @@ struct mt76x2_dev {
        u8 beacon_mask;
        u8 beacon_data_mask;
 
-       u32 rev;
        u32 rxfilter;
 
        u16 chainmask;
@@ -151,7 +150,7 @@ struct mt76x2_sta {
 
 static inline bool is_mt7612(struct mt76x2_dev *dev)
 {
-       return (dev->rev >> 16) == 0x7612;
+       return mt76_chip(&dev->mt76) == 0x7612;
 }
 
 void mt76x2_set_irq_mask(struct mt76x2_dev *dev, u32 clear, u32 set);
index 359b105235b3ac15111886e9b612cb3f66f93566..dd4c1127797efcf4b557c8bad3843cdcac776278 100644 (file)
@@ -370,12 +370,12 @@ void mt76x2_mac_stop(struct mt76x2_dev *dev, bool force)
 
        /* Wait for MAC to become idle */
        for (i = 0; i < 300; i++) {
-               if (mt76_rr(dev, MT_MAC_STATUS) &
-                   (MT_MAC_STATUS_RX | MT_MAC_STATUS_TX))
-                       continue;
-
-               if (mt76_rr(dev, MT_BBP(IBI, 12)))
+               if ((mt76_rr(dev, MT_MAC_STATUS) &
+                    (MT_MAC_STATUS_RX | MT_MAC_STATUS_TX)) ||
+                   mt76_rr(dev, MT_BBP(IBI, 12))) {
+                       usleep_range(10, 20);
                        continue;
+               }
 
                stopped = true;
                break;
@@ -645,6 +645,7 @@ struct mt76x2_dev *mt76x2_alloc_device(struct device *pdev)
        dev->mt76.drv = &drv_ops;
        mutex_init(&dev->mutex);
        spin_lock_init(&dev->irq_lock);
+       spin_lock_init(&dev->mt76.rx_lock);
 
        return dev;
 }
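
The MAC-stop hunk above folds the two busy conditions into one test and inserts usleep_range(10, 20) between polls, so the bounded wait no longer hammers the register bus back-to-back for up to 300 reads. A runnable user-space sketch of that polite-poll shape; hw_busy() is a stub standing in for the status-register reads:

#include <stdbool.h>
#include <time.h>

/* Stub for the hardware status read (assumption for the sketch). */
static bool hw_busy(void)
{
	static int countdown = 5;
	return countdown-- > 0;
}

static void sleep_us(long us)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = us * 1000 };
	nanosleep(&ts, NULL);
}

/* Bounded poll with a short back-off, mirroring the corrected loop. */
static bool wait_idle(void)
{
	for (int i = 0; i < 300; i++) {
		if (hw_busy()) {
			sleep_us(10);	/* usleep_range(10, 20) in the driver */
			continue;
		}
		return true;
	}
	return false;
}

int main(void)
{
	return wait_idle() ? 0 : 1;
}
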
index d183156525837bb7448090c451197e67a11fc158..dab7137560044c6c34ba433b8615be4e1ce5863c 100644 (file)
@@ -410,7 +410,6 @@ mt76x2_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate,
                break;
        default:
                return -EINVAL;
-               break;
        }
 
        if (rate & MT_RXWI_RATE_SGI)
index 038d1fe6c72670e3084a70fda6859cbdc5f20356..c1c38ca3330a0c62d8b4bdec33ee2853b7e283ab 100644 (file)
@@ -508,8 +508,10 @@ mt76x2_phy_update_channel_gain(struct mt76x2_dev *dev)
        u8 gain_delta;
        int low_gain;
 
-       dev->cal.avg_rssi[0] = (dev->cal.avg_rssi[0] * 15) / 16 + (rssi0 << 8);
-       dev->cal.avg_rssi[1] = (dev->cal.avg_rssi[1] * 15) / 16 + (rssi1 << 8);
+       dev->cal.avg_rssi[0] = (dev->cal.avg_rssi[0] * 15) / 16 +
+                              (rssi0 << 8) / 16;
+       dev->cal.avg_rssi[1] = (dev->cal.avg_rssi[1] * 15) / 16 +
+                              (rssi1 << 8) / 16;
        dev->cal.avg_rssi_all = (dev->cal.avg_rssi[0] +
                                 dev->cal.avg_rssi[1]) / 512;
 
index 4eef69bd8a9eadb8b1d5d139e733f2a45796ba33..7ecd2d7c5db4211c77bc936130980bf6c8378b06 100644 (file)
@@ -385,6 +385,10 @@ mt76_txq_schedule_list(struct mt76_dev *dev, struct mt76_queue *hwq)
                bool empty = false;
                int cur;
 
+               if (test_bit(MT76_SCANNING, &dev->state) ||
+                   test_bit(MT76_RESET, &dev->state))
+                       return -EBUSY;
+
                mtxq = list_first_entry(&hwq->swq, struct mt76_txq, list);
                if (mtxq->send_bar && mtxq->aggr) {
                        struct ieee80211_txq *txq = mtxq_to_txq(mtxq);
@@ -422,12 +426,14 @@ void mt76_txq_schedule(struct mt76_dev *dev, struct mt76_queue *hwq)
 {
        int len;
 
+       rcu_read_lock();
        do {
                if (hwq->swq_queued >= 4 || list_empty(&hwq->swq))
                        break;
 
                len = mt76_txq_schedule_list(dev, hwq);
        } while (len > 0);
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(mt76_txq_schedule);
 
index cf26c15a84f8e299ef6d6fe28acec7bd002daa65..b3bfb4faa9186b2251bc84e9016e5c42e326993f 100644 (file)
@@ -76,7 +76,7 @@ static int qtnf_netdev_close(struct net_device *ndev)
 
 /* Netdev handler for data transmission.
  */
-static int
+static netdev_tx_t
 qtnf_netdev_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct qtnf_vif *vif;
index cb2a6c12f870a50c8fed2175c8bba46d7568271a..16617c44f81b59c7aa52bb188fb6e302e4ba6b88 100644 (file)
@@ -34,12 +34,13 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
 {
        const u8 *sta_addr;
        u16 frame_control;
-       struct station_info sinfo = { 0 };
+       struct station_info *sinfo;
        size_t payload_len;
        u16 tlv_type;
        u16 tlv_value_len;
        size_t tlv_full_len;
        const struct qlink_tlv_hdr *tlv;
+       int ret = 0;
 
        if (unlikely(len < sizeof(*sta_assoc))) {
                pr_err("VIF%u.%u: payload is too short (%u < %zu)\n",
@@ -53,6 +54,10 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
                return -EPROTO;
        }
 
+       sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+       if (!sinfo)
+               return -ENOMEM;
+
        sta_addr = sta_assoc->sta_addr;
        frame_control = le16_to_cpu(sta_assoc->frame_control);
 
@@ -61,9 +66,9 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
 
        qtnf_sta_list_add(vif, sta_addr);
 
-       sinfo.assoc_req_ies = NULL;
-       sinfo.assoc_req_ies_len = 0;
-       sinfo.generation = vif->generation;
+       sinfo->assoc_req_ies = NULL;
+       sinfo->assoc_req_ies_len = 0;
+       sinfo->generation = vif->generation;
 
        payload_len = len - sizeof(*sta_assoc);
        tlv = (const struct qlink_tlv_hdr *)sta_assoc->ies;
@@ -73,23 +78,27 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
                tlv_value_len = le16_to_cpu(tlv->len);
                tlv_full_len = tlv_value_len + sizeof(struct qlink_tlv_hdr);
 
-               if (tlv_full_len > payload_len)
-                       return -EINVAL;
+               if (tlv_full_len > payload_len) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
                if (tlv_type == QTN_TLV_ID_IE_SET) {
                        const struct qlink_tlv_ie_set *ie_set;
                        unsigned int ie_len;
 
-                       if (payload_len < sizeof(*ie_set))
-                               return -EINVAL;
+                       if (payload_len < sizeof(*ie_set)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
 
                        ie_set = (const struct qlink_tlv_ie_set *)tlv;
                        ie_len = tlv_value_len -
                                (sizeof(*ie_set) - sizeof(ie_set->hdr));
 
                        if (ie_set->type == QLINK_IE_SET_ASSOC_REQ && ie_len) {
-                               sinfo.assoc_req_ies = ie_set->ie_data;
-                               sinfo.assoc_req_ies_len = ie_len;
+                               sinfo->assoc_req_ies = ie_set->ie_data;
+                               sinfo->assoc_req_ies_len = ie_len;
                        }
                }
 
@@ -97,13 +106,17 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
                tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len);
        }
 
-       if (payload_len)
-               return -EINVAL;
+       if (payload_len) {
+               ret = -EINVAL;
+               goto out;
+       }
 
-       cfg80211_new_sta(vif->netdev, sta_assoc->sta_addr, &sinfo,
+       cfg80211_new_sta(vif->netdev, sta_assoc->sta_addr, sinfo,
                         GFP_KERNEL);
 
-       return 0;
+out:
+       kfree(sinfo);
+       return ret;
 }
 
 static int
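
As in the mwifiex uap_event change above, struct station_info has grown too large to sit on the kernel stack, so it is kzalloc'd instead, and qtnf funnels every early return through a single exit label so the allocation is always freed. The shape of that single-exit cleanup pattern, as a kernel-style sketch with hypothetical names:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>

struct big_info {
	char blob[512];		/* stand-in for a stack-unfriendly struct */
};

static int handle_event(const void *payload, size_t len)
{
	struct big_info *info;
	int ret = 0;

	info = kzalloc(sizeof(*info), GFP_KERNEL);	/* off the stack */
	if (!info)
		return -ENOMEM;

	if (len < 16) {			/* every failure path funnels to out */
		ret = -EINVAL;
		goto out;
	}

	/* ... fill info from payload and hand it to the layer above ... */

out:
	kfree(info);
	return ret;
}
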
index 7a133e94b1bf4738f0b1b76462b63a4d8cc17c52..b05ed2f3025a7734c3826856cef1ee76dc1f2906 100644 (file)
 #define TX_PWR_CFG_3_MCS13             FIELD32(0x000000f0)
 #define TX_PWR_CFG_3_MCS14             FIELD32(0x00000f00)
 #define TX_PWR_CFG_3_MCS15             FIELD32(0x0000f000)
-#define TX_PWR_CFG_3_UKNOWN1           FIELD32(0x000f0000)
-#define TX_PWR_CFG_3_UKNOWN2           FIELD32(0x00f00000)
-#define TX_PWR_CFG_3_UKNOWN3           FIELD32(0x0f000000)
-#define TX_PWR_CFG_3_UKNOWN4           FIELD32(0xf0000000)
+#define TX_PWR_CFG_3_UNKNOWN1          FIELD32(0x000f0000)
+#define TX_PWR_CFG_3_UNKNOWN2          FIELD32(0x00f00000)
+#define TX_PWR_CFG_3_UNKNOWN3          FIELD32(0x0f000000)
+#define TX_PWR_CFG_3_UNKNOWN4          FIELD32(0xf0000000)
 /* bits for 3T devices */
 #define TX_PWR_CFG_3_MCS12_CH0         FIELD32(0x0000000f)
 #define TX_PWR_CFG_3_MCS12_CH1         FIELD32(0x000000f0)
  * TX_PWR_CFG_4:
  */
 #define TX_PWR_CFG_4                   0x1324
-#define TX_PWR_CFG_4_UKNOWN5           FIELD32(0x0000000f)
-#define TX_PWR_CFG_4_UKNOWN6           FIELD32(0x000000f0)
-#define TX_PWR_CFG_4_UKNOWN7           FIELD32(0x00000f00)
-#define TX_PWR_CFG_4_UKNOWN8           FIELD32(0x0000f000)
+#define TX_PWR_CFG_4_UNKNOWN5          FIELD32(0x0000000f)
+#define TX_PWR_CFG_4_UNKNOWN6          FIELD32(0x000000f0)
+#define TX_PWR_CFG_4_UNKNOWN7          FIELD32(0x00000f00)
+#define TX_PWR_CFG_4_UNKNOWN8          FIELD32(0x0000f000)
 /* bits for 3T devices */
 #define TX_PWR_CFG_4_STBC4_CH0         FIELD32(0x0000000f)
 #define TX_PWR_CFG_4_STBC4_CH1         FIELD32(0x000000f0)
index e827dc522580f4a38b0820dcabc519a6fe9b9f4c..a567bc273ffc6c725473710ecea9c82e5076284e 100644 (file)
@@ -1557,12 +1557,13 @@ static void rt2800_set_max_psdu_len(struct rt2x00_dev *rt2x00dev)
        rt2800_register_write(rt2x00dev, MAX_LEN_CFG, reg);
 }
 
-int rt2800_sta_add(struct rt2x00_dev *rt2x00dev, struct ieee80211_vif *vif,
+int rt2800_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                   struct ieee80211_sta *sta)
 {
-       int wcid;
-       struct rt2x00_sta *sta_priv = sta_to_rt2x00_sta(sta);
+       struct rt2x00_dev *rt2x00dev = hw->priv;
        struct rt2800_drv_data *drv_data = rt2x00dev->drv_data;
+       struct rt2x00_sta *sta_priv = sta_to_rt2x00_sta(sta);
+       int wcid;
 
        /*
         * Limit global maximum TX AMPDU length to smallest value of all
@@ -1608,8 +1609,10 @@ int rt2800_sta_add(struct rt2x00_dev *rt2x00dev, struct ieee80211_vif *vif,
 }
 EXPORT_SYMBOL_GPL(rt2800_sta_add);
 
-int rt2800_sta_remove(struct rt2x00_dev *rt2x00dev, struct ieee80211_sta *sta)
+int rt2800_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+                     struct ieee80211_sta *sta)
 {
+       struct rt2x00_dev *rt2x00dev = hw->priv;
        struct rt2800_drv_data *drv_data = rt2x00dev->drv_data;
        struct rt2x00_sta *sta_priv = sta_to_rt2x00_sta(sta);
        int wcid = sta_priv->wcid;
index 275e3969abddb3015e8d19b88d7b479469ee570b..51d9c2a932cc4181efc4ac1248da5133a3c68836 100644 (file)
@@ -208,9 +208,10 @@ int rt2800_config_shared_key(struct rt2x00_dev *rt2x00dev,
 int rt2800_config_pairwise_key(struct rt2x00_dev *rt2x00dev,
                               struct rt2x00lib_crypto *crypto,
                               struct ieee80211_key_conf *key);
-int rt2800_sta_add(struct rt2x00_dev *rt2x00dev, struct ieee80211_vif *vif,
+int rt2800_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                   struct ieee80211_sta *sta);
-int rt2800_sta_remove(struct rt2x00_dev *rt2x00dev, struct ieee80211_sta *sta);
+int rt2800_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+                     struct ieee80211_sta *sta);
 void rt2800_config_filter(struct rt2x00_dev *rt2x00dev,
                          const unsigned int filter_flags);
 void rt2800_config_intf(struct rt2x00_dev *rt2x00dev, struct rt2x00_intf *intf,
index 1123e2bed8036e449035f1ccccd8fbec890840d9..e1a7ed7e4892efb62138268149228ad20cddc876 100644 (file)
@@ -600,6 +600,7 @@ void rt2800mmio_kick_queue(struct data_queue *queue)
        case QID_AC_VI:
        case QID_AC_BE:
        case QID_AC_BK:
+               WARN_ON_ONCE(rt2x00queue_empty(queue));
                entry = rt2x00queue_get_entry(queue, Q_INDEX);
                rt2x00mmio_register_write(rt2x00dev, TX_CTX_IDX(queue->qid),
                                          entry->entry_idx);
index 1172eefd1c1a860d8d4abc59a4532970baf577b9..71b1affc38856be9cb6ef8a584b6244463c1a733 100644 (file)
@@ -311,8 +311,8 @@ static const struct ieee80211_ops rt2800pci_mac80211_ops = {
        .get_stats              = rt2x00mac_get_stats,
        .get_key_seq            = rt2800_get_key_seq,
        .set_rts_threshold      = rt2800_set_rts_threshold,
-       .sta_add                = rt2x00mac_sta_add,
-       .sta_remove             = rt2x00mac_sta_remove,
+       .sta_add                = rt2800_sta_add,
+       .sta_remove             = rt2800_sta_remove,
        .bss_info_changed       = rt2x00mac_bss_info_changed,
        .conf_tx                = rt2800_conf_tx,
        .get_tsf                = rt2800_get_tsf,
@@ -377,8 +377,6 @@ static const struct rt2x00lib_ops rt2800pci_rt2x00_ops = {
        .config_erp             = rt2800_config_erp,
        .config_ant             = rt2800_config_ant,
        .config                 = rt2800_config,
-       .sta_add                = rt2800_sta_add,
-       .sta_remove             = rt2800_sta_remove,
 };
 
 static const struct rt2x00_ops rt2800pci_ops = {
index 6848ebc83534a385a7ecae687bc077a70021d64d..a502816214ab0b53cecf749c42c6c1c2440c8812 100644 (file)
@@ -150,8 +150,8 @@ static const struct ieee80211_ops rt2800soc_mac80211_ops = {
        .get_stats              = rt2x00mac_get_stats,
        .get_key_seq            = rt2800_get_key_seq,
        .set_rts_threshold      = rt2800_set_rts_threshold,
-       .sta_add                = rt2x00mac_sta_add,
-       .sta_remove             = rt2x00mac_sta_remove,
+       .sta_add                = rt2800_sta_add,
+       .sta_remove             = rt2800_sta_remove,
        .bss_info_changed       = rt2x00mac_bss_info_changed,
        .conf_tx                = rt2800_conf_tx,
        .get_tsf                = rt2800_get_tsf,
@@ -216,8 +216,6 @@ static const struct rt2x00lib_ops rt2800soc_rt2x00_ops = {
        .config_erp             = rt2800_config_erp,
        .config_ant             = rt2800_config_ant,
        .config                 = rt2800_config,
-       .sta_add                = rt2800_sta_add,
-       .sta_remove             = rt2800_sta_remove,
 };
 
 static const struct rt2x00_ops rt2800soc_ops = {
index d901a41d36e41b10610e6e5ec8529ec201e2af2c..98a7313fea4aeee3bb49d01d871ab309b60f61f3 100644 (file)
@@ -797,8 +797,8 @@ static const struct ieee80211_ops rt2800usb_mac80211_ops = {
        .get_stats              = rt2x00mac_get_stats,
        .get_key_seq            = rt2800_get_key_seq,
        .set_rts_threshold      = rt2800_set_rts_threshold,
-       .sta_add                = rt2x00mac_sta_add,
-       .sta_remove             = rt2x00mac_sta_remove,
+       .sta_add                = rt2800_sta_add,
+       .sta_remove             = rt2800_sta_remove,
        .bss_info_changed       = rt2x00mac_bss_info_changed,
        .conf_tx                = rt2800_conf_tx,
        .get_tsf                = rt2800_get_tsf,
@@ -858,8 +858,6 @@ static const struct rt2x00lib_ops rt2800usb_rt2x00_ops = {
        .config_erp             = rt2800_config_erp,
        .config_ant             = rt2800_config_ant,
        .config                 = rt2800_config,
-       .sta_add                = rt2800_sta_add,
-       .sta_remove             = rt2800_sta_remove,
 };
 
 static void rt2800usb_queue_init(struct data_queue *queue)
index 1f38c338ca7a11cc610e6ca14673da7caafb1060..a279a4363bc15a2e0f502dd2d0428291df59eba3 100644 (file)
@@ -1457,10 +1457,6 @@ int rt2x00mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 #else
 #define rt2x00mac_set_key      NULL
 #endif /* CONFIG_RT2X00_LIB_CRYPTO */
-int rt2x00mac_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-                     struct ieee80211_sta *sta);
-int rt2x00mac_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-                        struct ieee80211_sta *sta);
 void rt2x00mac_sw_scan_start(struct ieee80211_hw *hw,
                             struct ieee80211_vif *vif,
                             const u8 *mac_addr);
index a971bc7a6b63963bbacbd257164e3e157f51f7b5..c380c1f56ba6f1a9a8982d4a3543fc2eeb9e8f4e 100644 (file)
@@ -739,8 +739,7 @@ void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                return;
 
        tx_queue_for_each(rt2x00dev, queue)
-               if (!rt2x00queue_empty(queue))
-                       rt2x00queue_flush_queue(queue, drop);
+               rt2x00queue_flush_queue(queue, drop);
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_flush);
 
index a6884e73d2abfb2228de1e83f91e2226c2b0b65c..7c1f8f561d4a909c694a486005df957aa0a4f3f0 100644 (file)
@@ -1000,6 +1000,8 @@ void rt2x00queue_flush_queue(struct data_queue *queue, bool drop)
                (queue->qid == QID_AC_BE) ||
                (queue->qid == QID_AC_BK);
 
+       if (rt2x00queue_empty(queue))
+               return;
 
        /*
         * If we are not supposed to drop any pending
index 8fce371749d34f8c8544e09cd641ee53079a8fe6..f22fec093f1dce6b92656c06925f6e516696d083 100644 (file)
@@ -1783,7 +1783,7 @@ static void btc8192e2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
                bool scan = false, link = false, roam = false;
 
                RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD,
-                        "[BTCoex], PsTdma type dismatch!!!, ");
+                        "[BTCoex], PsTdma type mismatch!!!, ");
                RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD,
                         "curPsTdma=%d, recordPsTdma=%d\n",
                         coex_dm->cur_ps_tdma, coex_dm->tdma_adj_type);
index 73ec31972944ed08ac8ac369be0dae00d1ecceed..279fe01bb55ea2b8c53d982442f3be4727f894f1 100644 (file)
@@ -2766,7 +2766,7 @@ static void btc8723b2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
        if (coex_dm->cur_ps_tdma != coex_dm->ps_tdma_du_adj_type) {
                bool scan = false, link = false, roam = false;
                RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD,
-                        "[BTCoex], PsTdma type dismatch!!!, curPsTdma=%d, recordPsTdma=%d\n",
+                        "[BTCoex], PsTdma type mismatch!!!, curPsTdma=%d, recordPsTdma=%d\n",
                         coex_dm->cur_ps_tdma, coex_dm->ps_tdma_du_adj_type);
 
                btcoexist->btc_get(btcoexist, BTC_GET_BL_WIFI_SCAN, &scan);
index 2202d5e18977c028dfbbdcbb6df7e79926f784cb..01a9d303603b560ee0bba0551e4b571352a8e835 100644 (file)
@@ -2614,7 +2614,7 @@ static void btc8821a2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
                bool scan = false, link = false, roam = false;
 
                RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD,
-                        "[BTCoex], PsTdma type dismatch!!!, cur_ps_tdma = %d, recordPsTdma = %d\n",
+                        "[BTCoex], PsTdma type mismatch!!!, cur_ps_tdma = %d, recordPsTdma = %d\n",
                         coex_dm->cur_ps_tdma, coex_dm->ps_tdma_du_adj_type);
 
                btcoexist->btc_get(btcoexist, BTC_GET_BL_WIFI_SCAN, &scan);
index 8b6b07a936f5a34318dce68158251a4e0e2bc36f..b026e80940a4dc6fa57d790ca9787b59a57d7c06 100644 (file)
@@ -158,16 +158,6 @@ static u8 halbtc_get_wifi_central_chnl(struct btc_coexist *btcoexist)
 
 static u8 rtl_get_hwpg_single_ant_path(struct rtl_priv *rtlpriv)
 {
-       struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
-
-       /* override ant_num / ant_path */
-       if (mod_params->ant_sel) {
-               rtlpriv->btcoexist.btc_info.ant_num =
-                       (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1);
-
-               rtlpriv->btcoexist.btc_info.single_ant_path =
-                       (mod_params->ant_sel == 1 ? 0 : 1);
-       }
        return rtlpriv->btcoexist.btc_info.single_ant_path;
 }
 
@@ -178,7 +168,6 @@ static u8 rtl_get_hwpg_bt_type(struct rtl_priv *rtlpriv)
 
 static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv)
 {
-       struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
        u8 num;
 
        if (rtlpriv->btcoexist.btc_info.ant_num == ANT_X2)
@@ -186,10 +175,6 @@ static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv)
        else
                num = 1;
 
-       /* override ant_num / ant_path */
-       if (mod_params->ant_sel)
-               num = (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1) + 1;
-
        return num;
 }
 
index fd7928fdbd1a9606cd9abafbe255c2ee9f6c4c57..3f2295fdb02d9dcec1afde59fd4100f308abe8a2 100644 (file)
@@ -2574,11 +2574,11 @@ void rtl92ee_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw,
                        rtlpriv->btcoexist.btc_info.btcoexist = 0;
 
                rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8192E;
-               rtlpriv->btcoexist.btc_info.ant_num = ANT_TOTAL_X2;
+               rtlpriv->btcoexist.btc_info.ant_num = ANT_X2;
        } else {
                rtlpriv->btcoexist.btc_info.btcoexist = 1;
                rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8192E;
-               rtlpriv->btcoexist.btc_info.ant_num = ANT_TOTAL_X1;
+               rtlpriv->btcoexist.btc_info.ant_num = ANT_X1;
        }
 }
 
index e7bbbc95cdb1f6ef5611c93f4bf97e269624bdc3..b4f3f91b590eae6569a07f55062363c704d3b73b 100644 (file)
@@ -848,6 +848,9 @@ static bool _rtl8723be_init_mac(struct ieee80211_hw *hw)
                return false;
        }
 
+       if (rtlpriv->cfg->ops->get_btc_status())
+               rtlpriv->btcoexist.btc_ops->btc_power_on_setting(rtlpriv);
+
        bytetmp = rtl_read_byte(rtlpriv, REG_MULTI_FUNC_CTRL);
        rtl_write_byte(rtlpriv, REG_MULTI_FUNC_CTRL, bytetmp | BIT(3));
 
@@ -2696,21 +2699,21 @@ void rtl8723be_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw,
                rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B;
                rtlpriv->btcoexist.btc_info.ant_num = (value & 0x1);
                rtlpriv->btcoexist.btc_info.single_ant_path =
-                        (value & 0x40);        /*0xc3[6]*/
+                        (value & 0x40 ? ANT_AUX : ANT_MAIN);   /*0xc3[6]*/
        } else {
                rtlpriv->btcoexist.btc_info.btcoexist = 0;
                rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B;
                rtlpriv->btcoexist.btc_info.ant_num = ANT_X2;
-               rtlpriv->btcoexist.btc_info.single_ant_path = 0;
+               rtlpriv->btcoexist.btc_info.single_ant_path = ANT_MAIN;
        }
 
        /* override ant_num / ant_path */
        if (mod_params->ant_sel) {
                rtlpriv->btcoexist.btc_info.ant_num =
-                       (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1);
+                       (mod_params->ant_sel == 1 ? ANT_X1 : ANT_X2);
 
                rtlpriv->btcoexist.btc_info.single_ant_path =
-                       (mod_params->ant_sel == 1 ? 0 : 1);
+                       (mod_params->ant_sel == 1 ? ANT_AUX : ANT_MAIN);
        }
 }
 
index d27e33960e77fb32232df67fb706669464bb12e5..208010fcde21d46c5946e3918b0fb3b8ce5d9e70 100644 (file)
@@ -2823,6 +2823,11 @@ enum bt_ant_num {
        ANT_X1 = 1,
 };
 
+enum bt_ant_path {
+       ANT_MAIN = 0,
+       ANT_AUX = 1,
+};
+
 enum bt_co_type {
        BT_2WIRE = 0,
        BT_ISSC_3WIRE = 1,
@@ -2837,11 +2842,6 @@ enum bt_co_type {
        BT_RTL8812A = 11,
 };
 
-enum bt_total_ant_num {
-       ANT_TOTAL_X2 = 0,
-       ANT_TOTAL_X1 = 1
-};
-
 enum bt_cur_state {
        BT_OFF = 0,
        BT_ON = 1,
index d055099dadf17f29c5867b1fe27412f0f83fad9f..c8ba148f8c6cf03054385c2ef5077cec18a26e1c 100644 (file)
@@ -73,6 +73,7 @@ int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg)
        switch (msg_type) {
        case COMMON_CARD_READY_IND:
                rsi_dbg(INFO_ZONE, "common card ready received\n");
+               common->hibernate_resume = false;
                rsi_handle_card_ready(common, msg);
                break;
        case SLEEP_NOTIFY_IND:
index 1f1b97220d4315073a8d5ec6ea3ea0168e5f0187..3644d7d994638cc20ba25562090802ca81946f76 100644 (file)
@@ -485,7 +485,7 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
        }
 
        rsi_core_queue_pkt(common, skb);
-       rsi_dbg(DATA_TX_ZONE, "%s: ===> Scheduling TX thead <===\n", __func__);
+       rsi_dbg(DATA_TX_ZONE, "%s: ===> Scheduling TX thread <===\n", __func__);
        rsi_set_event(&common->tx_thread.event);
 
        return;
index 766d874cc6e2acb05a1308d9ca0116a348b846c8..3faa0449a5ef1f4058c341615f6511827793a933 100644 (file)
@@ -911,14 +911,14 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw,
                }
        }
 
-       return rsi_hal_load_key(adapter->priv,
-                               key->key,
-                               key->keylen,
-                               key_type,
-                               key->keyidx,
-                               key->cipher,
-                               sta_id,
-                               vif);
+       status = rsi_hal_load_key(adapter->priv,
+                                 key->key,
+                                 key->keylen,
+                                 key_type,
+                                 key->keyidx,
+                                 key->cipher,
+                                 sta_id,
+                                 vif);
        if (status)
                return status;
 
@@ -1804,15 +1804,21 @@ int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan)
        struct rsi_common *common = adapter->priv;
        u16 triggers = 0;
        u16 rx_filter_word = 0;
-       struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf;
+       struct ieee80211_bss_conf *bss = NULL;
 
        rsi_dbg(INFO_ZONE, "Config WoWLAN to device\n");
 
+       if (!adapter->vifs[0])
+               return -EINVAL;
+
+       bss = &adapter->vifs[0]->bss_conf;
+
        if (WARN_ON(!wowlan)) {
                rsi_dbg(ERR_ZONE, "WoW triggers not enabled\n");
                return -EINVAL;
        }
 
+       common->wow_flags |= RSI_WOW_ENABLED;
        triggers = rsi_wow_map_triggers(common, wowlan);
        if (!triggers) {
                rsi_dbg(ERR_ZONE, "%s:No valid WoW triggers\n", __func__);
@@ -1835,7 +1841,6 @@ int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan)
 
        rx_filter_word = (ALLOW_DATA_ASSOC_PEER | DISALLOW_BEACONS);
        rsi_send_rx_filter_frame(common, rx_filter_word);
-       common->wow_flags |= RSI_WOW_ENABLED;
 
        return 0;
 }
index b065438f51b24b991cb21225a4801fcb4e49dc68..6ce6b754df12c120d057d4d3c2374f4397893027 100644 (file)
@@ -687,9 +687,10 @@ static int rsi_reset_card(struct rsi_hw *adapter)
         */
        msleep(100);
 
-       if (rsi_usb_master_reg_write(adapter, SWBL_REGOUT,
-                                    RSI_FW_WDT_DISABLE_REQ,
-                                    RSI_COMMON_REG_SIZE) < 0) {
+       ret = rsi_usb_master_reg_write(adapter, SWBL_REGOUT,
+                                      RSI_FW_WDT_DISABLE_REQ,
+                                      RSI_COMMON_REG_SIZE);
+       if (ret < 0) {
                rsi_dbg(ERR_ZONE, "Disabling firmware watchdog timer failed\n");
                goto fail;
        }
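
This hunk, like the rsi_hal_load_key change earlier, replaces an inline `if (call() < 0)` with capturing the return value first, so the status can be both logged and propagated. A minimal sketch of the idiom, assuming the rsi helpers of this file are visible (example_disable_wdt is hypothetical):

        static int example_disable_wdt(struct rsi_hw *adapter)
        {
                int ret;

                /* capture the status so it can be logged and returned */
                ret = rsi_usb_master_reg_write(adapter, SWBL_REGOUT,
                                               RSI_FW_WDT_DISABLE_REQ,
                                               RSI_COMMON_REG_SIZE);
                if (ret < 0)
                        rsi_dbg(ERR_ZONE, "watchdog disable failed: %d\n", ret);
                return ret;
        }
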
index 1f727babbea0e8fc15f1b0709cb6395a237afd1d..6dbe61d47dc35a504cec8045b5626c7d2a5791b8 100644 (file)
@@ -155,17 +155,11 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue)
        struct mmc_card *card = func->card;
 
        ret = pm_runtime_get_sync(&card->dev);
-       if (ret) {
-               /*
-                * Runtime PM might be temporarily disabled, or the device
-                * might have a positive reference counter. Make sure it is
-                * really powered on.
-                */
-               ret = mmc_power_restore_host(card->host);
-               if (ret < 0) {
-                       pm_runtime_put_sync(&card->dev);
-                       goto out;
-               }
+       if (ret < 0) {
+               pm_runtime_put_noidle(&card->dev);
+               dev_err(glue->dev, "%s: failed to get_sync(%d)\n",
+                       __func__, ret);
+               goto out;
        }
 
        sdio_claim_host(func);
@@ -178,7 +172,6 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue)
 
 static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue)
 {
-       int ret;
        struct sdio_func *func = dev_to_sdio_func(glue->dev);
        struct mmc_card *card = func->card;
 
@@ -186,16 +179,8 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue)
        sdio_disable_func(func);
        sdio_release_host(func);
 
-       /* Power off the card manually in case it wasn't powered off above */
-       ret = mmc_power_save_host(card->host);
-       if (ret < 0)
-               goto out;
-
        /* Let runtime PM know the card is powered off */
-       pm_runtime_put_sync(&card->dev);
-
-out:
-       return ret;
+       return pm_runtime_put_sync(&card->dev);
 }
 
 static int wl12xx_sdio_set_power(struct device *child, bool enable)
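
The rewritten power-on path follows the standard runtime-PM error idiom: a failed pm_runtime_get_sync() has already incremented the usage counter, so the counter must be dropped with pm_runtime_put_noidle() before bailing out. A minimal sketch of that idiom for a hypothetical device (not the wl12xx code itself):

        #include <linux/pm_runtime.h>

        static int example_power_on(struct device *dev)
        {
                int ret = pm_runtime_get_sync(dev);

                if (ret < 0) {
                        /* the failed get still took a reference; drop it */
                        pm_runtime_put_noidle(dev);
                        return ret;
                }
                return 0;       /* balanced later by pm_runtime_put_sync() */
        }
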
index 85997184e04734d09538176a2a0798145e1435b6..9d36473dc2a24f2100d4e748cb5540ce14931c0c 100644 (file)
@@ -103,8 +103,7 @@ config NVDIMM_DAX
          Select Y if unsure
 
 config OF_PMEM
-       # FIXME: make tristate once OF_NUMA dependency removed
-       bool "Device-tree support for persistent memory regions"
+       tristate "Device-tree support for persistent memory regions"
        depends on OF
        default LIBNVDIMM
        help
index e00d45522b80e80fb4de68a8c506a3c8e68e6548..8d348b22ba453a58938d9fa921dd890109a4c7de 100644 (file)
@@ -88,9 +88,9 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 {
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+       int rc = validate_dimm(ndd), cmd_rc = 0;
        struct nd_cmd_get_config_data_hdr *cmd;
        struct nvdimm_bus_descriptor *nd_desc;
-       int rc = validate_dimm(ndd);
        u32 max_cmd_size, config_size;
        size_t offset;
 
@@ -124,9 +124,11 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
                cmd->in_offset = offset;
                rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
                                ND_CMD_GET_CONFIG_DATA, cmd,
-                               cmd->in_length + sizeof(*cmd), NULL);
-               if (rc || cmd->status) {
-                       rc = -ENXIO;
+                               cmd->in_length + sizeof(*cmd), &cmd_rc);
+               if (rc < 0)
+                       break;
+               if (cmd_rc < 0) {
+                       rc = cmd_rc;
                        break;
                }
                memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
@@ -140,9 +142,9 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
                void *buf, size_t len)
 {
-       int rc = validate_dimm(ndd);
        size_t max_cmd_size, buf_offset;
        struct nd_cmd_set_config_hdr *cmd;
+       int rc = validate_dimm(ndd), cmd_rc = 0;
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
        struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
 
@@ -164,7 +166,6 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
        for (buf_offset = 0; len; len -= cmd->in_length,
                        buf_offset += cmd->in_length) {
                size_t cmd_size;
-               u32 *status;
 
                cmd->in_offset = offset + buf_offset;
                cmd->in_length = min(max_cmd_size, len);
@@ -172,12 +173,13 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 
                /* status is output in the last 4-bytes of the command buffer */
                cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32);
-               status = ((void *) cmd) + cmd_size - sizeof(u32);
 
                rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
-                               ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL);
-               if (rc || *status) {
-                       rc = rc ? rc : -ENXIO;
+                               ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, &cmd_rc);
+               if (rc < 0)
+                       break;
+               if (cmd_rc < 0) {
+                       rc = cmd_rc;
                        break;
                }
        }
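
Both config-data paths now separate the transport return code from the firmware command status by passing a cmd_rc pointer to ->ndctl(). A hedged sketch of the resulting two-level check (example_call is illustrative, not the driver's exact code):

        static int example_call(struct nvdimm_bus_descriptor *nd_desc,
                                struct nvdimm *nvdimm, void *buf,
                                unsigned int len)
        {
                int cmd_rc = 0;
                int rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_GET_CONFIG_DATA,
                                        buf, len, &cmd_rc);

                if (rc < 0)             /* the call itself failed */
                        return rc;
                if (cmd_rc < 0)         /* the DIMM rejected the command */
                        return cmd_rc;
                return 0;
        }
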
index 85013bad35de4e176f82f253723f91600c084ea3..0a701837dfc0b9bd6c011f5ee092feb101a15acb 100644 (file)
@@ -67,7 +67,7 @@ static int of_pmem_region_probe(struct platform_device *pdev)
                 */
                memset(&ndr_desc, 0, sizeof(ndr_desc));
                ndr_desc.attr_groups = region_attr_groups;
-               ndr_desc.numa_node = of_node_to_nid(np);
+               ndr_desc.numa_node = dev_to_node(&pdev->dev);
                ndr_desc.res = &pdev->resource[i];
                ndr_desc.of_node = np;
                set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
index b979cf3bce65f48602df440bb0bbfaea00c37bec..88a8b5916624ae6b0805366ee8ed8325a3191a08 100644 (file)
@@ -27,7 +27,7 @@ config NVME_FABRICS
 
 config NVME_RDMA
        tristate "NVM Express over Fabrics RDMA host driver"
-       depends on INFINIBAND && BLOCK
+       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
        select NVME_CORE
        select NVME_FABRICS
        select SG_POOL
index 9df4f71e58caa0f93e8a6175cb48ec8d78f164e0..99b857e5a7a9c7dcde1e031eeab99f3737f026c3 100644 (file)
@@ -99,6 +99,7 @@ static struct class *nvme_subsys_class;
 
 static void nvme_ns_remove(struct nvme_ns *ns);
 static int nvme_revalidate_disk(struct gendisk *disk);
+static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -117,7 +118,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
        ret = nvme_reset_ctrl(ctrl);
        if (!ret) {
                flush_work(&ctrl->reset_work);
-               if (ctrl->state != NVME_CTRL_LIVE)
+               if (ctrl->state != NVME_CTRL_LIVE &&
+                   ctrl->state != NVME_CTRL_ADMIN_ONLY)
                        ret = -ENETRESET;
        }
 
@@ -350,6 +352,7 @@ static void nvme_free_ns_head(struct kref *ref)
        ida_simple_remove(&head->subsys->ns_ida, head->instance);
        list_del_init(&head->entry);
        cleanup_srcu_struct(&head->srcu);
+       nvme_put_subsystem(head->subsys);
        kfree(head);
 }
 
@@ -764,6 +767,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
                                ret = PTR_ERR(meta);
                                goto out_unmap;
                        }
+                       req->cmd_flags |= REQ_INTEGRITY;
                }
        }
 
@@ -2860,6 +2864,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
                goto out_cleanup_srcu;
 
        list_add_tail(&head->entry, &ctrl->subsys->nsheads);
+
+       kref_get(&ctrl->subsys->ref);
+
        return head;
 out_cleanup_srcu:
        cleanup_srcu_struct(&head->srcu);
@@ -2997,31 +3004,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        if (nvme_init_ns_head(ns, nsid, id))
                goto out_free_id;
        nvme_setup_streams_ns(ctrl, ns);
-       
-#ifdef CONFIG_NVME_MULTIPATH
-       /*
-        * If multipathing is enabled we need to always use the subsystem
-        * instance number for numbering our devices to avoid conflicts
-        * between subsystems that have multiple controllers and thus use
-        * the multipath-aware subsystem node and those that have a single
-        * controller and use the controller node directly.
-        */
-       if (ns->head->disk) {
-               sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
-                               ctrl->cntlid, ns->head->instance);
-               flags = GENHD_FL_HIDDEN;
-       } else {
-               sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
-                               ns->head->instance);
-       }
-#else
-       /*
-        * But without the multipath code enabled, multiple controller per
-        * subsystems are visible as devices and thus we cannot use the
-        * subsystem instance.
-        */
-       sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
-#endif
+       nvme_set_disk_name(disk_name, ns, ctrl, &flags);
 
        if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
                if (nvme_nvm_register(ns, disk_name, node)) {
index 124c458806dfc2c910785c53b56ef295686afeae..7ae732a77fe8fac8818bc3482f6b5ddece95c732 100644 (file)
@@ -668,6 +668,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -ENOMEM;
                                goto out;
                        }
+                       kfree(opts->transport);
                        opts->transport = p;
                        break;
                case NVMF_OPT_NQN:
@@ -676,6 +677,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -ENOMEM;
                                goto out;
                        }
+                       kfree(opts->subsysnqn);
                        opts->subsysnqn = p;
                        nqnlen = strlen(opts->subsysnqn);
                        if (nqnlen >= NVMF_NQN_SIZE) {
@@ -698,6 +700,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -ENOMEM;
                                goto out;
                        }
+                       kfree(opts->traddr);
                        opts->traddr = p;
                        break;
                case NVMF_OPT_TRSVCID:
@@ -706,6 +709,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -ENOMEM;
                                goto out;
                        }
+                       kfree(opts->trsvcid);
                        opts->trsvcid = p;
                        break;
                case NVMF_OPT_QUEUE_SIZE:
@@ -792,6 +796,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -EINVAL;
                                goto out;
                        }
+                       nvmf_host_put(opts->host);
                        opts->host = nvmf_host_add(p);
                        kfree(p);
                        if (!opts->host) {
@@ -817,6 +822,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -ENOMEM;
                                goto out;
                        }
+                       kfree(opts->host_traddr);
                        opts->host_traddr = p;
                        break;
                case NVMF_OPT_HOST_ID:
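
Each of these hunks fixes the same leak: an option that may appear more than once overwrote the pointer from its first occurrence. The pattern, as a minimal hedged helper (kstrdup stands in for the parser's match_strdup(); kfree(NULL) is a safe no-op):

        static int example_set_opt(char **slot, const char *val)
        {
                char *p = kstrdup(val, GFP_KERNEL);

                if (!p)
                        return -ENOMEM;
                kfree(*slot);   /* free the value from an earlier occurrence */
                *slot = p;
                return 0;
        }
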
index 956e0b8e9c4d726b9d8d8d1025f04ca75252cbd2..d7b664ae5923e1217a493d68f1dac96c8a3cce4c 100644 (file)
 #include "nvme.h"
 
 static bool multipath = true;
-module_param(multipath, bool, 0644);
+module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
        "turn on native support for multiple controllers per subsystem");
 
+/*
+ * If multipathing is enabled we need to always use the subsystem instance
+ * number for numbering our devices to avoid conflicts between subsystems that
+ * have multiple controllers and thus use the multipath-aware subsystem node
+ * and those that have a single controller and use the controller node
+ * directly.
+ */
+void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+                       struct nvme_ctrl *ctrl, int *flags)
+{
+       if (!multipath) {
+               sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+       } else if (ns->head->disk) {
+               sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+                               ctrl->cntlid, ns->head->instance);
+               *flags = GENHD_FL_HIDDEN;
+       } else {
+               sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
+                               ns->head->instance);
+       }
+}
+
 void nvme_failover_req(struct request *req)
 {
        struct nvme_ns *ns = req->q->queuedata;
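
As a quick illustration of the three name forms nvme_set_disk_name() centralizes, here is a hedged stand-alone snippet; the instance numbers are invented:

        #include <stdio.h>

        int main(void)
        {
                char name[32];

                /* multipath disabled: controller instance numbers the disk */
                sprintf(name, "nvme%dn%d", 0, 1);       /* nvme0n1 */
                puts(name);
                /* multipath, hidden per-controller node under a shared disk */
                sprintf(name, "nvme%dc%dn%d", 0, 3, 1); /* nvme0c3n1 */
                puts(name);
                /* multipath, no shared disk: subsystem instance is used */
                sprintf(name, "nvme%dn%d", 0, 1);       /* nvme0n1 */
                puts(name);
                return 0;
        }
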
index 061fecfd44f59046ead7fdc40a9469da38ae47bd..17d2f7cf3fed6fd6a47dc1e5357c40774d73880c 100644 (file)
@@ -84,6 +84,11 @@ enum nvme_quirks {
         * Supports the LightNVM command set if indicated in vs[1].
         */
        NVME_QUIRK_LIGHTNVM                     = (1 << 6),
+
+       /*
+        * Set MEDIUM priority on SQ creation
+        */
+       NVME_QUIRK_MEDIUM_PRIO_SQ               = (1 << 7),
 };
 
 /*
@@ -436,6 +441,8 @@ extern const struct attribute_group nvme_ns_id_attr_group;
 extern const struct block_device_operations nvme_ns_head_ops;
 
 #ifdef CONFIG_NVME_MULTIPATH
+void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+                       struct nvme_ctrl *ctrl, int *flags);
 void nvme_failover_req(struct request *req);
 bool nvme_req_needs_failover(struct request *req, blk_status_t error);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
@@ -461,6 +468,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 }
 
 #else
+/*
+ * Without the multipath code enabled, multiple controller per subsystems are
+ * visible as devices and thus we cannot use the subsystem instance.
+ */
+static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+                                     struct nvme_ctrl *ctrl, int *flags)
+{
+       sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+}
+
 static inline void nvme_failover_req(struct request *req)
 {
 }
index fbc71fac6f1e077ad21d336c8e8ca3e3bc311cbe..17a0190bd88fb23e9d525a7f97c877d4b8c2dbc1 100644 (file)
@@ -1093,9 +1093,18 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
                                                struct nvme_queue *nvmeq)
 {
+       struct nvme_ctrl *ctrl = &dev->ctrl;
        struct nvme_command c;
        int flags = NVME_QUEUE_PHYS_CONTIG;
 
+       /*
+        * Some drives have a bug that auto-enables WRRU if MEDIUM isn't
+        * set. Since the URGENT priority encoding is all zeroes, that
+        * would make every queue URGENT.
+        */
+       if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ)
+               flags |= NVME_SQ_PRIO_MEDIUM;
+
        /*
         * Note: we (ab)use the fact that the prp fields survive if no data
         * is attached to the request.
@@ -2701,7 +2710,8 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
                                NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0xf1a5),   /* Intel 600P/P3100 */
-               .driver_data = NVME_QUIRK_NO_DEEPEST_PS },
+               .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+                               NVME_QUIRK_MEDIUM_PRIO_SQ },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
index 5f4f8b16685f4ff9225b5f040e67c1a48dd94c14..3c7b61ddb0d186a017196d56378184a9988d525a 100644 (file)
@@ -27,7 +27,7 @@ config NVME_TARGET_LOOP
 
 config NVME_TARGET_RDMA
        tristate "NVMe over Fabrics RDMA target support"
-       depends on INFINIBAND
+       depends on INFINIBAND && INFINIBAND_ADDR_TRANS
        depends on NVME_TARGET
        select SGL_ALLOC
        help
index 31fdfba556a844958ee19933f33f2b5f3ed378f3..27a8561c0cb97596d25b53620493978040082124 100644 (file)
@@ -469,6 +469,12 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
        nvme_stop_ctrl(&ctrl->ctrl);
        nvme_loop_shutdown_ctrl(ctrl);
 
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+               /* state change failure should never happen */
+               WARN_ON_ONCE(1);
+               return;
+       }
+
        ret = nvme_loop_configure_admin_queue(ctrl);
        if (ret)
                goto out_disable;
index 84aa9d6763753029f45c39483c9b84ca59299409..6da20b9688f7419aae4f806798eaa137c8c4f649 100644 (file)
@@ -942,7 +942,7 @@ int __init early_init_dt_scan_chosen_stdout(void)
        int offset;
        const char *p, *q, *options = NULL;
        int l;
-       const struct earlycon_id *match;
+       const struct earlycon_id **p_match;
        const void *fdt = initial_boot_params;
 
        offset = fdt_path_offset(fdt, "/chosen");
@@ -969,7 +969,10 @@ int __init early_init_dt_scan_chosen_stdout(void)
                return 0;
        }
 
-       for (match = __earlycon_table; match < __earlycon_table_end; match++) {
+       for (p_match = __earlycon_table; p_match < __earlycon_table_end;
+            p_match++) {
+               const struct earlycon_id *match = *p_match;
+
                if (!match->compatible[0])
                        continue;
 
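
The scan now walks a table of pointers rather than a table of structs, so each entry needs one extra dereference. A runnable userspace sketch of the layout change (struct id is a stand-in for struct earlycon_id):

        #include <stdio.h>
        #include <string.h>

        struct id { const char *compatible; };

        static const struct id id_a = { "ns16550a" };
        static const struct id id_b = { "arm,pl011" };
        /* the section now holds pointers, not the structs themselves */
        static const struct id *const table[] = { &id_a, &id_b };

        int main(void)
        {
                for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
                        const struct id *match = table[i];      /* extra deref */
                        if (!strcmp(match->compatible, "arm,pl011"))
                                printf("matched %s\n", match->compatible);
                }
                return 0;
        }
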
index 8c0c92712fc9ec4f4db2890987d663547b578b8e..d963baf8e53a22b53125a60b8c5c4dba1b1d5a33 100644 (file)
@@ -204,6 +204,9 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
        bool scanphys = false;
        int addr, rc;
 
+       if (!np)
+               return mdiobus_register(mdio);
+
        /* Do not continue if the node is disabled */
        if (!of_device_is_available(np))
                return -ENODEV;
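
With the NULL check in place, a MAC driver can call of_mdiobus_register() whether or not it has a device-tree node, and the function degrades to a plain mdiobus_register(). Hedged usage sketch (example_register is hypothetical):

        static int example_register(struct mii_bus *bus, struct device_node *np)
        {
                /* np may legitimately be NULL on non-DT platforms now */
                return of_mdiobus_register(bus, np);
        }
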
index b35fe88f18514d13eaf9460885f42657bd52ed61..7baa53e5b1d74d469959341945e3bc239cf7d5c7 100644 (file)
@@ -102,12 +102,28 @@ static DEFINE_IDR(ovcs_idr);
 
 static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain);
 
+/**
+ * of_overlay_notifier_register() - Register notifier for overlay operations
+ * @nb:                Notifier block to register
+ *
+ * Register for notification on overlay operations on device tree nodes. The
+ * reported actions are defined by @of_reconfig_change. The notifier callback
+ * furthermore receives a pointer to the affected device tree node.
+ *
+ * Note that a notifier callback must not retain pointers to a device tree
+ * node or its content beyond the @OF_OVERLAY_POST_REMOVE notification for the
+ * respective node.
+ */
 int of_overlay_notifier_register(struct notifier_block *nb)
 {
        return blocking_notifier_chain_register(&overlay_notify_chain, nb);
 }
 EXPORT_SYMBOL_GPL(of_overlay_notifier_register);
 
+/**
+ * of_overlay_notifier_unregister() - Unregister notifier for overlay operations
+ * @nb:                Notifier block to unregister
+ */
 int of_overlay_notifier_unregister(struct notifier_block *nb)
 {
        return blocking_notifier_chain_unregister(&overlay_notify_chain, nb);
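
A hedged usage sketch for the documented API; the callback and notifier block are hypothetical, and per the comment above the callback must not cache node pointers past OF_OVERLAY_POST_REMOVE:

        #include <linux/of.h>
        #include <linux/notifier.h>

        static int example_ovl_cb(struct notifier_block *nb,
                                  unsigned long action, void *arg)
        {
                struct of_overlay_notify_data *nd = arg;

                if (action == OF_OVERLAY_POST_APPLY)
                        pr_info("overlay applied on %pOF\n", nd->target);
                return NOTIFY_OK;
        }

        static struct notifier_block example_nb = {
                .notifier_call = example_ovl_cb,
        };

        /* at init time: of_overlay_notifier_register(&example_nb); */
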
@@ -671,17 +687,13 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
                of_node_put(ovcs->fragments[i].overlay);
        }
        kfree(ovcs->fragments);
-
        /*
-        * TODO
-        *
-        * would like to: kfree(ovcs->overlay_tree);
-        * but can not since drivers may have pointers into this data
-        *
-        * would like to: kfree(ovcs->fdt);
-        * but can not since drivers may have pointers into this data
+        * There should be no live pointers into ovcs->overlay_tree and
+        * ovcs->fdt due to the policy that overlay notifiers are not allowed
+        * to retain pointers into the overlay devicetree.
         */
-
+       kfree(ovcs->overlay_tree);
+       kfree(ovcs->fdt);
        kfree(ovcs);
 }
 
index acba1f56af3e439647f6c3884a37ab78b233e1b3..297599fcbc3251bf284512dae20727c188befa67 100644 (file)
@@ -1195,7 +1195,7 @@ void * ccio_get_iommu(const struct parisc_device *dev)
  * to/from certain pages.  To avoid this happening, we mark these pages
  * as `used', and ensure that nothing will try to allocate from them.
  */
-void ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
+void __init ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
 {
        unsigned int idx;
        struct parisc_device *dev = parisc_parent(cujo);
@@ -1263,7 +1263,7 @@ static struct parisc_driver ccio_driver __refdata = {
  * I/O Page Directory, the resource map, and initializing the
  * U2/Uturn chip into virtual mode.
  */
-static void
+static void __init
 ccio_ioc_init(struct ioc *ioc)
 {
        int i;
index a6b88c7f6e3ed1446a1384a049fc73821da50b24..d2970a009eb59a854d91187fcece5c9b1ffda7b9 100644 (file)
@@ -486,7 +486,7 @@ static int kirin_pcie_probe(struct platform_device *pdev)
                return ret;
 
        kirin_pcie->gpio_id_reset = of_get_named_gpio(dev->of_node,
-                                                     "reset-gpio", 0);
+                                                     "reset-gpios", 0);
        if (kirin_pcie->gpio_id_reset < 0)
                return -ENODEV;
 
index b04d37b3c5deea7b2aafdf34f636adb720a978bd..9abf549631b4d4ebed2ef0207225658150c3c151 100644 (file)
@@ -29,6 +29,7 @@
 #define     PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT      5
 #define     PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE             (0 << 11)
 #define     PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT     12
+#define     PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SZ             0x2
 #define PCIE_CORE_LINK_CTRL_STAT_REG                           0xd0
 #define     PCIE_CORE_LINK_L0S_ENTRY                           BIT(0)
 #define     PCIE_CORE_LINK_TRAINING                            BIT(5)
 #define PCIE_ISR1_MASK_REG                     (CONTROL_BASE_ADDR + 0x4C)
 #define     PCIE_ISR1_POWER_STATE_CHANGE       BIT(4)
 #define     PCIE_ISR1_FLUSH                    BIT(5)
-#define     PCIE_ISR1_ALL_MASK                 GENMASK(5, 4)
+#define     PCIE_ISR1_INTX_ASSERT(val)         BIT(8 + (val))
+#define     PCIE_ISR1_ALL_MASK                 GENMASK(11, 4)
 #define PCIE_MSI_ADDR_LOW_REG                  (CONTROL_BASE_ADDR + 0x50)
 #define PCIE_MSI_ADDR_HIGH_REG                 (CONTROL_BASE_ADDR + 0x54)
 #define PCIE_MSI_STATUS_REG                    (CONTROL_BASE_ADDR + 0x58)
 #define PCIE_CONFIG_WR_TYPE0                   0xa
 #define PCIE_CONFIG_WR_TYPE1                   0xb
 
-/* PCI_BDF shifts 8bit, so we need extra 4bit shift */
-#define PCIE_BDF(dev)                          (dev << 4)
 #define PCIE_CONF_BUS(bus)                     (((bus) & 0xff) << 20)
 #define PCIE_CONF_DEV(dev)                     (((dev) & 0x1f) << 15)
 #define PCIE_CONF_FUNC(fun)                    (((fun) & 0x7)  << 12)
@@ -296,7 +296,8 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
        reg = PCIE_CORE_DEV_CTRL_STATS_RELAX_ORDER_DISABLE |
                (7 << PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT) |
                PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE |
-               PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT;
+               (PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SZ <<
+                PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT);
        advk_writel(pcie, reg, PCIE_CORE_DEV_CTRL_STATS_REG);
 
        /* Program PCIe Control 2 to disable strict ordering */
@@ -437,7 +438,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
        u32 reg;
        int ret;
 
-       if (PCI_SLOT(devfn) != 0) {
+       if ((bus->number == pcie->root_bus_nr) && PCI_SLOT(devfn) != 0) {
                *val = 0xffffffff;
                return PCIBIOS_DEVICE_NOT_FOUND;
        }
@@ -456,7 +457,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
        advk_writel(pcie, reg, PIO_CTRL);
 
        /* Program the address registers */
-       reg = PCIE_BDF(devfn) | PCIE_CONF_REG(where);
+       reg = PCIE_CONF_ADDR(bus->number, devfn, where);
        advk_writel(pcie, reg, PIO_ADDR_LS);
        advk_writel(pcie, 0, PIO_ADDR_MS);
 
@@ -491,7 +492,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
        int offset;
        int ret;
 
-       if (PCI_SLOT(devfn) != 0)
+       if ((bus->number == pcie->root_bus_nr) && PCI_SLOT(devfn) != 0)
                return PCIBIOS_DEVICE_NOT_FOUND;
 
        if (where % size)
@@ -609,9 +610,9 @@ static void advk_pcie_irq_mask(struct irq_data *d)
        irq_hw_number_t hwirq = irqd_to_hwirq(d);
        u32 mask;
 
-       mask = advk_readl(pcie, PCIE_ISR0_MASK_REG);
-       mask |= PCIE_ISR0_INTX_ASSERT(hwirq);
-       advk_writel(pcie, mask, PCIE_ISR0_MASK_REG);
+       mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+       mask |= PCIE_ISR1_INTX_ASSERT(hwirq);
+       advk_writel(pcie, mask, PCIE_ISR1_MASK_REG);
 }
 
 static void advk_pcie_irq_unmask(struct irq_data *d)
@@ -620,9 +621,9 @@ static void advk_pcie_irq_unmask(struct irq_data *d)
        irq_hw_number_t hwirq = irqd_to_hwirq(d);
        u32 mask;
 
-       mask = advk_readl(pcie, PCIE_ISR0_MASK_REG);
-       mask &= ~PCIE_ISR0_INTX_ASSERT(hwirq);
-       advk_writel(pcie, mask, PCIE_ISR0_MASK_REG);
+       mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+       mask &= ~PCIE_ISR1_INTX_ASSERT(hwirq);
+       advk_writel(pcie, mask, PCIE_ISR1_MASK_REG);
 }
 
 static int advk_pcie_irq_map(struct irq_domain *h,
@@ -765,29 +766,35 @@ static void advk_pcie_handle_msi(struct advk_pcie *pcie)
 
 static void advk_pcie_handle_int(struct advk_pcie *pcie)
 {
-       u32 val, mask, status;
+       u32 isr0_val, isr0_mask, isr0_status;
+       u32 isr1_val, isr1_mask, isr1_status;
        int i, virq;
 
-       val = advk_readl(pcie, PCIE_ISR0_REG);
-       mask = advk_readl(pcie, PCIE_ISR0_MASK_REG);
-       status = val & ((~mask) & PCIE_ISR0_ALL_MASK);
+       isr0_val = advk_readl(pcie, PCIE_ISR0_REG);
+       isr0_mask = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+       isr0_status = isr0_val & ((~isr0_mask) & PCIE_ISR0_ALL_MASK);
+
+       isr1_val = advk_readl(pcie, PCIE_ISR1_REG);
+       isr1_mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+       isr1_status = isr1_val & ((~isr1_mask) & PCIE_ISR1_ALL_MASK);
 
-       if (!status) {
-               advk_writel(pcie, val, PCIE_ISR0_REG);
+       if (!isr0_status && !isr1_status) {
+               advk_writel(pcie, isr0_val, PCIE_ISR0_REG);
+               advk_writel(pcie, isr1_val, PCIE_ISR1_REG);
                return;
        }
 
        /* Process MSI interrupts */
-       if (status & PCIE_ISR0_MSI_INT_PENDING)
+       if (isr0_status & PCIE_ISR0_MSI_INT_PENDING)
                advk_pcie_handle_msi(pcie);
 
        /* Process legacy interrupts */
        for (i = 0; i < PCI_NUM_INTX; i++) {
-               if (!(status & PCIE_ISR0_INTX_ASSERT(i)))
+               if (!(isr1_status & PCIE_ISR1_INTX_ASSERT(i)))
                        continue;
 
-               advk_writel(pcie, PCIE_ISR0_INTX_ASSERT(i),
-                           PCIE_ISR0_REG);
+               advk_writel(pcie, PCIE_ISR1_INTX_ASSERT(i),
+                           PCIE_ISR1_REG);
 
                virq = irq_find_mapping(pcie->irq_domain, i);
                generic_handle_irq(virq);
index 6ace47099fc59451422daaf7d38c52f470c9c500..b9a131137e64b86be6660d9d94225e5ff0b8aeb9 100644 (file)
@@ -958,10 +958,11 @@ static int pci_pm_freeze(struct device *dev)
         * devices should not be touched during freeze/thaw transitions,
         * however.
         */
-       if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND))
+       if (!dev_pm_smart_suspend_and_suspended(dev)) {
                pm_runtime_resume(dev);
+               pci_dev->state_saved = false;
+       }
 
-       pci_dev->state_saved = false;
        if (pm->freeze) {
                int error;
 
index e597655a5643b6bc92be2a40ed3fa2b11bd093dd..dbfe7c4f3776890b9a57401dec13cfdf7729e876 100644 (file)
@@ -1910,7 +1910,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
 EXPORT_SYMBOL(pci_pme_active);
 
 /**
- * pci_enable_wake - enable PCI device as wakeup event source
+ * __pci_enable_wake - enable PCI device as wakeup event source
  * @dev: PCI device affected
  * @state: PCI state from which device will issue wakeup events
  * @enable: True to enable event generation; false to disable
@@ -1928,7 +1928,7 @@ EXPORT_SYMBOL(pci_pme_active);
  * Error code depending on the platform is returned if both the platform and
  * the native mechanism fail to enable the generation of wake-up events
  */
-int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
+static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
 {
        int ret = 0;
 
@@ -1969,6 +1969,23 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
 
        return ret;
 }
+
+/**
+ * pci_enable_wake - change wakeup settings for a PCI device
+ * @pci_dev: Target device
+ * @state: PCI state from which device will issue wakeup events
+ * @enable: Whether or not to enable event generation
+ *
+ * If @enable is set, check device_may_wakeup() for the device before calling
+ * __pci_enable_wake() for it.
+ */
+int pci_enable_wake(struct pci_dev *pci_dev, pci_power_t state, bool enable)
+{
+       if (enable && !device_may_wakeup(&pci_dev->dev))
+               return -EINVAL;
+
+       return __pci_enable_wake(pci_dev, state, enable);
+}
 EXPORT_SYMBOL(pci_enable_wake);
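
Hedged usage sketch: with the wrapper, an enable request is filtered through device_may_wakeup(), so a driver asking to arm wakeup for a device the user disabled now gets -EINVAL instead of silently arming it (example_suspend is hypothetical):

        static int example_suspend(struct pci_dev *pdev)
        {
                int ret = pci_enable_wake(pdev, PCI_D3hot, true);

                if (ret == -EINVAL)
                        dev_dbg(&pdev->dev, "wakeup not permitted by userspace\n");
                return 0;
        }
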
 
 /**
@@ -1981,9 +1998,9 @@ EXPORT_SYMBOL(pci_enable_wake);
  * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
  * ordering constraints.
  *
- * This function only returns error code if the device is not capable of
- * generating PME# from both D3_hot and D3_cold, and the platform is unable to
- * enable wake-up power for it.
+ * This function returns an error code only if the device is not allowed to
+ * wake up the system from sleep, or if it is not capable of generating PME#
+ * from both D3_hot and D3_cold and the platform cannot enable wake-up power.
  */
 int pci_wake_from_d3(struct pci_dev *dev, bool enable)
 {
@@ -2114,7 +2131,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev)
 
        dev->runtime_d3cold = target_state == PCI_D3cold;
 
-       pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
+       __pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
 
        error = pci_set_power_state(dev, target_state);
 
@@ -2138,16 +2155,16 @@ bool pci_dev_run_wake(struct pci_dev *dev)
 {
        struct pci_bus *bus = dev->bus;
 
-       if (device_can_wakeup(&dev->dev))
-               return true;
-
        if (!dev->pme_support)
                return false;
 
        /* PME-capable in principle, but not from the target power state */
-       if (!pci_pme_capable(dev, pci_target_state(dev, false)))
+       if (!pci_pme_capable(dev, pci_target_state(dev, true)))
                return false;
 
+       if (device_can_wakeup(&dev->dev))
+               return true;
+
        while (bus->parent) {
                struct pci_dev *bridge = bus->self;
 
@@ -5273,11 +5290,11 @@ void pcie_print_link_status(struct pci_dev *dev)
        bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width);
 
        if (bw_avail >= bw_cap)
-               pci_info(dev, "%u.%03u Gb/s available bandwidth (%s x%d link)\n",
+               pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n",
                         bw_cap / 1000, bw_cap % 1000,
                         PCIE_SPEED2STR(speed_cap), width_cap);
        else
-               pci_info(dev, "%u.%03u Gb/s available bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
+               pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
                         bw_avail / 1000, bw_avail % 1000,
                         PCIE_SPEED2STR(speed), width,
                         limiting_dev ? pci_name(limiting_dev) : "<unknown>",
index a0d522154cdf93d07fd2d6926d976a52ae0ba8d0..4ef429250d7b2afe05ffd4aa4201e2091d85773f 100644 (file)
@@ -135,19 +135,25 @@ struct mvebu_comhy_conf {
 static const struct mvebu_comhy_conf mvebu_comphy_cp110_modes[] = {
        /* lane 0 */
        MVEBU_COMPHY_CONF(0, 1, PHY_MODE_SGMII, 0x1),
+       MVEBU_COMPHY_CONF(0, 1, PHY_MODE_2500SGMII, 0x1),
        /* lane 1 */
        MVEBU_COMPHY_CONF(1, 2, PHY_MODE_SGMII, 0x1),
+       MVEBU_COMPHY_CONF(1, 2, PHY_MODE_2500SGMII, 0x1),
        /* lane 2 */
        MVEBU_COMPHY_CONF(2, 0, PHY_MODE_SGMII, 0x1),
+       MVEBU_COMPHY_CONF(2, 0, PHY_MODE_2500SGMII, 0x1),
        MVEBU_COMPHY_CONF(2, 0, PHY_MODE_10GKR, 0x1),
        /* lane 3 */
        MVEBU_COMPHY_CONF(3, 1, PHY_MODE_SGMII, 0x2),
+       MVEBU_COMPHY_CONF(3, 1, PHY_MODE_2500SGMII, 0x2),
        /* lane 4 */
        MVEBU_COMPHY_CONF(4, 0, PHY_MODE_SGMII, 0x2),
+       MVEBU_COMPHY_CONF(4, 0, PHY_MODE_2500SGMII, 0x2),
        MVEBU_COMPHY_CONF(4, 0, PHY_MODE_10GKR, 0x2),
        MVEBU_COMPHY_CONF(4, 1, PHY_MODE_SGMII, 0x1),
        /* lane 5 */
        MVEBU_COMPHY_CONF(5, 2, PHY_MODE_SGMII, 0x1),
+       MVEBU_COMPHY_CONF(5, 2, PHY_MODE_2500SGMII, 0x1),
 };
 
 struct mvebu_comphy_priv {
@@ -206,6 +212,10 @@ static void mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane,
        if (mode == PHY_MODE_10GKR)
                val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xe) |
                       MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xe);
+       else if (mode == PHY_MODE_2500SGMII)
+               val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x8) |
+                      MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x8) |
+                      MVEBU_COMPHY_SERDES_CFG0_HALF_BUS;
        else if (mode == PHY_MODE_SGMII)
                val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x6) |
                       MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x6) |
@@ -296,13 +306,13 @@ static int mvebu_comphy_init_plls(struct mvebu_comphy_lane *lane,
        return 0;
 }
 
-static int mvebu_comphy_set_mode_sgmii(struct phy *phy)
+static int mvebu_comphy_set_mode_sgmii(struct phy *phy, enum phy_mode mode)
 {
        struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
        struct mvebu_comphy_priv *priv = lane->priv;
        u32 val;
 
-       mvebu_comphy_ethernet_init_reset(lane, PHY_MODE_SGMII);
+       mvebu_comphy_ethernet_init_reset(lane, mode);
 
        val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
        val &= ~MVEBU_COMPHY_RX_CTRL1_CLK8T_EN;
@@ -487,7 +497,8 @@ static int mvebu_comphy_power_on(struct phy *phy)
 
        switch (lane->mode) {
        case PHY_MODE_SGMII:
-               ret = mvebu_comphy_set_mode_sgmii(phy);
+       case PHY_MODE_2500SGMII:
+               ret = mvebu_comphy_set_mode_sgmii(phy, lane->mode);
                break;
        case PHY_MODE_10GKR:
                ret = mvebu_comphy_set_mode_10gkr(phy);
index b1ae1618fefea7cd14811b71e05823b70e4c614d..fee9225ca559e6860bf28adc7d746d56fb499c34 100644 (file)
@@ -1622,22 +1622,30 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 
        if (!need_valid_mask) {
                irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0,
-                                               chip->ngpio, NUMA_NO_NODE);
+                                               community->npins, NUMA_NO_NODE);
                if (irq_base < 0) {
                        dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n");
                        return irq_base;
                }
-       } else {
-               irq_base = 0;
        }
 
-       ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, irq_base,
+       ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0,
                                   handle_bad_irq, IRQ_TYPE_NONE);
        if (ret) {
                dev_err(pctrl->dev, "failed to add IRQ chip\n");
                return ret;
        }
 
+       if (!need_valid_mask) {
+               for (i = 0; i < community->ngpio_ranges; i++) {
+                       range = &community->gpio_ranges[i];
+
+                       irq_domain_associate_many(chip->irq.domain, irq_base,
+                                                 range->base, range->npins);
+                       irq_base += range->npins;
+               }
+       }
+
        gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq,
                                     chv_gpio_irq_handler);
        return 0;
index 8870a4100164616a9bfcc3492468641cb9bfabbd..fee3435a6f15853edb6ee5e2db3ec649e78b554f 100644 (file)
                .npins = ((e) - (s) + 1),               \
        }
 
+#define SPTH_GPP(r, s, e, g)                           \
+       {                                               \
+               .reg_num = (r),                         \
+               .base = (s),                            \
+               .size = ((e) - (s) + 1),                \
+               .gpio_base = (g),                       \
+       }
+
+#define SPTH_COMMUNITY(b, s, e, g)                     \
+       {                                               \
+               .barno = (b),                           \
+               .padown_offset = SPT_PAD_OWN,           \
+               .padcfglock_offset = SPT_PADCFGLOCK,    \
+               .hostown_offset = SPT_HOSTSW_OWN,       \
+               .ie_offset = SPT_GPI_IE,                \
+               .pin_base = (s),                        \
+               .npins = ((e) - (s) + 1),               \
+               .gpps = (g),                            \
+               .ngpps = ARRAY_SIZE(g),                 \
+       }
+
 /* Sunrisepoint-LP */
 static const struct pinctrl_pin_desc sptlp_pins[] = {
        /* GPP_A */
@@ -531,10 +552,28 @@ static const struct intel_function spth_functions[] = {
        FUNCTION("i2c2", spth_i2c2_groups),
 };
 
+static const struct intel_padgroup spth_community0_gpps[] = {
+       SPTH_GPP(0, 0, 23, 0),          /* GPP_A */
+       SPTH_GPP(1, 24, 47, 24),        /* GPP_B */
+};
+
+static const struct intel_padgroup spth_community1_gpps[] = {
+       SPTH_GPP(0, 48, 71, 48),        /* GPP_C */
+       SPTH_GPP(1, 72, 95, 72),        /* GPP_D */
+       SPTH_GPP(2, 96, 108, 96),       /* GPP_E */
+       SPTH_GPP(3, 109, 132, 120),     /* GPP_F */
+       SPTH_GPP(4, 133, 156, 144),     /* GPP_G */
+       SPTH_GPP(5, 157, 180, 168),     /* GPP_H */
+};
+
+static const struct intel_padgroup spth_community3_gpps[] = {
+       SPTH_GPP(0, 181, 191, 192),     /* GPP_I */
+};
+
 static const struct intel_community spth_communities[] = {
-       SPT_COMMUNITY(0, 0, 47),
-       SPT_COMMUNITY(1, 48, 180),
-       SPT_COMMUNITY(2, 181, 191),
+       SPTH_COMMUNITY(0, 0, 47, spth_community0_gpps),
+       SPTH_COMMUNITY(1, 48, 180, spth_community1_gpps),
+       SPTH_COMMUNITY(2, 181, 191, spth_community3_gpps),
 };
 
 static const struct intel_pinctrl_soc_data spth_soc_data = {
index 4b91ff74779bead16a60f96bf2566e947989e179..99a6ceac8e53ca680eb14f681d3f247622d8eda4 100644 (file)
@@ -898,7 +898,7 @@ static struct meson_bank meson_axg_periphs_banks[] = {
 
 static struct meson_bank meson_axg_aobus_banks[] = {
        /*   name    first      last      irq   pullen  pull    dir     out     in  */
-       BANK("AO",   GPIOAO_0,  GPIOAO_9, 0, 13, 0,  16,  0, 0,  0,  0,  0, 16,  1,  0),
+       BANK("AO",   GPIOAO_0,  GPIOAO_13, 0, 13, 0,  16,  0, 0,  0,  0,  0, 16,  1,  0),
 };
 
 static struct meson_pmx_bank meson_axg_periphs_pmx_banks[] = {
index 39d06dd1f63a8c94710df2a716cfa6faf6a0aabb..566644bb496ac82bf3263bb6ea285c7ab1fb070f 100644 (file)
@@ -154,7 +154,7 @@ config DELL_LAPTOP
        depends on ACPI_VIDEO || ACPI_VIDEO = n
        depends on RFKILL || RFKILL = n
        depends on SERIO_I8042
-       select DELL_SMBIOS
+       depends on DELL_SMBIOS
        select POWER_SUPPLY
        select LEDS_CLASS
        select NEW_LEDS
@@ -168,8 +168,8 @@ config DELL_WMI
        depends on DMI
        depends on INPUT
        depends on ACPI_VIDEO || ACPI_VIDEO = n
+       depends on DELL_SMBIOS
        select DELL_WMI_DESCRIPTOR
-       select DELL_SMBIOS
        select INPUT_SPARSEKMAP
        ---help---
          Say Y here if you want to support WMI-based hotkeys on Dell laptops.
index d4aeac3477f55086b69aafa0098ad2f3d617d508..f086469ea740987dec7c9996b982968491fa3140 100644 (file)
@@ -178,8 +178,10 @@ static int asus_wireless_remove(struct acpi_device *adev)
 {
        struct asus_wireless_data *data = acpi_driver_data(adev);
 
-       if (data->wq)
+       if (data->wq) {
+               devm_led_classdev_unregister(&adev->dev, &data->led);
                destroy_workqueue(data->wq);
+       }
        return 0;
 }
 
index b3285175f20f0042d9101f1470cc4afe2b65e7d8..78ccf936d3560bff698a0eb2d0e2be5e9e45051d 100644 (file)
@@ -452,7 +452,6 @@ static int ptp_pch_adjtime(struct ptp_clock_info *ptp, s64 delta)
 static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps);
        struct pch_ts_regs __iomem *regs = pch_dev->regs;
@@ -461,8 +460,7 @@ static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
        ns = pch_systime_read(regs);
        spin_unlock_irqrestore(&pch_dev->register_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
        return 0;
 }
 
@@ -474,8 +472,7 @@ static int ptp_pch_settime(struct ptp_clock_info *ptp,
        struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps);
        struct pch_ts_regs __iomem *regs = pch_dev->regs;
 
-       ns = ts->tv_sec * 1000000000ULL;
-       ns += ts->tv_nsec;
+       ns = timespec64_to_ns(ts);
 
        spin_lock_irqsave(&pch_dev->register_lock, flags);
        pch_systime_write(regs, ns);
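
The open-coded multiplication and div_u64_rem() are replaced by the <linux/time64.h> helpers. A hedged round-trip sketch (example_roundtrip is hypothetical):

        #include <linux/kernel.h>
        #include <linux/time64.h>

        static void example_roundtrip(const struct timespec64 *ts)
        {
                s64 ns = timespec64_to_ns(ts);  /* tv_sec * NSEC_PER_SEC + tv_nsec */
                struct timespec64 back = ns_to_timespec64(ns);

                WARN_ON(back.tv_sec != ts->tv_sec ||
                        back.tv_nsec != ts->tv_nsec);
        }
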
index 9d27016c899ed29e49bb6395bc8686323ea2cd90..0434ab7b649709cd6fcbf5192ca697b6f0148982 100644 (file)
@@ -740,10 +740,7 @@ static int do_dma_request(struct mport_dma_req *req,
        tx->callback = dma_xfer_callback;
        tx->callback_param = req;
 
-       req->dmach = chan;
-       req->sync = sync;
        req->status = DMA_IN_PROGRESS;
-       init_completion(&req->req_comp);
        kref_get(&req->refcount);
 
        cookie = dmaengine_submit(tx);
@@ -831,13 +828,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
        if (!req)
                return -ENOMEM;
 
-       kref_init(&req->refcount);
-
        ret = get_dma_channel(priv);
        if (ret) {
                kfree(req);
                return ret;
        }
+       chan = priv->dmach;
+
+       kref_init(&req->refcount);
+       init_completion(&req->req_comp);
+       req->dir = dir;
+       req->filp = filp;
+       req->priv = priv;
+       req->dmach = chan;
+       req->sync = sync;
 
        /*
         * If parameter loc_addr != NULL, we are transferring data from/to
@@ -925,11 +929,6 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
                                xfer->offset, xfer->length);
        }
 
-       req->dir = dir;
-       req->filp = filp;
-       req->priv = priv;
-       chan = priv->dmach;
-
        nents = dma_map_sg(chan->device->dev,
                           req->sgt.sgl, req->sgt.nents, dir);
        if (nents == 0) {
index 8e70a627e0bbc70ba1040a51c5cc71a93b0ddaab..cbbafdcaaecb7576da27039233c72c47a7bcf5ae 100644 (file)
@@ -1083,6 +1083,7 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
                dev_err(qproc->dev, "unable to resolve mba region\n");
                return ret;
        }
+       of_node_put(node);
 
        qproc->mba_phys = r.start;
        qproc->mba_size = resource_size(&r);
@@ -1100,6 +1101,7 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
                dev_err(qproc->dev, "unable to resolve mpss region\n");
                return ret;
        }
+       of_node_put(node);
 
        qproc->mpss_phys = qproc->mpss_reloc = r.start;
        qproc->mpss_size = resource_size(&r);
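
The added of_node_put() calls enforce the usual refcount rule: every node returned with an elevated refcount (of_parse_phandle() and friends) must be dropped once the caller is done with it, even on success. A hedged sketch of the pattern (example_lookup is hypothetical):

        #include <linux/of.h>
        #include <linux/of_address.h>

        static int example_lookup(struct device_node *parent, struct resource *r)
        {
                struct device_node *node;
                int ret;

                node = of_parse_phandle(parent, "memory-region", 0);
                if (!node)
                        return -EINVAL;
                ret = of_address_to_resource(node, 0, r);
                of_node_put(node);      /* drop the of_parse_phandle() reference */
                return ret;
        }
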
index 6d9c5832ce47d3052568f10794652686237c00e8..a9609d971f7f36f2ebb5e5e9c17d53ae04a269b0 100644 (file)
@@ -1163,7 +1163,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
        if (ret)
                return ret;
 
-       ret = rproc_stop(rproc, false);
+       ret = rproc_stop(rproc, true);
        if (ret)
                goto unlock_mutex;
 
@@ -1316,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc)
        if (!atomic_dec_and_test(&rproc->power))
                goto out;
 
-       ret = rproc_stop(rproc, true);
+       ret = rproc_stop(rproc, false);
        if (ret) {
                atomic_inc(&rproc->power);
                goto out;
index 360e06b20c5345528aaf089cf358f761f57a2b9e..ac18f2f27881091d529b746c96c6dff2ab3f99d3 100644 (file)
@@ -110,7 +110,7 @@ static const struct uniphier_reset_data uniphier_ld20_sys_reset_data[] = {
        UNIPHIER_RESETX(4, 0x200c, 2),          /* eMMC */
        UNIPHIER_RESETX(6, 0x200c, 6),          /* Ether */
        UNIPHIER_RESETX(8, 0x200c, 8),          /* STDMAC (HSC) */
-       UNIPHIER_RESETX(12, 0x200c, 5),         /* GIO (PCIe, USB3) */
+       UNIPHIER_RESETX(14, 0x200c, 5),         /* USB30 */
        UNIPHIER_RESETX(16, 0x200c, 12),        /* USB30-PHY0 */
        UNIPHIER_RESETX(17, 0x200c, 13),        /* USB30-PHY1 */
        UNIPHIER_RESETX(18, 0x200c, 14),        /* USB30-PHY2 */
@@ -127,8 +127,8 @@ static const struct uniphier_reset_data uniphier_pxs3_sys_reset_data[] = {
        UNIPHIER_RESETX(6, 0x200c, 9),          /* Ether0 */
        UNIPHIER_RESETX(7, 0x200c, 10),         /* Ether1 */
        UNIPHIER_RESETX(8, 0x200c, 12),         /* STDMAC */
-       UNIPHIER_RESETX(12, 0x200c, 4),         /* USB30 link (GIO0) */
-       UNIPHIER_RESETX(13, 0x200c, 5),         /* USB31 link (GIO1) */
+       UNIPHIER_RESETX(12, 0x200c, 4),         /* USB30 link */
+       UNIPHIER_RESETX(13, 0x200c, 5),         /* USB31 link */
        UNIPHIER_RESETX(16, 0x200c, 16),        /* USB30-PHY0 */
        UNIPHIER_RESETX(17, 0x200c, 18),        /* USB30-PHY1 */
        UNIPHIER_RESETX(18, 0x200c, 20),        /* USB30-PHY2 */
index 64b6de9763ee2f92e7e615915ffcc16afeacb6c5..1efdf9ff8679902af6f922eda1a15f883babba3c 100644 (file)
@@ -581,4 +581,6 @@ static void rpmsg_chrdev_exit(void)
        unregister_chrdev_region(rpmsg_major, RPMSG_DEV_MAX);
 }
 module_exit(rpmsg_chrdev_exit);
+
+MODULE_ALIAS("rpmsg:rpmsg_chrdev");
 MODULE_LICENSE("GPL v2");
index 304e891e35fcb060a8ff26f78122fa3c63a1eb87..60f2250fd96bec2cec1dd33f652edaac474d2182 100644 (file)
@@ -57,7 +57,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
 
 static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 {
-       long rc = OPAL_BUSY;
+       s64 rc = OPAL_BUSY;
        int retries = 10;
        u32 y_m_d;
        u64 h_m_s_ms;
@@ -66,13 +66,17 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-               if (rc == OPAL_BUSY_EVENT)
+               if (rc == OPAL_BUSY_EVENT) {
+                       msleep(OPAL_BUSY_DELAY_MS);
                        opal_poll_events(NULL);
-               else if (retries-- && (rc == OPAL_HARDWARE
-                                      || rc == OPAL_INTERNAL_ERROR))
-                       msleep(10);
-               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-                       break;
+               } else if (rc == OPAL_BUSY) {
+                       msleep(OPAL_BUSY_DELAY_MS);
+               } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+                       if (retries--) {
+                               msleep(10); /* Wait 10ms before retry */
+                               rc = OPAL_BUSY; /* go around again */
+                       }
+               }
        }
 
        if (rc != OPAL_SUCCESS)
@@ -87,21 +91,26 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 
 static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
 {
-       long rc = OPAL_BUSY;
+       s64 rc = OPAL_BUSY;
        int retries = 10;
        u32 y_m_d = 0;
        u64 h_m_s_ms = 0;
 
        tm_to_opal(tm, &y_m_d, &h_m_s_ms);
+
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_rtc_write(y_m_d, h_m_s_ms);
-               if (rc == OPAL_BUSY_EVENT)
+               if (rc == OPAL_BUSY_EVENT) {
+                       msleep(OPAL_BUSY_DELAY_MS);
                        opal_poll_events(NULL);
-               else if (retries-- && (rc == OPAL_HARDWARE
-                                      || rc == OPAL_INTERNAL_ERROR))
-                       msleep(10);
-               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-                       break;
+               } else if (rc == OPAL_BUSY) {
+                       msleep(OPAL_BUSY_DELAY_MS);
+               } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+                       if (retries--) {
+                               msleep(10); /* Wait 10ms before retry */
+                               rc = OPAL_BUSY; /* go around again */
+                       }
+               }
        }
 
        return rc == OPAL_SUCCESS ? 0 : -EIO;
index 62f5f04d8f615e393c88824606f58d13801c5e6b..5e963fe0e38d4c2125c43ae801ca7e9b28d98d07 100644 (file)
@@ -592,13 +592,22 @@ static int _schedule_lcu_update(struct alias_lcu *lcu,
 int dasd_alias_add_device(struct dasd_device *device)
 {
        struct dasd_eckd_private *private = device->private;
-       struct alias_lcu *lcu;
+       __u8 uaddr = private->uid.real_unit_addr;
+       struct alias_lcu *lcu = private->lcu;
        unsigned long flags;
        int rc;
 
-       lcu = private->lcu;
        rc = 0;
        spin_lock_irqsave(&lcu->lock, flags);
+       /*
+        * Check if device and lcu type differ. If so, the uac data may be
+        * outdated and needs to be updated.
+        */
+       if (private->uid.type != lcu->uac->unit[uaddr].ua_type) {
+               lcu->flags |= UPDATE_PENDING;
+               DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+                             "uid type mismatch - trigger rescan");
+       }
        if (!(lcu->flags & UPDATE_PENDING)) {
                rc = _add_device_to_lcu(lcu, device, device);
                if (rc)
index 6652a49a49b17210228fcdd674faa6e81baffd06..9029804dcd225b85d5609c8af6c6ebc95522f12e 100644 (file)
@@ -452,6 +452,7 @@ static void chsc_process_sei_link_incident(struct chsc_sei_nt0_area *sei_area)
 
 static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area)
 {
+       struct channel_path *chp;
        struct chp_link link;
        struct chp_id chpid;
        int status;
@@ -464,10 +465,17 @@ static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area)
        chpid.id = sei_area->rsid;
        /* allocate a new channel path structure, if needed */
        status = chp_get_status(chpid);
-       if (status < 0)
-               chp_new(chpid);
-       else if (!status)
+       if (!status)
                return;
+
+       if (status < 0) {
+               chp_new(chpid);
+       } else {
+               chp = chpid_to_chp(chpid);
+               mutex_lock(&chp->lock);
+               chp_update_desc(chp);
+               mutex_unlock(&chp->lock);
+       }
        memset(&link, 0, sizeof(struct chp_link));
        link.chpid = chpid;
        if ((sei_area->vf & 0xc0) != 0) {
index 439991d71b146698c90540c5633cfdd0fbe1bc16..4c14ce428e92d8927fedb8702d3d9dd26801ddad 100644 (file)
@@ -141,7 +141,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
        int i;
 
        for (i = 0; i < nr_queues; i++) {
-               q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL);
+               q = kmem_cache_zalloc(qdio_q_cache, GFP_KERNEL);
                if (!q)
                        return -ENOMEM;
 
@@ -456,7 +456,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
 {
        struct ciw *ciw;
        struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data;
-       int rc;
 
        memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib));
        memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag));
@@ -493,16 +492,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
        ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE);
        if (!ciw) {
                DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no);
-               rc = -EINVAL;
-               goto out_err;
+               return -EINVAL;
        }
        irq_ptr->equeue = *ciw;
 
        ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE);
        if (!ciw) {
                DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no);
-               rc = -EINVAL;
-               goto out_err;
+               return -EINVAL;
        }
        irq_ptr->aqueue = *ciw;
 
@@ -512,9 +509,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
        init_data->cdev->handler = qdio_int_handler;
        spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev));
        return 0;
-out_err:
-       qdio_release_memory(irq_ptr);
-       return rc;
 }
 
 void qdio_print_subchannel_info(struct qdio_irq *irq_ptr,
index 2c7550797ec2f51130c6e580a7b10d2bfc1155c5..dce92b2a895d6ff3bbe38104ed08ea32c7979432 100644 (file)
@@ -715,6 +715,10 @@ void cp_free(struct channel_program *cp)
  * and stores the result to ccwchain list. @cp must have been
  * initialized by a previous call with cp_init(). Otherwise, undefined
  * behavior occurs.
+ * For each chain composing the channel program:
+ * - On entry ch_len holds the count of CCWs to be translated.
+ * - On exit ch_len is adjusted to the count of successfully translated CCWs.
+ * This lets cp_free() determine from ch_len how many CCWs to free in a chain.
  *
  * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
  * as helpers to do ccw chain translation inside the kernel. Basically
@@ -749,11 +753,18 @@ int cp_prefetch(struct channel_program *cp)
                for (idx = 0; idx < len; idx++) {
                        ret = ccwchain_fetch_one(chain, idx, cp);
                        if (ret)
-                               return ret;
+                               goto out_err;
                }
        }
 
        return 0;
+out_err:
+       /* Only cleanup the chain elements that were actually translated. */
+       chain->ch_len = idx;
+       list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
+               chain->ch_len = 0;
+       }
+       return ret;
 }
 
 /**
index ff6963ad6e393f658feb887f6e8ce5f4eeac62cd..3c800642134e4330d62bb8c0053df62618840ff3 100644 (file)
@@ -20,12 +20,12 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
        int ccode;
        __u8 lpm;
        unsigned long flags;
+       int ret;
 
        sch = private->sch;
 
        spin_lock_irqsave(sch->lock, flags);
        private->state = VFIO_CCW_STATE_BUSY;
-       spin_unlock_irqrestore(sch->lock, flags);
 
        orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
 
@@ -38,10 +38,12 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
                 * Initialize device status information
                 */
                sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
-               return 0;
+               ret = 0;
+               break;
        case 1:         /* Status pending */
        case 2:         /* Busy */
-               return -EBUSY;
+               ret = -EBUSY;
+               break;
        case 3:         /* Device/path not operational */
        {
                lpm = orb->cmd.lpm;
@@ -51,13 +53,16 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
                        sch->lpm = 0;
 
                if (cio_update_schib(sch))
-                       return -ENODEV;
-
-               return sch->lpm ? -EACCES : -ENODEV;
+                       ret = -ENODEV;
+               else
+                       ret = sch->lpm ? -EACCES : -ENODEV;
+               break;
        }
        default:
-               return ccode;
+               ret = ccode;
        }
+       spin_unlock_irqrestore(sch->lock, flags);
+       return ret;
 }
 
 static void fsm_notoper(struct vfio_ccw_private *private,
index 0ee8f33efb544877c37af645d348918078d88957..2d9fe7e4ee400d8df5d7837f46d4c4569d1780ed 100644 (file)
@@ -1928,6 +1928,8 @@ lcs_portno_store (struct device *dev, struct device_attribute *attr, const char
                return -EINVAL;
         /* TODO: sanity checks */
         card->portno = value;
+       if (card->dev)
+               card->dev->dev_port = card->portno;
 
         return count;
 
@@ -2158,6 +2160,7 @@ lcs_new_device(struct ccwgroup_device *ccwgdev)
        card->dev = dev;
        card->dev->ml_priv = card;
        card->dev->netdev_ops = &lcs_netdev_ops;
+       card->dev->dev_port = card->portno;
        memcpy(card->dev->dev_addr, card->mac, LCS_MAC_LENGTH);
 #ifdef CONFIG_IP_MULTICAST
        if (!lcs_check_multicast_support(card))
index 4326715dc13eb18af4c69cc233f279ab264fae36..2a5fec55bf60f6f30fd684e1c8c5c9a594aa8e75 100644 (file)
@@ -148,6 +148,7 @@ struct qeth_perf_stats {
        unsigned int tx_csum;
        unsigned int tx_lin;
        unsigned int tx_linfail;
+       unsigned int rx_csum;
 };
 
 /* Routing stuff */
@@ -557,7 +558,6 @@ enum qeth_prot_versions {
 enum qeth_cmd_buffer_state {
        BUF_STATE_FREE,
        BUF_STATE_LOCKED,
-       BUF_STATE_PROCESSED,
 };
 
 enum qeth_cq {
@@ -601,7 +601,6 @@ struct qeth_channel {
        struct qeth_cmd_buffer iob[QETH_CMD_BUFFER_NO];
        atomic_t irq_pending;
        int io_buf_no;
-       int buf_no;
 };
 
 /**
@@ -714,9 +713,6 @@ enum qeth_discipline_id {
 
 struct qeth_discipline {
        const struct device_type *devtype;
-       void (*start_poll)(struct ccw_device *, int, unsigned long);
-       qdio_handler_t *input_handler;
-       qdio_handler_t *output_handler;
        int (*process_rx_buffer)(struct qeth_card *card, int budget, int *done);
        int (*recover)(void *ptr);
        int (*setup) (struct ccwgroup_device *);
@@ -782,9 +778,9 @@ struct qeth_card {
        struct qeth_card_options options;
 
        wait_queue_head_t wait_q;
-       spinlock_t vlanlock;
        spinlock_t mclock;
        unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+       struct mutex vid_list_mutex;            /* vid_list */
        struct list_head vid_list;
        DECLARE_HASHTABLE(mac_htable, 4);
        DECLARE_HASHTABLE(ip_htable, 4);
@@ -869,6 +865,32 @@ static inline int qeth_get_ip_version(struct sk_buff *skb)
        }
 }
 
+static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
+                               u8 flags)
+{
+       if ((card->dev->features & NETIF_F_RXCSUM) &&
+           (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) {
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               if (card->options.performance_stats)
+                       card->perf_stats.rx_csum++;
+       } else {
+               skb->ip_summed = CHECKSUM_NONE;
+       }
+}
+
+static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, int ipv)
+{
+       *flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ;
+       if ((ipv == 4 && ip_hdr(skb)->protocol == IPPROTO_UDP) ||
+           (ipv == 6 && ipv6_hdr(skb)->nexthdr == IPPROTO_UDP))
+               *flags |= QETH_HDR_EXT_UDP;
+       if (ipv == 4) {
+               /* some HW requires combined L3+L4 csum offload: */
+               *flags |= QETH_HDR_EXT_CSUM_HDR_REQ;
+               ip_hdr(skb)->check = 0;
+       }
+}
+
 static inline void qeth_put_buffer_pool_entry(struct qeth_card *card,
                struct qeth_buffer_pool_entry *entry)
 {
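
The new qeth_rx_csum()/qeth_tx_csum() inlines above centralize checksum-offload flag handling that was previously duplicated in the L2 and L3 paths. A standalone sketch of the flag logic, with mocked bit values; the real QETH_HDR_EXT_* constants live in qeth_core_mpc.h and are not reproduced here:

#include <stdbool.h>
#include <stdint.h>

/* placeholder bit values, for illustration only */
#define HDR_EXT_CSUM_TRANSP_REQ 0x40
#define HDR_EXT_CSUM_HDR_REQ    0x10
#define HDR_EXT_UDP             0x02

/* RX: mark the packet verified only if the device may and did check it */
static bool rx_csum_unnecessary(bool dev_has_rxcsum, uint8_t flags)
{
	return dev_has_rxcsum && (flags & HDR_EXT_CSUM_TRANSP_REQ);
}

/* TX: always request transport csum; UDP gets its own flag, and IPv4
 * additionally requests the combined L3+L4 offload (with ip->check
 * zeroed by the caller, as in the hunk above) */
static uint8_t tx_csum_flags(int ipv, bool is_udp)
{
	uint8_t flags = HDR_EXT_CSUM_TRANSP_REQ;

	if (is_udp)
		flags |= HDR_EXT_UDP;
	if (ipv == 4)
		flags |= HDR_EXT_CSUM_HDR_REQ;
	return flags;
}
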
@@ -881,6 +903,27 @@ static inline int qeth_is_diagass_supported(struct qeth_card *card,
        return card->info.diagass_support & (__u32)cmd;
 }
 
+int qeth_send_simple_setassparms_prot(struct qeth_card *card,
+                                     enum qeth_ipa_funcs ipa_func,
+                                     u16 cmd_code, long data,
+                                     enum qeth_prot_versions prot);
+/* IPv4 variant */
+static inline int qeth_send_simple_setassparms(struct qeth_card *card,
+                                              enum qeth_ipa_funcs ipa_func,
+                                              u16 cmd_code, long data)
+{
+       return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
+                                                data, QETH_PROT_IPV4);
+}
+
+static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card,
+                                                 enum qeth_ipa_funcs ipa_func,
+                                                 u16 cmd_code, long data)
+{
+       return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
+                                                data, QETH_PROT_IPV6);
+}
+
 extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
 extern const struct attribute_group *qeth_generic_attr_groups[];
@@ -923,13 +966,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
                struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
                struct qeth_hdr **);
 void qeth_schedule_recovery(struct qeth_card *);
-void qeth_qdio_start_poll(struct ccw_device *, int, unsigned long);
 int qeth_poll(struct napi_struct *napi, int budget);
-void qeth_qdio_input_handler(struct ccw_device *,
-               unsigned int, unsigned int, int,
-               int, unsigned long);
-void qeth_qdio_output_handler(struct ccw_device *, unsigned int,
-                       int, int, int, unsigned long);
 void qeth_clear_ipacmd_list(struct qeth_card *);
 int qeth_qdio_clear_card(struct qeth_card *, int);
 void qeth_clear_working_pool_list(struct qeth_card *);
@@ -981,8 +1018,6 @@ int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot);
 void qeth_trace_features(struct qeth_card *);
 void qeth_close_dev(struct qeth_card *);
-int qeth_send_simple_setassparms(struct qeth_card *, enum qeth_ipa_funcs,
-                                __u16, long);
 int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,
                          long,
                          int (*reply_cb)(struct qeth_card *,
index 04fefa5bb08d168b7beefeeef62b1190aab84232..06415b6a8f68c9a8d9972a6fec9749cbac532c88 100644 (file)
@@ -706,7 +706,6 @@ void qeth_clear_ipacmd_list(struct qeth_card *card)
                qeth_put_reply(reply);
        }
        spin_unlock_irqrestore(&card->lock, flags);
-       atomic_set(&card->write.irq_pending, 0);
 }
 EXPORT_SYMBOL_GPL(qeth_clear_ipacmd_list);
 
@@ -818,7 +817,6 @@ void qeth_clear_cmd_buffers(struct qeth_channel *channel)
 
        for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++)
                qeth_release_buffer(channel, &channel->iob[cnt]);
-       channel->buf_no = 0;
        channel->io_buf_no = 0;
 }
 EXPORT_SYMBOL_GPL(qeth_clear_cmd_buffers);
@@ -924,7 +922,6 @@ static int qeth_setup_channel(struct qeth_channel *channel)
                        kfree(channel->iob[cnt].data);
                return -ENOMEM;
        }
-       channel->buf_no = 0;
        channel->io_buf_no = 0;
        atomic_set(&channel->irq_pending, 0);
        spin_lock_init(&channel->iob_lock);
@@ -1100,16 +1097,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
 {
        int rc;
        int cstat, dstat;
-       struct qeth_cmd_buffer *buffer;
+       struct qeth_cmd_buffer *iob = NULL;
        struct qeth_channel *channel;
        struct qeth_card *card;
-       struct qeth_cmd_buffer *iob;
-       __u8 index;
-
-       if (__qeth_check_irb_error(cdev, intparm, irb))
-               return;
-       cstat = irb->scsw.cmd.cstat;
-       dstat = irb->scsw.cmd.dstat;
 
        card = CARD_FROM_CDEV(cdev);
        if (!card)
@@ -1127,6 +1117,19 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                channel = &card->data;
                QETH_CARD_TEXT(card, 5, "data");
        }
+
+       if (qeth_intparm_is_iob(intparm))
+               iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
+
+       if (__qeth_check_irb_error(cdev, intparm, irb)) {
+               /* IO was terminated, free its resources. */
+               if (iob)
+                       qeth_release_buffer(iob->channel, iob);
+               atomic_set(&channel->irq_pending, 0);
+               wake_up(&card->wait_q);
+               return;
+       }
+
        atomic_set(&channel->irq_pending, 0);
 
        if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC))
@@ -1150,6 +1153,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                /* we don't have to handle this further */
                intparm = 0;
        }
+
+       cstat = irb->scsw.cmd.cstat;
+       dstat = irb->scsw.cmd.dstat;
+
        if ((dstat & DEV_STAT_UNIT_EXCEP) ||
            (dstat & DEV_STAT_UNIT_CHECK) ||
            (cstat)) {
@@ -1182,25 +1189,15 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                channel->state = CH_STATE_RCD_DONE;
                goto out;
        }
-       if (intparm) {
-               buffer = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
-               buffer->state = BUF_STATE_PROCESSED;
-       }
        if (channel == &card->data)
                return;
        if (channel == &card->read &&
            channel->state == CH_STATE_UP)
                __qeth_issue_next_read(card);
 
-       iob = channel->iob;
-       index = channel->buf_no;
-       while (iob[index].state == BUF_STATE_PROCESSED) {
-               if (iob[index].callback != NULL)
-                       iob[index].callback(channel, iob + index);
+       if (iob && iob->callback)
+               iob->callback(iob->channel, iob);
 
-               index = (index + 1) % QETH_CMD_BUFFER_NO;
-       }
-       channel->buf_no = index;
 out:
        wake_up(&card->wait_q);
        return;
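
The qeth_irq() rework above drops the BUF_STATE_PROCESSED ring walk entirely: the interrupt parameter already carries the command buffer's address, so the handler recovers the buffer with a cast and runs its callback directly, treating 0 and the QETH_*_PARM sentinels as non-buffer interrupts (see qeth_intparm_is_iob() later in this series). A reduced sketch of that dispatch, with simplified types and no sentinel table:

#include <stddef.h>

struct cmd_buf {
	void (*callback)(struct cmd_buf *iob);
};

static void irq_handler(unsigned long intparm)
{
	struct cmd_buf *iob = NULL;

	if (intparm)    /* 0 and the sentinel values mean "no buffer attached" */
		iob = (struct cmd_buf *)intparm;

	/* ... error checks, releasing iob on terminated IO, would go here ... */

	if (iob && iob->callback)
		iob->callback(iob);    /* exactly one buffer completes per IRQ */
}
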
@@ -1470,13 +1467,13 @@ static int qeth_setup_card(struct qeth_card *card)
        card->lan_online = 0;
        card->read_or_write_problem = 0;
        card->dev = NULL;
-       spin_lock_init(&card->vlanlock);
        spin_lock_init(&card->mclock);
        spin_lock_init(&card->lock);
        spin_lock_init(&card->ip_lock);
        spin_lock_init(&card->thread_mask_lock);
        mutex_init(&card->conf_mutex);
        mutex_init(&card->discipline_mutex);
+       mutex_init(&card->vid_list_mutex);
        card->thread_start_mask = 0;
        card->thread_allowed_mask = 0;
        card->thread_running_mask = 0;
@@ -1870,8 +1867,8 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel,
                   atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0);
        QETH_DBF_TEXT(SETUP, 6, "noirqpnd");
        spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
-       rc = ccw_device_start(channel->ccwdev,
-                             &channel->ccw, (addr_t) iob, 0, 0);
+       rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw,
+                                     (addr_t) iob, 0, 0, QETH_TIMEOUT);
        spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
 
        if (rc) {
@@ -1888,7 +1885,6 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel,
        if (channel->state != CH_STATE_UP) {
                rc = -ETIME;
                QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc);
-               qeth_clear_cmd_buffers(channel);
        } else
                rc = 0;
        return rc;
@@ -1942,8 +1938,8 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel,
                   atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0);
        QETH_DBF_TEXT(SETUP, 6, "noirqpnd");
        spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
-       rc = ccw_device_start(channel->ccwdev,
-                             &channel->ccw, (addr_t) iob, 0, 0);
+       rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw,
+                                     (addr_t) iob, 0, 0, QETH_TIMEOUT);
        spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
 
        if (rc) {
@@ -1964,7 +1960,6 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel,
                QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n",
                        dev_name(&channel->ccwdev->dev));
                QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME);
-               qeth_clear_cmd_buffers(channel);
                return -ETIME;
        }
        return qeth_idx_activate_get_answer(channel, idx_reply_cb);
@@ -2166,8 +2161,8 @@ int qeth_send_control_data(struct qeth_card *card, int len,
 
        QETH_CARD_TEXT(card, 6, "noirqpnd");
        spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags);
-       rc = ccw_device_start(card->write.ccwdev, &card->write.ccw,
-                             (addr_t) iob, 0, 0);
+       rc = ccw_device_start_timeout(CARD_WDEV(card), &card->write.ccw,
+                                     (addr_t) iob, 0, 0, event_timeout);
        spin_unlock_irqrestore(get_ccwdev_lock(card->write.ccwdev), flags);
        if (rc) {
                QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: "
@@ -2199,8 +2194,6 @@ int qeth_send_control_data(struct qeth_card *card, int len,
                }
        }
 
-       if (reply->rc == -EIO)
-               goto error;
        rc = reply->rc;
        qeth_put_reply(reply);
        return rc;
@@ -2211,10 +2204,6 @@ int qeth_send_control_data(struct qeth_card *card, int len,
        list_del_init(&reply->list);
        spin_unlock_irqrestore(&reply->card->lock, flags);
        atomic_inc(&reply->received);
-error:
-       atomic_set(&card->write.irq_pending, 0);
-       qeth_release_buffer(iob->channel, iob);
-       card->write.buf_no = (card->write.buf_no + 1) % QETH_CMD_BUFFER_NO;
        rc = reply->rc;
        qeth_put_reply(reply);
        return rc;
@@ -3033,28 +3022,23 @@ static int qeth_send_startlan(struct qeth_card *card)
        return rc;
 }
 
-static int qeth_default_setadapterparms_cb(struct qeth_card *card,
-               struct qeth_reply *reply, unsigned long data)
+static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd)
 {
-       struct qeth_ipa_cmd *cmd;
-
-       QETH_CARD_TEXT(card, 4, "defadpcb");
-
-       cmd = (struct qeth_ipa_cmd *) data;
-       if (cmd->hdr.return_code == 0)
+       if (!cmd->hdr.return_code)
                cmd->hdr.return_code =
                        cmd->data.setadapterparms.hdr.return_code;
-       return 0;
+       return cmd->hdr.return_code;
 }
 
 static int qeth_query_setadapterparms_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
        QETH_CARD_TEXT(card, 3, "quyadpcb");
+       if (qeth_setadpparms_inspect_rc(cmd))
+               return 0;
 
-       cmd = (struct qeth_ipa_cmd *) data;
        if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) {
                card->info.link_type =
                      cmd->data.setadapterparms.data.query_cmds_supp.lan_type;
@@ -3062,7 +3046,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card,
        }
        card->options.adp.supported_funcs =
                cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds;
-       return qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd);
+       return 0;
 }
 
 static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
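
qeth_setadpparms_inspect_rc() above replaces qeth_default_setadapterparms_cb(): it folds the nested setadapterparms return code into the top-level one exactly once and returns it, so each callback can bail out before touching reply data. The shape of that pattern, with simplified stand-in structs:

struct toy_hdr   { int return_code; };
struct toy_reply {
	struct toy_hdr hdr;        /* top-level IPA return code */
	struct toy_hdr adp;        /* nested setadapterparms return code */
	int payload;
};

static int inspect_rc(struct toy_reply *r)
{
	if (!r->hdr.return_code)
		r->hdr.return_code = r->adp.return_code;
	return r->hdr.return_code;
}

static int example_cb(struct toy_reply *r, int *out)
{
	if (inspect_rc(r))
		return 0;          /* error recorded; don't touch the payload */
	*out = r->payload;
	return 0;
}
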
@@ -3154,22 +3138,20 @@ EXPORT_SYMBOL_GPL(qeth_query_ipassists);
 static int qeth_query_switch_attributes_cb(struct qeth_card *card,
                                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
-       struct qeth_switch_info *sw_info;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
        struct qeth_query_switch_attributes *attrs;
+       struct qeth_switch_info *sw_info;
 
        QETH_CARD_TEXT(card, 2, "qswiatcb");
-       cmd = (struct qeth_ipa_cmd *) data;
-       sw_info = (struct qeth_switch_info *)reply->param;
-       if (cmd->data.setadapterparms.hdr.return_code == 0) {
-               attrs = &cmd->data.setadapterparms.data.query_switch_attributes;
-               sw_info->capabilities = attrs->capabilities;
-               sw_info->settings = attrs->settings;
-               QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities,
-                                                       sw_info->settings);
-       }
-       qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd);
+       if (qeth_setadpparms_inspect_rc(cmd))
+               return 0;
 
+       sw_info = (struct qeth_switch_info *)reply->param;
+       attrs = &cmd->data.setadapterparms.data.query_switch_attributes;
+       sw_info->capabilities = attrs->capabilities;
+       sw_info->settings = attrs->settings;
+       QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities,
+                       sw_info->settings);
        return 0;
 }
 
@@ -3606,15 +3588,14 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
        }
 }
 
-void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue,
-               unsigned long card_ptr)
+static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue,
+                                unsigned long card_ptr)
 {
        struct qeth_card *card = (struct qeth_card *)card_ptr;
 
        if (card->dev && (card->dev->flags & IFF_UP))
                napi_schedule(&card->napi);
 }
-EXPORT_SYMBOL_GPL(qeth_qdio_start_poll);
 
 int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
 {
@@ -3716,9 +3697,10 @@ static void qeth_qdio_cq_handler(struct qeth_card *card,
        return;
 }
 
-void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err,
-               unsigned int queue, int first_elem, int count,
-               unsigned long card_ptr)
+static void qeth_qdio_input_handler(struct ccw_device *ccwdev,
+                                   unsigned int qdio_err, int queue,
+                                   int first_elem, int count,
+                                   unsigned long card_ptr)
 {
        struct qeth_card *card = (struct qeth_card *)card_ptr;
 
@@ -3729,14 +3711,12 @@ void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err,
                qeth_qdio_cq_handler(card, qdio_err, queue, first_elem, count);
        else if (qdio_err)
                qeth_schedule_recovery(card);
-
-
 }
-EXPORT_SYMBOL_GPL(qeth_qdio_input_handler);
 
-void qeth_qdio_output_handler(struct ccw_device *ccwdev,
-               unsigned int qdio_error, int __queue, int first_element,
-               int count, unsigned long card_ptr)
+static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
+                                    unsigned int qdio_error, int __queue,
+                                    int first_element, int count,
+                                    unsigned long card_ptr)
 {
        struct qeth_card *card        = (struct qeth_card *) card_ptr;
        struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
@@ -3805,7 +3785,6 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev,
                card->perf_stats.outbound_handler_time += qeth_get_micros() -
                        card->perf_stats.outbound_handler_start_time;
 }
-EXPORT_SYMBOL_GPL(qeth_qdio_output_handler);
 
 /* We cannot use outbound queue 3 for unicast packets on HiperSockets */
 static inline int qeth_cut_iqd_prio(struct qeth_card *card, int queue_num)
@@ -4207,16 +4186,13 @@ EXPORT_SYMBOL_GPL(qeth_do_send_packet);
 static int qeth_setadp_promisc_mode_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
        struct qeth_ipacmd_setadpparms *setparms;
 
        QETH_CARD_TEXT(card, 4, "prmadpcb");
 
-       cmd = (struct qeth_ipa_cmd *) data;
        setparms = &(cmd->data.setadapterparms);
-
-       qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd);
-       if (cmd->hdr.return_code) {
+       if (qeth_setadpparms_inspect_rc(cmd)) {
                QETH_CARD_TEXT_(card, 4, "prmrc%x", cmd->hdr.return_code);
                setparms->data.mode = SET_PROMISC_MODE_OFF;
        }
@@ -4286,18 +4262,18 @@ EXPORT_SYMBOL_GPL(qeth_get_stats);
 static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
        QETH_CARD_TEXT(card, 4, "chgmaccb");
+       if (qeth_setadpparms_inspect_rc(cmd))
+               return 0;
 
-       cmd = (struct qeth_ipa_cmd *) data;
        if (!card->options.layer2 ||
            !(card->info.mac_bits & QETH_LAYER2_MAC_READ)) {
                ether_addr_copy(card->dev->dev_addr,
                                cmd->data.setadapterparms.data.change_addr.addr);
                card->info.mac_bits |= QETH_LAYER2_MAC_READ;
        }
-       qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd);
        return 0;
 }
 
@@ -4328,13 +4304,15 @@ EXPORT_SYMBOL_GPL(qeth_setadpparms_change_macaddr);
 static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
        struct qeth_set_access_ctrl *access_ctrl_req;
        int fallback = *(int *)reply->param;
 
        QETH_CARD_TEXT(card, 4, "setaccb");
+       if (cmd->hdr.return_code)
+               return 0;
+       qeth_setadpparms_inspect_rc(cmd);
 
-       cmd = (struct qeth_ipa_cmd *) data;
        access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
        QETH_DBF_TEXT_(SETUP, 2, "setaccb");
        QETH_DBF_TEXT_(SETUP, 2, "%s", card->gdev->dev.kobj.name);
@@ -4407,7 +4385,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
                        card->options.isolation = card->options.prev_isolation;
                break;
        }
-       qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd);
        return 0;
 }
 
@@ -4695,14 +4672,15 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
 static int qeth_setadpparms_query_oat_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data;
        struct qeth_qoat_priv *priv;
        char *resdata;
        int resdatalen;
 
        QETH_CARD_TEXT(card, 3, "qoatcb");
+       if (qeth_setadpparms_inspect_rc(cmd))
+               return 0;
 
-       cmd = (struct qeth_ipa_cmd *)data;
        priv = (struct qeth_qoat_priv *)reply->param;
        resdatalen = cmd->data.setadapterparms.hdr.cmdlength;
        resdata = (char *)data + 28;
@@ -4796,21 +4774,18 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
 static int qeth_query_card_info_cb(struct qeth_card *card,
                                   struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct carrier_info *carrier_info = (struct carrier_info *)reply->param;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data;
        struct qeth_query_card_info *card_info;
-       struct carrier_info *carrier_info;
 
        QETH_CARD_TEXT(card, 2, "qcrdincb");
-       carrier_info = (struct carrier_info *)reply->param;
-       cmd = (struct qeth_ipa_cmd *)data;
-       card_info = &cmd->data.setadapterparms.data.card_info;
-       if (cmd->data.setadapterparms.hdr.return_code == 0) {
-               carrier_info->card_type = card_info->card_type;
-               carrier_info->port_mode = card_info->port_mode;
-               carrier_info->port_speed = card_info->port_speed;
-       }
+       if (qeth_setadpparms_inspect_rc(cmd))
+               return 0;
 
-       qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd);
+       card_info = &cmd->data.setadapterparms.data.card_info;
+       carrier_info->card_type = card_info->card_type;
+       carrier_info->port_mode = card_info->port_mode;
+       carrier_info->port_speed = card_info->port_speed;
        return 0;
 }
 
@@ -4857,7 +4832,7 @@ int qeth_vm_request_mac(struct qeth_card *card)
                goto out;
        }
 
-       ccw_device_get_id(CARD_DDEV(card), &id);
+       ccw_device_get_id(CARD_RDEV(card), &id);
        request->resp_buf_len = sizeof(*response);
        request->resp_version = DIAG26C_VERSION2;
        request->op_code = DIAG26C_GET_MAC;
@@ -5017,7 +4992,7 @@ static int qeth_qdio_establish(struct qeth_card *card)
                goto out_free_in_sbals;
        }
        for (i = 0; i < card->qdio.no_in_queues; ++i)
-               queue_start_poll[i] = card->discipline->start_poll;
+               queue_start_poll[i] = qeth_qdio_start_poll;
 
        qeth_qdio_establish_cq(card, in_sbal_ptrs, queue_start_poll);
 
@@ -5041,8 +5016,8 @@ static int qeth_qdio_establish(struct qeth_card *card)
        init_data.qib_param_field        = qib_param_field;
        init_data.no_input_qs            = card->qdio.no_in_queues;
        init_data.no_output_qs           = card->qdio.no_out_queues;
-       init_data.input_handler          = card->discipline->input_handler;
-       init_data.output_handler         = card->discipline->output_handler;
+       init_data.input_handler          = qeth_qdio_input_handler;
+       init_data.output_handler         = qeth_qdio_output_handler;
        init_data.queue_start_poll_array = queue_start_poll;
        init_data.int_parm               = (unsigned long) card;
        init_data.input_sbal_addr_array  = (void **) in_sbal_ptrs;
@@ -5226,6 +5201,11 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
        rc = qeth_query_ipassists(card, QETH_PROT_IPV4);
        if (rc == -ENOMEM)
                goto out;
+       if (qeth_is_supported(card, IPA_IPV6)) {
+               rc = qeth_query_ipassists(card, QETH_PROT_IPV6);
+               if (rc == -ENOMEM)
+                       goto out;
+       }
        if (qeth_is_supported(card, IPA_SETADAPTERPARMS)) {
                rc = qeth_query_setadapterparms(card);
                if (rc < 0) {
@@ -5533,26 +5513,26 @@ int qeth_send_setassparms(struct qeth_card *card,
 }
 EXPORT_SYMBOL_GPL(qeth_send_setassparms);
 
-int qeth_send_simple_setassparms(struct qeth_card *card,
-                                enum qeth_ipa_funcs ipa_func,
-                                __u16 cmd_code, long data)
+int qeth_send_simple_setassparms_prot(struct qeth_card *card,
+                                     enum qeth_ipa_funcs ipa_func,
+                                     u16 cmd_code, long data,
+                                     enum qeth_prot_versions prot)
 {
        int rc;
        int length = 0;
        struct qeth_cmd_buffer *iob;
 
-       QETH_CARD_TEXT(card, 4, "simassp4");
+       QETH_CARD_TEXT_(card, 4, "simassp%i", prot);
        if (data)
                length = sizeof(__u32);
-       iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
-                                      length, QETH_PROT_IPV4);
+       iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, length, prot);
        if (!iob)
                return -ENOMEM;
        rc = qeth_send_setassparms(card, iob, length, data,
                                   qeth_setassparms_cb, NULL);
        return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms);
+EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms_prot);
 
 static void qeth_unregister_dbf_views(void)
 {
@@ -6030,7 +6010,8 @@ static struct {
        {"tx lin"},
        {"tx linfail"},
        {"cq handler count"},
-       {"cq handler time"}
+       {"cq handler time"},
+       {"rx csum"}
 };
 
 int qeth_core_get_sset_count(struct net_device *dev, int stringset)
@@ -6092,6 +6073,7 @@ void qeth_core_get_ethtool_stats(struct net_device *dev,
        data[35] = card->perf_stats.tx_linfail;
        data[36] = card->perf_stats.cq_cnt;
        data[37] = card->perf_stats.cq_time;
+       data[38] = card->perf_stats.rx_csum;
 }
 EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats);
 
@@ -6348,14 +6330,15 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
 static int qeth_ipa_checksum_run_cmd(struct qeth_card *card,
                                     enum qeth_ipa_funcs ipa_func,
                                     __u16 cmd_code, long data,
-                                    struct qeth_checksum_cmd *chksum_cb)
+                                    struct qeth_checksum_cmd *chksum_cb,
+                                    enum qeth_prot_versions prot)
 {
        struct qeth_cmd_buffer *iob;
        int rc = -ENOMEM;
 
        QETH_CARD_TEXT(card, 4, "chkdocmd");
        iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
-                                      sizeof(__u32), QETH_PROT_IPV4);
+                                      sizeof(__u32), prot);
        if (iob)
                rc = qeth_send_setassparms(card, iob, sizeof(__u32), data,
                                           qeth_ipa_checksum_run_cmd_cb,
@@ -6363,16 +6346,17 @@ static int qeth_ipa_checksum_run_cmd(struct qeth_card *card,
        return rc;
 }
 
-static int qeth_send_checksum_on(struct qeth_card *card, int cstype)
+static int qeth_send_checksum_on(struct qeth_card *card, int cstype,
+                                enum qeth_prot_versions prot)
 {
-       const __u32 required_features = QETH_IPA_CHECKSUM_IP_HDR |
-                                       QETH_IPA_CHECKSUM_UDP |
-                                       QETH_IPA_CHECKSUM_TCP;
+       u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP;
        struct qeth_checksum_cmd chksum_cb;
        int rc;
 
+       if (prot == QETH_PROT_IPV4)
+               required_features |= QETH_IPA_CHECKSUM_IP_HDR;
        rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0,
-                                      &chksum_cb);
+                                      &chksum_cb, prot);
        if (!rc) {
                if ((required_features & chksum_cb.supported) !=
                    required_features)
@@ -6384,37 +6368,42 @@ static int qeth_send_checksum_on(struct qeth_card *card, int cstype)
                                 QETH_CARD_IFNAME(card));
        }
        if (rc) {
-               qeth_send_simple_setassparms(card, cstype, IPA_CMD_ASS_STOP, 0);
+               qeth_send_simple_setassparms_prot(card, cstype,
+                                                 IPA_CMD_ASS_STOP, 0, prot);
                dev_warn(&card->gdev->dev,
-                        "Starting HW checksumming for %s failed, using SW checksumming\n",
-                        QETH_CARD_IFNAME(card));
+                        "Starting HW IPv%d checksumming for %s failed, using SW checksumming\n",
+                        prot, QETH_CARD_IFNAME(card));
                return rc;
        }
        rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE,
-                                      chksum_cb.supported, &chksum_cb);
+                                      chksum_cb.supported, &chksum_cb,
+                                      prot);
        if (!rc) {
                if ((required_features & chksum_cb.enabled) !=
                    required_features)
                        rc = -EIO;
        }
        if (rc) {
-               qeth_send_simple_setassparms(card, cstype, IPA_CMD_ASS_STOP, 0);
+               qeth_send_simple_setassparms_prot(card, cstype,
+                                                 IPA_CMD_ASS_STOP, 0, prot);
                dev_warn(&card->gdev->dev,
-                        "Enabling HW checksumming for %s failed, using SW checksumming\n",
-                        QETH_CARD_IFNAME(card));
+                        "Enabling HW IPv%d checksumming for %s failed, using SW checksumming\n",
+                        prot, QETH_CARD_IFNAME(card));
                return rc;
        }
 
-       dev_info(&card->gdev->dev, "HW Checksumming (%sbound) enabled\n",
-                cstype == IPA_INBOUND_CHECKSUM ? "in" : "out");
+       dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n",
+                cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot);
        return 0;
 }
 
-static int qeth_set_ipa_csum(struct qeth_card *card, int on, int cstype)
+static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype,
+                            enum qeth_prot_versions prot)
 {
-       int rc = (on) ? qeth_send_checksum_on(card, cstype)
-                     : qeth_send_simple_setassparms(card, cstype,
-                                                    IPA_CMD_ASS_STOP, 0);
+       int rc = (on) ? qeth_send_checksum_on(card, cstype, prot)
+                     : qeth_send_simple_setassparms_prot(card, cstype,
+                                                         IPA_CMD_ASS_STOP, 0,
+                                                         prot);
        return rc ? -EIO : 0;
 }
 
@@ -6441,8 +6430,31 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on)
        return rc;
 }
 
-#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO)
+static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on)
+{
+       int rc_ipv4 = (on) ? -EOPNOTSUPP : 0;
+       int rc_ipv6;
+
+       if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM))
+               rc_ipv4 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM,
+                                           QETH_PROT_IPV4);
+       if (!qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6))
+               /* at most the IPv4 Assist is available, so rc_ipv4 decides */
+               return rc_ipv4;
+
+       rc_ipv6 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM,
+                                   QETH_PROT_IPV6);
 
+       if (on)
+               /* enable: success if any Assist is active */
+               return (rc_ipv6) ? rc_ipv4 : 0;
+
+       /* disable: failure if any Assist is still active */
+       return (rc_ipv6) ? rc_ipv6 : rc_ipv4;
+}
+
+#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \
+                         NETIF_F_IPV6_CSUM)
 /**
  * qeth_recover_features() - Restore device features after recovery
  * @dev:       the recovering net_device
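
qeth_set_ipa_rx_csum() above combines the per-protocol results asymmetrically: enabling succeeds if either Assist came up, while disabling fails if either Assist is still active. That rule, isolated as a pure function for clarity (plain ints stand in for the card state; this mirrors the hunk, it is not driver code):

static int combine_rx_csum_rc(int on, int rc_v4, int rc_v6)
{
	if (on)                              /* enable: any success wins */
		return rc_v6 ? rc_v4 : 0;
	return rc_v6 ? rc_v6 : rc_v4;        /* disable: any failure loses */
}

So, for example, combine_rx_csum_rc(1, -5, 0) is 0 (IPv6 enable succeeded), while combine_rx_csum_rc(0, 0, -5) is -5 (the IPv6 Assist could not be stopped).
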
@@ -6477,16 +6489,19 @@ int qeth_set_features(struct net_device *dev, netdev_features_t features)
        QETH_DBF_HEX(SETUP, 2, &features, sizeof(features));
 
        if ((changed & NETIF_F_IP_CSUM)) {
-               rc = qeth_set_ipa_csum(card,
-                                      features & NETIF_F_IP_CSUM ? 1 : 0,
-                                      IPA_OUTBOUND_CHECKSUM);
+               rc = qeth_set_ipa_csum(card, features & NETIF_F_IP_CSUM,
+                                      IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV4);
                if (rc)
                        changed ^= NETIF_F_IP_CSUM;
        }
-       if ((changed & NETIF_F_RXCSUM)) {
-               rc = qeth_set_ipa_csum(card,
-                                       features & NETIF_F_RXCSUM ? 1 : 0,
-                                       IPA_INBOUND_CHECKSUM);
+       if (changed & NETIF_F_IPV6_CSUM) {
+               rc = qeth_set_ipa_csum(card, features & NETIF_F_IPV6_CSUM,
+                                      IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV6);
+               if (rc)
+                       changed ^= NETIF_F_IPV6_CSUM;
+       }
+       if (changed & NETIF_F_RXCSUM) {
+               rc = qeth_set_ipa_rx_csum(card, features & NETIF_F_RXCSUM);
                if (rc)
                        changed ^= NETIF_F_RXCSUM;
        }
@@ -6513,7 +6528,10 @@ netdev_features_t qeth_fix_features(struct net_device *dev,
        QETH_DBF_TEXT(SETUP, 2, "fixfeat");
        if (!qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM))
                features &= ~NETIF_F_IP_CSUM;
-       if (!qeth_is_supported(card, IPA_INBOUND_CHECKSUM))
+       if (!qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6))
+               features &= ~NETIF_F_IPV6_CSUM;
+       if (!qeth_is_supported(card, IPA_INBOUND_CHECKSUM) &&
+           !qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6))
                features &= ~NETIF_F_RXCSUM;
        if (!qeth_is_supported(card, IPA_OUTBOUND_TSO))
                features &= ~NETIF_F_TSO;
@@ -6563,10 +6581,14 @@ static int __init qeth_core_init(void)
        mutex_init(&qeth_mod_mutex);
 
        qeth_wq = create_singlethread_workqueue("qeth_wq");
+       if (!qeth_wq) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
 
        rc = qeth_register_dbf_views();
        if (rc)
-               goto out_err;
+               goto dbf_err;
        qeth_core_root_dev = root_device_register("qeth");
        rc = PTR_ERR_OR_ZERO(qeth_core_root_dev);
        if (rc)
@@ -6603,6 +6625,8 @@ static int __init qeth_core_init(void)
        root_device_unregister(qeth_core_root_dev);
 register_err:
        qeth_unregister_dbf_views();
+dbf_err:
+       destroy_workqueue(qeth_wq);
 out_err:
        pr_err("Initializing the qeth device driver failed\n");
        return rc;
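
The qeth_core_init() fix above adds the previously missing create_singlethread_workqueue() error check and a dbf_err unwind label, so a later failure tears the workqueue down again. The same goto-ladder shape, reduced to stubs (all names and error values illustrative):

static void *create_wq(void)      { return (void *)1; }  /* stub: succeeds */
static int   register_views(void) { return -5; }         /* stub: fails */
static void  destroy_wq(void *wq) { (void)wq; }

static int init_all(void)
{
	void *wq;
	int rc;

	wq = create_wq();
	if (!wq)
		return -12;          /* -ENOMEM: nothing acquired yet */

	rc = register_views();
	if (rc)
		goto dbf_err;        /* undo exactly what was acquired */
	return 0;

dbf_err:
	destroy_wq(wq);
	return rc;
}
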
index 619f897b4bb0cef7be641f6e45c8f31a1502409c..878e62f3516915081c7a4f834724a67afd8485f4 100644 (file)
@@ -35,6 +35,18 @@ extern unsigned char IPA_PDU_HEADER[];
 #define QETH_HALT_CHANNEL_PARM -11
 #define QETH_RCD_PARM -12
 
+static inline bool qeth_intparm_is_iob(unsigned long intparm)
+{
+       switch (intparm) {
+       case QETH_CLEAR_CHANNEL_PARM:
+       case QETH_HALT_CHANNEL_PARM:
+       case QETH_RCD_PARM:
+       case 0:
+               return false;
+       }
+       return true;
+}
+
 /*****************************************************************************/
 /* IP Assist related definitions                                             */
 /*****************************************************************************/
@@ -234,6 +246,8 @@ enum qeth_ipa_funcs {
        IPA_QUERY_ARP_ASSIST    = 0x00040000L,
        IPA_INBOUND_TSO         = 0x00080000L,
        IPA_OUTBOUND_TSO        = 0x00100000L,
+       IPA_INBOUND_CHECKSUM_V6 = 0x00400000L,
+       IPA_OUTBOUND_CHECKSUM_V6 = 0x00800000L,
 };
 
 /* SETIP/DELIP IPA Command: ***************************************************/
index ae81534de91228910fd877fce0e1e262cc24fddf..c3f18afb368b1118a1ca56c89bf8115f84db088f 100644 (file)
@@ -144,6 +144,8 @@ static ssize_t qeth_dev_portno_store(struct device *dev,
                goto out;
        }
        card->info.portno = portno;
+       if (card->dev)
+               card->dev->dev_port = portno;
 out:
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
index 2ad6f12f3d497592484e2321fb02c39c7ca86962..a7cb37da6a21313eda8d03119135f1475d35f47d 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/etherdevice.h>
-#include <linux/ip.h>
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/hashtable.h>
@@ -121,13 +120,10 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac)
        QETH_CARD_TEXT(card, 2, "L2Setmac");
        rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC);
        if (rc == 0) {
-               card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
-               ether_addr_copy(card->dev->dev_addr, mac);
                dev_info(&card->gdev->dev,
-                       "MAC address %pM successfully registered on device %s\n",
-                       card->dev->dev_addr, card->dev->name);
+                        "MAC address %pM successfully registered on device %s\n",
+                        mac, card->dev->name);
        } else {
-               card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
                switch (rc) {
                case -EEXIST:
                        dev_warn(&card->gdev->dev,
@@ -142,19 +138,6 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac)
        return rc;
 }
 
-static int qeth_l2_send_delmac(struct qeth_card *card, __u8 *mac)
-{
-       int rc;
-
-       QETH_CARD_TEXT(card, 2, "L2Delmac");
-       if (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
-               return 0;
-       rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_DELVMAC);
-       if (rc == 0)
-               card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
-       return rc;
-}
-
 static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 {
        enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ?
@@ -211,23 +194,6 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
        return RTN_UNSPEC;
 }
 
-static void qeth_l2_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr,
-                            struct sk_buff *skb)
-{
-       struct iphdr *iph = ip_hdr(skb);
-
-       /* tcph->check contains already the pseudo hdr checksum
-        * so just set the header flags
-        */
-       if (iph->protocol == IPPROTO_UDP)
-               hdr->hdr.l2.flags[1] |= QETH_HDR_EXT_UDP;
-       hdr->hdr.l2.flags[1] |= QETH_HDR_EXT_CSUM_TRANSP_REQ |
-               QETH_HDR_EXT_CSUM_HDR_REQ;
-       iph->check = 0;
-       if (card->options.performance_stats)
-               card->perf_stats.tx_csum++;
-}
-
 static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb,
                                int cast_type, unsigned int data_len)
 {
@@ -313,12 +279,13 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
 static void qeth_l2_process_vlans(struct qeth_card *card)
 {
        struct qeth_vlan_vid *id;
+
        QETH_CARD_TEXT(card, 3, "L2prcvln");
-       spin_lock_bh(&card->vlanlock);
+       mutex_lock(&card->vid_list_mutex);
        list_for_each_entry(id, &card->vid_list, list) {
                qeth_l2_send_setdelvlan(card, id->vid, IPA_CMD_SETVLAN);
        }
-       spin_unlock_bh(&card->vlanlock);
+       mutex_unlock(&card->vid_list_mutex);
 }
 
 static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
@@ -335,7 +302,7 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
                QETH_CARD_TEXT(card, 3, "aidREC");
                return 0;
        }
-       id = kmalloc(sizeof(struct qeth_vlan_vid), GFP_ATOMIC);
+       id = kmalloc(sizeof(*id), GFP_KERNEL);
        if (id) {
                id->vid = vid;
                rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN);
@@ -343,9 +310,9 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
                        kfree(id);
                        return rc;
                }
-               spin_lock_bh(&card->vlanlock);
+               mutex_lock(&card->vid_list_mutex);
                list_add_tail(&id->list, &card->vid_list);
-               spin_unlock_bh(&card->vlanlock);
+               mutex_unlock(&card->vid_list_mutex);
        } else {
                return -ENOMEM;
        }
@@ -364,7 +331,7 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
                QETH_CARD_TEXT(card, 3, "kidREC");
                return 0;
        }
-       spin_lock_bh(&card->vlanlock);
+       mutex_lock(&card->vid_list_mutex);
        list_for_each_entry(id, &card->vid_list, list) {
                if (id->vid == vid) {
                        list_del(&id->list);
@@ -372,7 +339,7 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
                        break;
                }
        }
-       spin_unlock_bh(&card->vlanlock);
+       mutex_unlock(&card->vid_list_mutex);
        if (tmpid) {
                rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN);
                kfree(tmpid);
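
Converting the VID list from the BH spinlock vlanlock to vid_list_mutex (above) lets the add path allocate with GFP_KERNEL and issue the synchronous SETVLAN IO while holding the list stable, which a BH spinlock would forbid. A user-space sketch of the same discipline, with a pthread mutex and a hand-rolled list standing in for the kernel primitives:

#include <pthread.h>
#include <stdlib.h>

struct vid_entry { unsigned short vid; struct vid_entry *next; };

static pthread_mutex_t vid_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct vid_entry *vid_list;

static int add_vid(unsigned short vid)
{
	/* may sleep: fine under a mutex, not under a BH spinlock */
	struct vid_entry *e = malloc(sizeof(*e));

	if (!e)
		return -12;  /* -ENOMEM */
	e->vid = vid;
	pthread_mutex_lock(&vid_mutex);
	e->next = vid_list;
	vid_list = e;
	pthread_mutex_unlock(&vid_mutex);
	return 0;
}
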
@@ -439,15 +406,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
                switch (hdr->hdr.l2.id) {
                case QETH_HEADER_TYPE_LAYER2:
                        skb->protocol = eth_type_trans(skb, skb->dev);
-                       if ((card->dev->features & NETIF_F_RXCSUM)
-                          && ((hdr->hdr.l2.flags[1] &
-                               (QETH_HDR_EXT_CSUM_HDR_REQ |
-                                  QETH_HDR_EXT_CSUM_TRANSP_REQ)) ==
-                               (QETH_HDR_EXT_CSUM_HDR_REQ |
-                                  QETH_HDR_EXT_CSUM_TRANSP_REQ)))
-                               skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       else
-                               skb->ip_summed = CHECKSUM_NONE;
+                       qeth_rx_csum(card, skb, hdr->hdr.l2.flags[1]);
                        if (skb->protocol == htons(ETH_P_802_2))
                                *((__u32 *)skb->cb) = ++card->seqno.pkt_seqno;
                        len = skb->len;
@@ -480,7 +439,6 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
 static int qeth_l2_request_initial_mac(struct qeth_card *card)
 {
        int rc = 0;
-       char vendor_pre[] = {0x02, 0x00, 0x00};
 
        QETH_DBF_TEXT(SETUP, 2, "l2reqmac");
        QETH_DBF_TEXT_(SETUP, 2, "doL2%s", CARD_BUS_ID(card));
@@ -500,16 +458,20 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
            card->info.type == QETH_CARD_TYPE_OSX ||
            card->info.guestlan) {
                rc = qeth_setadpparms_change_macaddr(card);
-               if (rc) {
-                       QETH_DBF_MESSAGE(2, "couldn't get MAC address on "
-                               "device %s: x%x\n", CARD_BUS_ID(card), rc);
-                       QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
-                       return rc;
-               }
-       } else {
-               eth_random_addr(card->dev->dev_addr);
-               memcpy(card->dev->dev_addr, vendor_pre, 3);
+               if (!rc)
+                       goto out;
+               QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n",
+                                CARD_BUS_ID(card), rc);
+               QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
+               /* fall back once more: */
        }
+
+       /* some devices don't support a custom MAC address: */
+       if (card->info.type == QETH_CARD_TYPE_OSM ||
+           card->info.type == QETH_CARD_TYPE_OSX)
+               return (rc) ? rc : -EADDRNOTAVAIL;
+       eth_hw_addr_random(card->dev);
+
 out:
        QETH_DBF_HEX(SETUP, 2, card->dev->dev_addr, card->dev->addr_len);
        return 0;
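
qeth_l2_request_initial_mac() above becomes a fallback ladder: try the READ_MAC Assist, and on failure fall back to a random locally administered address via eth_hw_addr_random(), except for OSM/OSX cards, which cannot use a custom MAC. The ladder in miniature, with a stubbed Assist and illustrative error values:

/* stub: pretend the Assist fails with -EIO */
static int read_mac_assist(unsigned char mac[6]) { (void)mac; return -5; }

static void random_mac(unsigned char mac[6])
{
	mac[0] = 0x02;             /* locally administered, unicast */
	/* remaining bytes would be randomized */
}

static int request_initial_mac(unsigned char mac[6], int needs_hw_mac)
{
	int rc = read_mac_assist(mac);

	if (!rc)
		return 0;
	if (needs_hw_mac)          /* e.g. OSM/OSX: custom MACs unsupported */
		return rc;
	random_mac(mac);           /* fall back once more */
	return 0;
}
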
@@ -519,6 +481,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
 {
        struct sockaddr *addr = p;
        struct qeth_card *card = dev->ml_priv;
+       u8 old_addr[ETH_ALEN];
        int rc = 0;
 
        QETH_CARD_TEXT(card, 3, "setmac");
@@ -530,14 +493,35 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
                return -EOPNOTSUPP;
        }
        QETH_CARD_HEX(card, 3, addr->sa_data, ETH_ALEN);
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EADDRNOTAVAIL;
+
        if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
                QETH_CARD_TEXT(card, 3, "setmcREC");
                return -ERESTARTSYS;
        }
-       rc = qeth_l2_send_delmac(card, &card->dev->dev_addr[0]);
-       if (!rc || (rc == -ENOENT))
-               rc = qeth_l2_send_setmac(card, addr->sa_data);
-       return rc ? -EINVAL : 0;
+
+       if (!qeth_card_hw_is_reachable(card)) {
+               ether_addr_copy(dev->dev_addr, addr->sa_data);
+               return 0;
+       }
+
+       /* don't register the same address twice */
+       if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) &&
+           (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
+               return 0;
+
+       /* add the new address, switch over, drop the old */
+       rc = qeth_l2_send_setmac(card, addr->sa_data);
+       if (rc)
+               return rc;
+       ether_addr_copy(old_addr, dev->dev_addr);
+       ether_addr_copy(dev->dev_addr, addr->sa_data);
+
+       if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
+               qeth_l2_remove_mac(card, old_addr);
+       card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
+       return 0;
 }
 
 static void qeth_promisc_to_bridge(struct qeth_card *card)
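
The rewritten qeth_l2_set_mac_address() above switches to make-before-break: register the new MAC with the hardware first, copy it into dev_addr, and only then drop the old registration, so the device is never left without a usable address. A sketch of that ordering with stubbed register/remove helpers (names are not the driver's):

#include <string.h>

#define MAC_LEN 6

static int  register_mac(const unsigned char *mac) { (void)mac; return 0; }
static void remove_mac(const unsigned char *mac)   { (void)mac; }

static int set_mac(unsigned char dev_addr[MAC_LEN],
		   const unsigned char new_addr[MAC_LEN], int registered)
{
	unsigned char old_addr[MAC_LEN];
	int rc;

	rc = register_mac(new_addr);       /* add the new address first */
	if (rc)
		return rc;                 /* old address stays usable */

	memcpy(old_addr, dev_addr, MAC_LEN);
	memcpy(dev_addr, new_addr, MAC_LEN);
	if (registered)
		remove_mac(old_addr);      /* drop the old one last */
	return 0;
}
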
@@ -679,7 +663,8 @@ static int qeth_l2_xmit_iqd(struct qeth_card *card, struct sk_buff *skb,
 }
 
 static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
-                           struct qeth_qdio_out_q *queue, int cast_type)
+                           struct qeth_qdio_out_q *queue, int cast_type,
+                           int ipv)
 {
        int push_len = sizeof(struct qeth_hdr);
        unsigned int elements, nr_frags;
@@ -717,8 +702,11 @@ static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
                hdr_elements = 1;
        }
        qeth_l2_fill_header(hdr, skb, cast_type, skb->len - push_len);
-       if (skb->ip_summed == CHECKSUM_PARTIAL)
-               qeth_l2_hdr_csum(card, hdr, skb);
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv);
+               if (card->options.performance_stats)
+                       card->perf_stats.tx_csum++;
+       }
 
        elements = qeth_get_elements_no(card, skb, hdr_elements, 0);
        if (!elements) {
@@ -770,6 +758,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
 {
        struct qeth_card *card = dev->ml_priv;
        int cast_type = qeth_l2_get_cast_type(card, skb);
+       int ipv = qeth_get_ip_version(skb);
        struct qeth_qdio_out_q *queue;
        int tx_bytes = skb->len;
        int rc;
@@ -777,7 +766,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
        if (card->qdio.do_prio_queueing || (cast_type &&
                                        card->info.is_multicast_different))
                queue = card->qdio.out_qs[qeth_get_priority_queue(card, skb,
-                                       qeth_get_ip_version(skb), cast_type)];
+                                       ipv, cast_type)];
        else
                queue = card->qdio.out_qs[card->qdio.default_out_queue];
 
@@ -800,7 +789,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
                rc = qeth_l2_xmit_iqd(card, skb, queue, cast_type);
                break;
        default:
-               rc = qeth_l2_xmit_osa(card, skb, queue, cast_type);
+               rc = qeth_l2_xmit_osa(card, skb, queue, cast_type, ipv);
        }
 
        if (!rc) {
@@ -977,6 +966,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
        card->dev->mtu = card->info.initial_mtu;
        card->dev->min_mtu = 64;
        card->dev->max_mtu = ETH_MAX_MTU;
+       card->dev->dev_port = card->info.portno;
        card->dev->netdev_ops = &qeth_l2_netdev_ops;
        if (card->info.type == QETH_CARD_TYPE_OSN) {
                card->dev->ethtool_ops = &qeth_l2_osn_ops;
@@ -1005,10 +995,15 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                        card->dev->hw_features |= NETIF_F_IP_CSUM;
                        card->dev->vlan_features |= NETIF_F_IP_CSUM;
                }
-               if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM)) {
-                       card->dev->hw_features |= NETIF_F_RXCSUM;
-                       card->dev->vlan_features |= NETIF_F_RXCSUM;
-               }
+       }
+       if (qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6)) {
+               card->dev->hw_features |= NETIF_F_IPV6_CSUM;
+               card->dev->vlan_features |= NETIF_F_IPV6_CSUM;
+       }
+       if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM) ||
+           qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6)) {
+               card->dev->hw_features |= NETIF_F_RXCSUM;
+               card->dev->vlan_features |= NETIF_F_RXCSUM;
        }
 
        card->info.broadcast_capable = 1;
@@ -1067,8 +1062,9 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
                goto out_remove;
        }
 
-       if (card->info.type != QETH_CARD_TYPE_OSN)
-               qeth_l2_send_setmac(card, &card->dev->dev_addr[0]);
+       if (card->info.type != QETH_CARD_TYPE_OSN &&
+           !qeth_l2_send_setmac(card, card->dev->dev_addr))
+               card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
 
        if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) {
                if (card->info.hwtrap &&
@@ -1308,9 +1304,6 @@ static int qeth_l2_control_event(struct qeth_card *card,
 
 struct qeth_discipline qeth_l2_discipline = {
        .devtype = &qeth_l2_devtype,
-       .start_poll = qeth_qdio_start_poll,
-       .input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
-       .output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
        .process_rx_buffer = qeth_l2_process_inbound_buffer,
        .recover = qeth_l2_recover,
        .setup = qeth_l2_probe_device,
@@ -1338,8 +1331,8 @@ static int qeth_osn_send_control_data(struct qeth_card *card, int len,
        qeth_prepare_control_data(card, len, iob);
        QETH_CARD_TEXT(card, 6, "osnoirqp");
        spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags);
-       rc = ccw_device_start(card->write.ccwdev, &card->write.ccw,
-                             (addr_t) iob, 0, 0);
+       rc = ccw_device_start_timeout(CARD_WDEV(card), &card->write.ccw,
+                                     (addr_t) iob, 0, 0, QETH_IPA_TIMEOUT);
        spin_unlock_irqrestore(get_ccwdev_lock(card->write.ccwdev), flags);
        if (rc) {
                QETH_DBF_MESSAGE(2, "qeth_osn_send_control_data: "
index c1a16a74aa8331177744fb0a7236e343d38efd3a..e7fa479adf47e0dd41bfacaed8fd347bc40b5581 100644 (file)
@@ -735,22 +735,6 @@ static int qeth_l3_setadapter_parms(struct qeth_card *card)
        return rc;
 }
 
-static int qeth_l3_send_simple_setassparms_ipv6(struct qeth_card *card,
-               enum qeth_ipa_funcs ipa_func, __u16 cmd_code)
-{
-       int rc;
-       struct qeth_cmd_buffer *iob;
-
-       QETH_CARD_TEXT(card, 4, "simassp6");
-       iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
-                                      0, QETH_PROT_IPV6);
-       if (!iob)
-               return -ENOMEM;
-       rc = qeth_send_setassparms(card, iob, 0, 0,
-                                  qeth_setassparms_cb, NULL);
-       return rc;
-}
-
 static int qeth_l3_start_ipa_arp_processing(struct qeth_card *card)
 {
        int rc;
@@ -851,14 +835,6 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
 
        QETH_CARD_TEXT(card, 3, "softipv6");
 
-       rc = qeth_query_ipassists(card, QETH_PROT_IPV6);
-       if (rc) {
-               dev_err(&card->gdev->dev,
-                       "Activating IPv6 support for %s failed\n",
-                       QETH_CARD_IFNAME(card));
-               return rc;
-       }
-
        if (card->info.type == QETH_CARD_TYPE_IQD)
                goto out;
 
@@ -870,16 +846,16 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
                        QETH_CARD_IFNAME(card));
                return rc;
        }
-       rc = qeth_l3_send_simple_setassparms_ipv6(card, IPA_IPV6,
-                                              IPA_CMD_ASS_START);
+       rc = qeth_send_simple_setassparms_v6(card, IPA_IPV6,
+                                            IPA_CMD_ASS_START, 0);
        if (rc) {
                dev_err(&card->gdev->dev,
                        "Activating IPv6 support for %s failed\n",
                         QETH_CARD_IFNAME(card));
                return rc;
        }
-       rc = qeth_l3_send_simple_setassparms_ipv6(card, IPA_PASSTHRU,
-                                              IPA_CMD_ASS_START);
+       rc = qeth_send_simple_setassparms_v6(card, IPA_PASSTHRU,
+                                            IPA_CMD_ASS_START, 0);
        if (rc) {
                dev_warn(&card->gdev->dev,
                        "Enabling the passthrough mode for %s failed\n",
@@ -1293,91 +1269,6 @@ static void qeth_l3_add_multicast_ipv6(struct qeth_card *card)
        in6_dev_put(in6_dev);
 }
 
-static void qeth_l3_free_vlan_addresses4(struct qeth_card *card,
-                       unsigned short vid)
-{
-       struct in_device *in_dev;
-       struct in_ifaddr *ifa;
-       struct qeth_ipaddr *addr;
-       struct net_device *netdev;
-
-       QETH_CARD_TEXT(card, 4, "frvaddr4");
-
-       netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
-       if (!netdev)
-               return;
-       in_dev = in_dev_get(netdev);
-       if (!in_dev)
-               return;
-
-       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
-       if (!addr)
-               goto out;
-
-       spin_lock_bh(&card->ip_lock);
-
-       for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-               addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
-               addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
-               addr->type = QETH_IP_TYPE_NORMAL;
-               qeth_l3_delete_ip(card, addr);
-       }
-
-       spin_unlock_bh(&card->ip_lock);
-
-       kfree(addr);
-out:
-       in_dev_put(in_dev);
-}
-
-static void qeth_l3_free_vlan_addresses6(struct qeth_card *card,
-                                        unsigned short vid)
-{
-       struct inet6_dev *in6_dev;
-       struct inet6_ifaddr *ifa;
-       struct qeth_ipaddr *addr;
-       struct net_device *netdev;
-
-       QETH_CARD_TEXT(card, 4, "frvaddr6");
-
-       netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
-       if (!netdev)
-               return;
-
-       in6_dev = in6_dev_get(netdev);
-       if (!in6_dev)
-               return;
-
-       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-       if (!addr)
-               goto out;
-
-       spin_lock_bh(&card->ip_lock);
-
-       list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
-               memcpy(&addr->u.a6.addr, &ifa->addr,
-                      sizeof(struct in6_addr));
-               addr->u.a6.pfxlen = ifa->prefix_len;
-               addr->type = QETH_IP_TYPE_NORMAL;
-               qeth_l3_delete_ip(card, addr);
-       }
-
-       spin_unlock_bh(&card->ip_lock);
-
-       kfree(addr);
-out:
-       in6_dev_put(in6_dev);
-}
-
-static void qeth_l3_free_vlan_addresses(struct qeth_card *card,
-                       unsigned short vid)
-{
-       rcu_read_lock();
-       qeth_l3_free_vlan_addresses4(card, vid);
-       qeth_l3_free_vlan_addresses6(card, vid);
-       rcu_read_unlock();
-}
-
 static int qeth_l3_vlan_rx_add_vid(struct net_device *dev,
                                   __be16 proto, u16 vid)
 {
@@ -1398,8 +1289,6 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
                QETH_CARD_TEXT(card, 3, "kidREC");
                return 0;
        }
-       /* unregister IP addresses of vlan device */
-       qeth_l3_free_vlan_addresses(card, vid);
        clear_bit(vid, card->active_vlans);
        qeth_l3_set_rx_mode(dev);
        return 0;
@@ -1454,17 +1343,7 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
        }
 
-       if (card->dev->features & NETIF_F_RXCSUM) {
-               if ((hdr->hdr.l3.ext_flags &
-                   (QETH_HDR_EXT_CSUM_HDR_REQ |
-                    QETH_HDR_EXT_CSUM_TRANSP_REQ)) ==
-                   (QETH_HDR_EXT_CSUM_HDR_REQ |
-                    QETH_HDR_EXT_CSUM_TRANSP_REQ))
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-               else
-                       skb->ip_summed = CHECKSUM_NONE;
-       } else
-               skb->ip_summed = CHECKSUM_NONE;
+       qeth_rx_csum(card, skb, hdr->hdr.l3.ext_flags);
 }
 
 static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
@@ -2210,23 +2089,6 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
        rcu_read_unlock();
 }
 
-static void qeth_l3_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr,
-                            struct sk_buff *skb)
-{
-       struct iphdr *iph = ip_hdr(skb);
-
-       /* tcph->check contains already the pseudo hdr checksum
-        * so just set the header flags
-        */
-       if (iph->protocol == IPPROTO_UDP)
-               hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_UDP;
-       hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ |
-               QETH_HDR_EXT_CSUM_HDR_REQ;
-       iph->check = 0;
-       if (card->options.performance_stats)
-               card->perf_stats.tx_csum++;
-}
-
 static void qeth_tso_fill_header(struct qeth_card *card,
                struct qeth_hdr *qhdr, struct sk_buff *skb)
 {
@@ -2418,8 +2280,11 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
                        }
                }
 
-               if (skb->ip_summed == CHECKSUM_PARTIAL)
-                       qeth_l3_hdr_csum(card, hdr, new_skb);
+               if (new_skb->ip_summed == CHECKSUM_PARTIAL) {
+                       qeth_tx_csum(new_skb, &hdr->hdr.l3.ext_flags, ipv);
+                       if (card->options.performance_stats)
+                               card->perf_stats.tx_csum++;
+               }
        }
 
        elements = use_tso ?
@@ -2620,28 +2485,32 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
                    (card->info.link_type == QETH_LINK_TYPE_HSTR)) {
                        pr_info("qeth_l3: ignoring TR device\n");
                        return -ENODEV;
-               } else {
-                       card->dev = alloc_etherdev(0);
-                       if (!card->dev)
-                               return -ENODEV;
-                       card->dev->netdev_ops = &qeth_l3_osa_netdev_ops;
-
-                       /*IPv6 address autoconfiguration stuff*/
-                       qeth_l3_get_unique_id(card);
-                       if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
-                               card->dev->dev_id = card->info.unique_id &
-                                                        0xffff;
-
-                       card->dev->hw_features |= NETIF_F_SG;
-                       card->dev->vlan_features |= NETIF_F_SG;
-
-                       if (!card->info.guestlan) {
-                               card->dev->features |= NETIF_F_SG;
-                               card->dev->hw_features |= NETIF_F_TSO |
-                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
-                               card->dev->vlan_features |= NETIF_F_TSO |
-                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
-                       }
+               }
+
+               card->dev = alloc_etherdev(0);
+               if (!card->dev)
+                       return -ENODEV;
+               card->dev->netdev_ops = &qeth_l3_osa_netdev_ops;
+
+               /*IPv6 address autoconfiguration stuff*/
+               qeth_l3_get_unique_id(card);
+               if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
+                       card->dev->dev_id = card->info.unique_id & 0xffff;
+
+               card->dev->hw_features |= NETIF_F_SG;
+               card->dev->vlan_features |= NETIF_F_SG;
+
+               if (!card->info.guestlan) {
+                       card->dev->features |= NETIF_F_SG;
+                       card->dev->hw_features |= NETIF_F_TSO |
+                               NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+                       card->dev->vlan_features |= NETIF_F_TSO |
+                               NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+               }
+
+               if (qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6)) {
+                       card->dev->hw_features |= NETIF_F_IPV6_CSUM;
+                       card->dev->vlan_features |= NETIF_F_IPV6_CSUM;
                }
        } else if (card->info.type == QETH_CARD_TYPE_IQD) {
                card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN,
@@ -2663,6 +2532,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
        card->dev->mtu = card->info.initial_mtu;
        card->dev->min_mtu = 64;
        card->dev->max_mtu = ETH_MAX_MTU;
+       card->dev->dev_port = card->info.portno;
        card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
        card->dev->features |=  NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_HW_VLAN_CTAG_RX |
@@ -2960,9 +2830,6 @@ static int qeth_l3_control_event(struct qeth_card *card,
 
 struct qeth_discipline qeth_l3_discipline = {
        .devtype = &qeth_l3_devtype,
-       .start_poll = qeth_qdio_start_poll,
-       .input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
-       .output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
        .process_rx_buffer = qeth_l3_process_inbound_buffer,
        .recover = qeth_l3_recover,
        .setup = qeth_l3_probe_device,
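
The checksum hunks above fold the open-coded receive logic into a shared qeth_rx_csum() helper and route transmit checksum flagging through qeth_tx_csum(). A minimal sketch of the receive side, reconstructed purely from the branch deleted above (the real helper lives in the qeth core and may differ in detail):

    static void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
                             u8 ext_flags)
    {
            u8 both = QETH_HDR_EXT_CSUM_HDR_REQ |
                      QETH_HDR_EXT_CSUM_TRANSP_REQ;

            /* Trust the checksum only if the device verified both the
             * IP header and the transport-layer checksum.
             */
            if ((card->dev->features & NETIF_F_RXCSUM) &&
                (ext_flags & both) == both)
                    skb->ip_summed = CHECKSUM_UNNECESSARY;
            else
                    skb->ip_summed = CHECKSUM_NONE;
    }
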
index c44d7c7ffc920e9cfb35c521c6012cddfccb7b8a..1754f55e2facfb4b21169a7c97ee514392c9eaa8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
+ * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
index 0156c9623c35d55012f8c77b24efd19b0b42d980..d62ddd63f4fe115d849f5aea3e16a649fa0531eb 100644 (file)
@@ -724,6 +724,8 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
        int wait;
        unsigned long flags = 0;
        unsigned long mflags = 0;
+       struct aac_hba_cmd_req *hbacmd = (struct aac_hba_cmd_req *)
+                       fibptr->hw_fib_va;
 
        fibptr->flags = (FIB_CONTEXT_FLAG | FIB_CONTEXT_FLAG_NATIVE_HBA);
        if (callback) {
@@ -734,11 +736,9 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
                wait = 1;
 
 
-       if (command == HBA_IU_TYPE_SCSI_CMD_REQ) {
-               struct aac_hba_cmd_req *hbacmd =
-                       (struct aac_hba_cmd_req *)fibptr->hw_fib_va;
+       hbacmd->iu_type = command;
 
-               hbacmd->iu_type = command;
+       if (command == HBA_IU_TYPE_SCSI_CMD_REQ) {
                /* bit1 of request_id must be 0 */
                hbacmd->request_id =
                        cpu_to_le32((((u32)(fibptr - dev->fibs)) << 2) + 1);
index abddde11982bbb11be7d4cf848de6d9e289a9790..98597b59c12ab6eb944ffa21107f10178c03e024 100644 (file)
@@ -296,7 +296,7 @@ int fnic_get_stats_data(struct stats_debug_info *debug,
                  "Number of Abort FW Timeouts: %lld\n"
                  "Number of Abort IO NOT Found: %lld\n"
 
-                 "Abord issued times: \n"
+                 "Abort issued times: \n"
                  "            < 6 sec : %lld\n"
                  "     6 sec - 20 sec : %lld\n"
                  "    20 sec - 30 sec : %lld\n"
index edb7be786c6501602195b25285dda78ceb9ea537..9e8de1462593aa7371104354e0ac6b9f540ae57a 100644 (file)
@@ -291,7 +291,7 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost,
                 * Note: We have not moved the current phy_index so we will actually
                 *       compare the starting phy with itself.
                 *       This is expected and required to add the phy to the port. */
-               while (phy_index < SCI_MAX_PHYS) {
+               for (; phy_index < SCI_MAX_PHYS; phy_index++) {
                        if ((phy_mask & (1 << phy_index)) == 0)
                                continue;
                        sci_phy_get_sas_address(&ihost->phys[phy_index],
@@ -311,7 +311,6 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost,
                                              &ihost->phys[phy_index]);
 
                        assigned_phy_mask |= (1 << phy_index);
-                       phy_index++;
                }
 
        }
index ce97cde3b41cd174e5c29ef8145b8938b9dc9609..f4d988dd1e9d1a8389f4486a9b08bdfe9eb31a84 100644 (file)
@@ -1124,12 +1124,12 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
                goto fail_fw_init;
        }
 
-       ret = 0;
+       return 0;
 
 fail_fw_init:
        dev_err(&instance->pdev->dev,
-               "Init cmd return status %s for SCSI host %d\n",
-               ret ? "FAILED" : "SUCCESS", instance->host->host_no);
+               "Init cmd return status FAILED for SCSI host %d\n",
+               instance->host->host_no);
 
        return ret;
 }
index 773558fc0697513a523eeae10b6598eb37ea22a9..16d1a21cdff9af2f9d78809fa141366fad06aa41 100644 (file)
@@ -23,6 +23,7 @@ void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf)
        struct fip_vlan *vlan;
 #define MY_FIP_ALL_FCF_MACS        ((__u8[6]) { 1, 0x10, 0x18, 1, 0, 2 })
        static u8 my_fcoe_all_fcfs[ETH_ALEN] = MY_FIP_ALL_FCF_MACS;
+       unsigned long flags = 0;
 
        skb = dev_alloc_skb(sizeof(struct fip_vlan));
        if (!skb)
@@ -65,7 +66,9 @@ void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf)
                kfree_skb(skb);
                return;
        }
-       qed_ops->ll2->start_xmit(qedf->cdev, skb);
+
+       set_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &flags);
+       qed_ops->ll2->start_xmit(qedf->cdev, skb, flags);
 }
 
 static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf,
@@ -139,7 +142,7 @@ void qedf_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
                print_hex_dump(KERN_WARNING, "fip ", DUMP_PREFIX_OFFSET, 16, 1,
                    skb->data, skb->len, false);
 
-       qed_ops->ll2->start_xmit(qedf->cdev, skb);
+       qed_ops->ll2->start_xmit(qedf->cdev, skb, 0);
 }
 
 /* Process incoming FIP frames. */
index 284ccb566b196c3adabfaf8144892fbea4a0f50f..6c19015975a87c7d05c41e17089e49bc05e92b25 100644 (file)
@@ -994,7 +994,7 @@ static int qedf_xmit(struct fc_lport *lport, struct fc_frame *fp)
        if (qedf_dump_frames)
                print_hex_dump(KERN_WARNING, "fcoe: ", DUMP_PREFIX_OFFSET, 16,
                    1, skb->data, skb->len, false);
-       qed_ops->ll2->start_xmit(qedf->cdev, skb);
+       qed_ops->ll2->start_xmit(qedf->cdev, skb, 0);
 
        return 0;
 }
index 7ec7f6e00fb8d709b6f5e4c925b707f375b9823d..ff21aa1fd939a052ad80a947df1393b7f28177cc 100644 (file)
@@ -1150,7 +1150,7 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid)
        if (vlanid)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlanid);
 
-       rc = qedi_ops->ll2->start_xmit(cdev, skb);
+       rc = qedi_ops->ll2->start_xmit(cdev, skb, 0);
        if (rc) {
                QEDI_ERR(&qedi->dbg_ctx, "ll2 start_xmit returned %d\n",
                         rc);
index 9ef5e3b810f6e0f9682c3577e61005af9658e999..656c98e116a902da2c230c2026d3b2632548e2f6 100644 (file)
@@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128";
 #define F_INV_OP               0x200
 #define F_FAKE_RW              0x400
 #define F_M_ACCESS             0x800   /* media access */
-#define F_LONG_DELAY           0x1000
+#define F_SSU_DELAY            0x1000
+#define F_SYNC_DELAY           0x2000
 
 #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR)
 #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW)
 #define FF_SA (F_SA_HIGH | F_SA_LOW)
+#define F_LONG_DELAY           (F_SSU_DELAY | F_SYNC_DELAY)
 
 #define SDEBUG_MAX_PARTS 4
 
@@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = {
 };
 
 static const struct opcode_info_t sync_cache_iarr[] = {
-       {0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
+       {0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
            {16,  0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} },     /* SYNC_CACHE (16) */
 };
@@ -553,7 +555,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
            resp_write_dt0, write_iarr,                 /* WRITE(16) */
                {16,  0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} },
-       {0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
+       {0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
            {6,  0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
        {ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN,
            resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */
@@ -606,7 +608,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
            resp_write_same_10, write_same_iarr,        /* WRITE SAME(10) */
                {10,  0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0,
                 0, 0, 0, 0, 0} },
-       {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS,
+       {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS,
            resp_sync_cache, sync_cache_iarr,
            {10,  0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0,
             0, 0, 0, 0} },                     /* SYNC_CACHE (10) */
@@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT;
 static bool sdebug_any_injecting_opt;
 static bool sdebug_verbose;
 static bool have_dif_prot;
+static bool write_since_sync;
 static bool sdebug_statistics = DEF_STATISTICS;
 
 static unsigned int sdebug_store_sectors;
@@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp,
 {
        unsigned char *cmd = scp->cmnd;
        int power_cond, stop;
+       bool changing;
 
        power_cond = (cmd[4] & 0xf0) >> 4;
        if (power_cond) {
@@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp,
                return check_condition_result;
        }
        stop = !(cmd[4] & 1);
+       changing = atomic_read(&devip->stopped) == !stop;
        atomic_xchg(&devip->stopped, stop);
-       return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */
+       if (!changing || cmd[1] & 0x1)  /* state unchanged or IMMED set */
+               return SDEG_RES_IMMED_MASK;
+       else
+               return 0;
 }
 
 static sector_t get_sdebug_capacity(void)
@@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba,
        if (do_write) {
                sdb = scsi_out(scmd);
                dir = DMA_TO_DEVICE;
+               write_since_sync = true;
        } else {
                sdb = scsi_in(scmd);
                dir = DMA_FROM_DEVICE;
@@ -3583,6 +3592,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp,
 static int resp_sync_cache(struct scsi_cmnd *scp,
                           struct sdebug_dev_info *devip)
 {
+       int res = 0;
        u64 lba;
        u32 num_blocks;
        u8 *cmd = scp->cmnd;
@@ -3598,7 +3608,11 @@ static int resp_sync_cache(struct scsi_cmnd *scp,
                mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0);
                return check_condition_result;
        }
-       return (cmd[1] & 0x2) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */
+       if (!write_since_sync || cmd[1] & 0x2)
+               res = SDEG_RES_IMMED_MASK;
+       else            /* delay if write_since_sync and IMMED clear */
+               write_since_sync = false;
+       return res;
 }
 
 #define RL_BUCKET_ELEMS 8
@@ -5777,13 +5791,14 @@ static int scsi_debug_queuecommand(struct Scsi_Host *shost,
                return schedule_resp(scp, devip, errsts, pfp, 0, 0);
        else if ((sdebug_jdelay || sdebug_ndelay) && (flags & F_LONG_DELAY)) {
                /*
-                * If any delay is active, want F_LONG_DELAY to be at least 1
+                * If any delay is active, for F_SSU_DELAY want at least 1
                 * second and if sdebug_jdelay>0 want a long delay of that
-                * many seconds.
+                * many seconds; for F_SYNC_DELAY want 1/20 of that.
                 */
                int jdelay = (sdebug_jdelay < 2) ? 1 : sdebug_jdelay;
+               int denom = (flags & F_SYNC_DELAY) ? 20 : 1;
 
-               jdelay = mult_frac(USER_HZ * jdelay, HZ, USER_HZ);
+               jdelay = mult_frac(USER_HZ * jdelay, HZ, denom * USER_HZ);
                return schedule_resp(scp, devip, errsts, pfp, jdelay, 0);
        } else
                return schedule_resp(scp, devip, errsts, pfp, sdebug_jdelay,
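
Splitting F_LONG_DELAY into F_SSU_DELAY and F_SYNC_DELAY lets SYNCHRONIZE CACHE wait only a twentieth of the START STOP UNIT delay. Assuming HZ=1000 and USER_HZ=100, a worked example of the mult_frac() arithmetic above with sdebug_jdelay=5:

    /* mult_frac(x, n, d) computes x * n / d without intermediate overflow:
     *   F_SSU_DELAY:  mult_frac(100 * 5, 1000,  1 * 100) = 5000 jiffies (~5 s)
     *   F_SYNC_DELAY: mult_frac(100 * 5, 1000, 20 * 100) =  250 jiffies (~0.25 s)
     */
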
index f4b52b44b966c98edc18549f37c1fe603737aaa1..65f6c94f2e9b3fa0be7104fa53048a0c61579701 100644 (file)
@@ -2322,6 +2322,12 @@ iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp)
        return nlmsg_multicast(nls, skb, 0, group, gfp);
 }
 
+static int
+iscsi_unicast_skb(struct sk_buff *skb, u32 portid)
+{
+       return nlmsg_unicast(nls, skb, portid);
+}
+
 int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
                   char *data, uint32_t data_size)
 {
@@ -2524,14 +2530,11 @@ void iscsi_ping_comp_event(uint32_t host_no, struct iscsi_transport *transport,
 EXPORT_SYMBOL_GPL(iscsi_ping_comp_event);
 
 static int
-iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi,
-                   void *payload, int size)
+iscsi_if_send_reply(u32 portid, int type, void *payload, int size)
 {
        struct sk_buff  *skb;
        struct nlmsghdr *nlh;
        int len = nlmsg_total_size(size);
-       int flags = multi ? NLM_F_MULTI : 0;
-       int t = done ? NLMSG_DONE : type;
 
        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb) {
@@ -2539,10 +2542,9 @@ iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi,
                return -ENOMEM;
        }
 
-       nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0);
-       nlh->nlmsg_flags = flags;
+       nlh = __nlmsg_put(skb, 0, 0, type, (len - sizeof(*nlh)), 0);
        memcpy(nlmsg_data(nlh), payload, size);
-       return iscsi_multicast_skb(skb, group, GFP_ATOMIC);
+       return iscsi_unicast_skb(skb, portid);
 }
 
 static int
@@ -3470,6 +3472,7 @@ static int
 iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
        int err = 0;
+       u32 portid;
        struct iscsi_uevent *ev = nlmsg_data(nlh);
        struct iscsi_transport *transport = NULL;
        struct iscsi_internal *priv;
@@ -3490,10 +3493,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
        if (!try_module_get(transport->owner))
                return -EINVAL;
 
+       portid = NETLINK_CB(skb).portid;
+
        switch (nlh->nlmsg_type) {
        case ISCSI_UEVENT_CREATE_SESSION:
                err = iscsi_if_create_session(priv, ep, ev,
-                                             NETLINK_CB(skb).portid,
+                                             portid,
                                              ev->u.c_session.initial_cmdsn,
                                              ev->u.c_session.cmds_max,
                                              ev->u.c_session.queue_depth);
@@ -3506,7 +3511,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
                }
 
                err = iscsi_if_create_session(priv, ep, ev,
-                                       NETLINK_CB(skb).portid,
+                                       portid,
                                        ev->u.c_bound_session.initial_cmdsn,
                                        ev->u.c_bound_session.cmds_max,
                                        ev->u.c_bound_session.queue_depth);
@@ -3664,6 +3669,8 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 static void
 iscsi_if_rx(struct sk_buff *skb)
 {
+       u32 portid = NETLINK_CB(skb).portid;
+
        mutex_lock(&rx_queue_mutex);
        while (skb->len >= NLMSG_HDRLEN) {
                int err;
@@ -3699,8 +3706,8 @@ iscsi_if_rx(struct sk_buff *skb)
                                break;
                        if (ev->type == ISCSI_UEVENT_GET_CHAP && !err)
                                break;
-                       err = iscsi_if_send_reply(group, nlh->nlmsg_seq,
-                               nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
+                       err = iscsi_if_send_reply(portid, nlh->nlmsg_type,
+                                                 ev, sizeof(*ev));
                } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH);
                skb_pull(skb, rlen);
        }
index a6201e696ab9c5a20e0e75e1ee9b3a7b87c14461..9421d987773051e72af159ce1b4cc1f5f961e2a6 100644 (file)
@@ -2121,6 +2121,8 @@ sd_spinup_disk(struct scsi_disk *sdkp)
                                break;  /* standby */
                        if (sshdr.asc == 4 && sshdr.ascq == 0xc)
                                break;  /* unavailable */
+                       if (sshdr.asc == 4 && sshdr.ascq == 0x1b)
+                               break;  /* sanitize in progress */
                        /*
                         * Issue command to spin up drive when not ready
                         */
index 41df75eea57be7362e1be8f737d1bae9bac91977..210407cd2341bf9bb89f5831939d4c0a204af485 100644 (file)
@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
  *
  * Check that all zones of the device are equal. The last zone can however
  * be smaller. The zone size must also be a power of two number of LBAs.
+ *
+ * Returns the zone size in number of blocks upon success or an error code upon failure.
  */
-static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
+static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 {
        u64 zone_blocks = 0;
        sector_t block = 0;
@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
        int ret;
        u8 same;
 
-       sdkp->zone_blocks = 0;
-
        /* Get a buffer */
        buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
        if (!buf)
@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
                /* Parse zone descriptors */
                while (rec < buf + buf_len) {
-                       zone_blocks = get_unaligned_be64(&rec[8]);
-                       if (sdkp->zone_blocks == 0) {
-                               sdkp->zone_blocks = zone_blocks;
-                       } else if (zone_blocks != sdkp->zone_blocks &&
-                                  (block + zone_blocks < sdkp->capacity
-                                   || zone_blocks > sdkp->zone_blocks)) {
-                               zone_blocks = 0;
+                       u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
+
+                       if (zone_blocks == 0) {
+                               zone_blocks = this_zone_blocks;
+                       } else if (this_zone_blocks != zone_blocks &&
+                                  (block + this_zone_blocks < sdkp->capacity
+                                   || this_zone_blocks > zone_blocks)) {
+                               this_zone_blocks = 0;
                                goto out;
                        }
-                       block += zone_blocks;
+                       block += this_zone_blocks;
                        rec += 64;
                }
 
@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
        } while (block < sdkp->capacity);
 
-       zone_blocks = sdkp->zone_blocks;
-
 out:
        if (!zone_blocks) {
                if (sdkp->first_scan)
@@ -488,8 +487,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
                                  "Zone size too large\n");
                ret = -ENODEV;
        } else {
-               sdkp->zone_blocks = zone_blocks;
-               sdkp->zone_shift = ilog2(zone_blocks);
+               ret = zone_blocks;
        }
 
 out_free:
@@ -500,15 +498,14 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
 /**
  * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @sdkp: The disk of the bitmap
+ * @nr_zones: Number of zones to allocate space for.
+ * @numa_node: NUMA node to allocate the memory from.
  */
-static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
+static inline unsigned long *
+sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
 {
-       struct request_queue *q = sdkp->disk->queue;
-
-       return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
-                           * sizeof(unsigned long),
-                           GFP_KERNEL, q->node);
+       return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
+                           GFP_KERNEL, numa_node);
 }
 
 /**
@@ -516,6 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
  * @sdkp: disk used
  * @buf: report reply buffer
  * @buflen: length of @buf
+ * @zone_shift: logarithm base 2 of the number of blocks in a zone
  * @seq_zones_bitmap: bitmap of sequential zones to set
  *
  * Parse reported zone descriptors in @buf to identify sequential zones and
@@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
  * Return the LBA after the last zone reported.
  */
 static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
-                                    unsigned int buflen,
+                                    unsigned int buflen, u32 zone_shift,
                                     unsigned long *seq_zones_bitmap)
 {
        sector_t lba, next_lba = sdkp->capacity;
@@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
                if (type != ZBC_ZONE_TYPE_CONV &&
                    cond != ZBC_ZONE_COND_READONLY &&
                    cond != ZBC_ZONE_COND_OFFLINE)
-                       set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
+                       set_bit(lba >> zone_shift, seq_zones_bitmap);
                next_lba = lba + get_unaligned_be64(&rec[8]);
                rec += 64;
        }
@@ -553,12 +551,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
 }
 
 /**
- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
+ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
  * @sdkp: target disk
+ * @zone_shift: logarithm base 2 of the number of blocks in a zone
+ * @nr_zones: number of zones to set up a seq zone bitmap for
  *
  * Allocate a zone bitmap and initialize it by identifying sequential zones.
  */
-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
+static unsigned long *
+sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
+                             u32 nr_zones)
 {
        struct request_queue *q = sdkp->disk->queue;
        unsigned long *seq_zones_bitmap;
@@ -566,9 +568,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
        unsigned char *buf;
        int ret = -ENOMEM;
 
-       seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
+       seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
        if (!seq_zones_bitmap)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
        if (!buf)
@@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
                if (ret)
                        goto out;
                lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
-                                          seq_zones_bitmap);
+                                          zone_shift, seq_zones_bitmap);
        }
 
        if (lba != sdkp->capacity) {
@@ -591,12 +593,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
        kfree(buf);
        if (ret) {
                kfree(seq_zones_bitmap);
-               return ret;
+               return ERR_PTR(ret);
        }
-
-       q->seq_zones_bitmap = seq_zones_bitmap;
-
-       return 0;
+       return seq_zones_bitmap;
 }
 
 static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
        q->nr_zones = 0;
 }
 
-static int sd_zbc_setup(struct scsi_disk *sdkp)
+static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
 {
        struct request_queue *q = sdkp->disk->queue;
+       u32 zone_shift = ilog2(zone_blocks);
+       u32 nr_zones;
        int ret;
 
-       /* READ16/WRITE16 is mandatory for ZBC disks */
-       sdkp->device->use_16_for_rw = 1;
-       sdkp->device->use_10_for_rw = 0;
-
        /* chunk_sectors indicates the zone size */
-       blk_queue_chunk_sectors(sdkp->disk->queue,
-                       logical_to_sectors(sdkp->device, sdkp->zone_blocks));
-       sdkp->nr_zones =
-               round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
+       blk_queue_chunk_sectors(q,
+                       logical_to_sectors(sdkp->device, zone_blocks));
+       nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
 
        /*
         * Initialize the device request queue information if the number
         * of zones changed.
         */
-       if (sdkp->nr_zones != q->nr_zones) {
-
-               sd_zbc_cleanup(sdkp);
-
-               q->nr_zones = sdkp->nr_zones;
-               if (sdkp->nr_zones) {
-                       q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
-                       if (!q->seq_zones_wlock) {
+       if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
+               unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+               size_t zone_bitmap_size;
+
+               if (nr_zones) {
+                       seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
+                                                                  q->node);
+                       if (!seq_zones_wlock) {
                                ret = -ENOMEM;
                                goto err;
                        }
 
-                       ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
-                       if (ret) {
-                               sd_zbc_cleanup(sdkp);
+                       seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
+                                                       zone_shift, nr_zones);
+                       if (IS_ERR(seq_zones_bitmap)) {
+                               ret = PTR_ERR(seq_zones_bitmap);
+                               kfree(seq_zones_wlock);
                                goto err;
                        }
                }
-
+               zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
+                       sizeof(unsigned long);
+               blk_mq_freeze_queue(q);
+               if (q->nr_zones != nr_zones) {
+                       /* READ16/WRITE16 is mandatory for ZBC disks */
+                       sdkp->device->use_16_for_rw = 1;
+                       sdkp->device->use_10_for_rw = 0;
+
+                       sdkp->zone_blocks = zone_blocks;
+                       sdkp->zone_shift = zone_shift;
+                       sdkp->nr_zones = nr_zones;
+                       q->nr_zones = nr_zones;
+                       swap(q->seq_zones_wlock, seq_zones_wlock);
+                       swap(q->seq_zones_bitmap, seq_zones_bitmap);
+               } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
+                                 zone_bitmap_size) != 0) {
+                       memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
+                              zone_bitmap_size);
+               }
+               blk_mq_unfreeze_queue(q);
+               kfree(seq_zones_wlock);
+               kfree(seq_zones_bitmap);
        }
 
        return 0;
@@ -661,6 +680,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp)
 
 int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 {
+       int64_t zone_blocks;
        int ret;
 
        if (!sd_is_zoned(sdkp))
@@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
         * Check zone size: only devices with a constant zone size (except
         * a possible last runt zone) that is a power of 2 are supported.
         */
-       ret = sd_zbc_check_zone_size(sdkp);
-       if (ret)
+       zone_blocks = sd_zbc_check_zone_size(sdkp);
+       ret = -EFBIG;
+       if (zone_blocks != (u32)zone_blocks)
+               goto err;
+       ret = zone_blocks;
+       if (ret < 0)
                goto err;
 
        /* The drive satisfies the kernel restrictions: set it up */
-       ret = sd_zbc_setup(sdkp);
+       ret = sd_zbc_setup(sdkp, zone_blocks);
        if (ret)
                goto err;
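
sd_zbc now publishes the recomputed zone geometry under a frozen queue, so in-flight requests never observe a half-updated bitmap, and the swap() idiom lets the same two kfree() calls free whichever buffers ended up unused. Reduced to its essentials:

    /* After swap() the queue owns the new buffer and the local variable
     * owns the old one (or the never-published one when only the bitmap
     * contents changed), so one kfree() per pointer is always right.
     */
    blk_mq_freeze_queue(q);
    swap(q->seq_zones_wlock, seq_zones_wlock);
    swap(q->seq_zones_bitmap, seq_zones_bitmap);
    blk_mq_unfreeze_queue(q);
    kfree(seq_zones_wlock);
    kfree(seq_zones_bitmap);
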
 
index 8c51d628b52edfd7e891182919fab16b469b0f3c..a2ec0bc9e9fac01f0f30237540b2d5ea1d4a3f2f 100644 (file)
@@ -1722,11 +1722,14 @@ static int storvsc_probe(struct hv_device *device,
                max_targets = STORVSC_MAX_TARGETS;
                max_channels = STORVSC_MAX_CHANNELS;
                /*
-                * On Windows8 and above, we support sub-channels for storage.
+                * On Windows8 and above, we support sub-channels for storage
+                * on SCSI and FC controllers.
                 * The number of sub-channels offered is based on the number of
                 * VCPUs in the guest.
                 */
-               max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
+               if (!dev_is_ide)
+                       max_sub_channels =
+                               (num_cpus - 1) / storvsc_vcpus_per_sub_channel;
        }
 
        scsi_driver.can_queue = (max_outstanding_req_per_channel *
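
Sub-channels are now offered only for SCSI and FC controllers, and the count is derived from num_cpus - 1 so a guest needs more VCPUs than one sub-channel's worth before any are used. Assuming the driver default of four VCPUs per sub-channel:

    /* num_cpus = 4:  (4 - 1) / 4 = 0 sub-channels (primary channel only)
     * num_cpus = 8:  (8 - 1) / 4 = 1 sub-channel
     * num_cpus = 16: (16 - 1) / 4 = 3 sub-channels
     */
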
index c5b1bf1cadcb043390b3e8c6657f38692bea0774..00e79057f870dfa67303dc3ed7bff96ba874f329 100644 (file)
@@ -276,6 +276,35 @@ static inline void ufshcd_remove_non_printable(char *val)
                *val = ' ';
 }
 
+static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
+               const char *str)
+{
+       struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr;
+
+       trace_ufshcd_upiu(dev_name(hba->dev), str, &rq->header, &rq->sc.cdb);
+}
+
+static void ufshcd_add_query_upiu_trace(struct ufs_hba *hba, unsigned int tag,
+               const char *str)
+{
+       struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr;
+
+       trace_ufshcd_upiu(dev_name(hba->dev), str, &rq->header, &rq->qr);
+}
+
+static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag,
+               const char *str)
+{
+       struct utp_task_req_desc *descp;
+       struct utp_upiu_task_req *task_req;
+       int off = (int)tag - hba->nutrs;
+
+       descp = &hba->utmrdl_base_addr[off];
+       task_req = (struct utp_upiu_task_req *)descp->task_req_upiu;
+       trace_ufshcd_upiu(dev_name(hba->dev), str, &task_req->header,
+                       &task_req->input_param1);
+}
+
 static void ufshcd_add_command_trace(struct ufs_hba *hba,
                unsigned int tag, const char *str)
 {
@@ -285,6 +314,9 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba,
        struct ufshcd_lrb *lrbp;
        int transfer_len = -1;
 
+       /* trace UPIU also */
+       ufshcd_add_cmd_upiu_trace(hba, tag, str);
+
        if (!trace_ufshcd_command_enabled())
                return;
 
@@ -2550,6 +2582,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
        hba->dev_cmd.complete = &wait;
 
+       ufshcd_add_query_upiu_trace(hba, tag, "query_send");
        /* Make sure descriptors are ready before ringing the doorbell */
        wmb();
        spin_lock_irqsave(hba->host->host_lock, flags);
@@ -2559,6 +2592,9 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
        err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout);
 
+       ufshcd_add_query_upiu_trace(hba, tag,
+                       err ? "query_complete_err" : "query_complete");
+
 out_put_tag:
        ufshcd_put_dev_cmd_tag(hba, tag);
        wake_up(&hba->dev_cmd.tag_wq);
@@ -5443,11 +5479,14 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
 
        spin_unlock_irqrestore(host->host_lock, flags);
 
+       ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_send");
+
        /* wait until the task management command is completed */
        err = wait_event_timeout(hba->tm_wq,
                        test_bit(free_slot, &hba->tm_condition),
                        msecs_to_jiffies(TM_CMD_TIMEOUT));
        if (!err) {
+               ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete_err");
                dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
                                __func__, tm_function);
                if (ufshcd_clear_tm_cmd(hba, free_slot))
@@ -5456,6 +5495,7 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
                err = -ETIMEDOUT;
        } else {
                err = ufshcd_task_req_compl(hba, free_slot, tm_response);
+               ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete");
        }
 
        clear_bit(free_slot, &hba->tm_condition);
index c374e3b5c678d215bfa9e7ed33e2d033e5d4bfb3..777e5f1e52d10968d5f23e0e316db05b8209511d 100644 (file)
@@ -609,7 +609,7 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
                        break;
 
                case BTSTAT_ABORTQUEUE:
-                       cmd->result = (DID_ABORT << 16);
+                       cmd->result = (DID_BUS_BUSY << 16);
                        break;
 
                case BTSTAT_SCSIPARITY:
index 884419c37e8419c8e2a955e4406f75d68e1e1298..457ea1f8db309cb90ea2eef0feefe64be56e3f70 100644 (file)
@@ -183,7 +183,7 @@ static u16 slim_slicesize(int code)
                0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7
        };
 
-       clamp(code, 1, (int)ARRAY_SIZE(sizetocode));
+       code = clamp(code, 1, (int)ARRAY_SIZE(sizetocode));
 
        return sizetocode[code - 1];
 }
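
The slimbus change fixes the classic clamp() misuse: clamp() is a pure expression and never modifies its argument, so the unassigned call let out-of-range codes index past the 16-entry sizetocode[] table. Illustrated with code = 40:

    /*   clamp(40, 1, 16);        result discarded, code stays 40 and
     *                            sizetocode[39] reads out of bounds
     *   code = clamp(40, 1, 16); code == 16, sizetocode[15] is the
     *                            last valid entry
     */
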
index fe96a8b956fbd54424bea059af5b0be1240c91b3..f7ed1187518b9d2b47bedd29ae6b5a1e3ab566fd 100644 (file)
@@ -45,7 +45,7 @@ struct rpi_power_domains {
 struct rpi_power_domain_packet {
        u32 domain;
        u32 on;
-} __packet;
+};
 
 /*
  * Asks the firmware to enable or disable power on a specific power
index 1596d35498c5a5567bc844c11c2fd1463867dde5..6573152ce8936e728cfb86a0210ce33eeb4e81d8 100644 (file)
@@ -490,7 +490,7 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
 
 static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
 {
-       if (!has_bspi(qspi) || (qspi->bspi_enabled))
+       if (!has_bspi(qspi))
                return;
 
        qspi->bspi_enabled = 1;
@@ -505,7 +505,7 @@ static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
 
 static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
 {
-       if (!has_bspi(qspi) || (!qspi->bspi_enabled))
+       if (!has_bspi(qspi))
                return;
 
        qspi->bspi_enabled = 0;
@@ -519,16 +519,19 @@ static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
 
 static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs)
 {
-       u32 data = 0;
+       u32 rd = 0;
+       u32 wr = 0;
 
-       if (qspi->curr_cs == cs)
-               return;
        if (qspi->base[CHIP_SELECT]) {
-               data = bcm_qspi_read(qspi, CHIP_SELECT, 0);
-               data = (data & ~0xff) | (1 << cs);
-               bcm_qspi_write(qspi, CHIP_SELECT, 0, data);
+               rd = bcm_qspi_read(qspi, CHIP_SELECT, 0);
+               wr = (rd & ~0xff) | (1 << cs);
+               if (rd == wr)
+                       return;
+               bcm_qspi_write(qspi, CHIP_SELECT, 0, wr);
                usleep_range(10, 20);
        }
+
+       dev_dbg(&qspi->pdev->dev, "using cs:%d\n", cs);
        qspi->curr_cs = cs;
 }
 
@@ -755,8 +758,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
                        dev_dbg(&qspi->pdev->dev, "WR %04x\n", val);
                }
                mspi_cdram = MSPI_CDRAM_CONT_BIT;
-               mspi_cdram |= (~(1 << spi->chip_select) &
-                              MSPI_CDRAM_PCS);
+
+               if (has_bspi(qspi))
+                       mspi_cdram &= ~1;
+               else
+                       mspi_cdram |= (~(1 << spi->chip_select) &
+                                      MSPI_CDRAM_PCS);
+
                mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
                                MSPI_CDRAM_BITSE_BIT);
 
index 1431cb98fe403a4b7039195bf172bf47bceb6702..3094d818cf06d4751122611bc2eb807e71d965fd 100644 (file)
@@ -184,6 +184,11 @@ static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
        struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
        irqreturn_t ret = IRQ_NONE;
 
+       /* IRQ may be shared, so return if our interrupts are disabled */
+       if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) &
+             (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE)))
+               return ret;
+
        /* check if we have data to read */
        while (bs->rx_len &&
               (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) &
index 5c9516ae4942e5cf8b2ef381d2ecd496803cbf14..4a001634023e09b8e83b8e6b82b5af557e2c0853 100644 (file)
@@ -313,6 +313,14 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi)
 
        while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) &&
               (xspi->tx_bytes > 0)) {
+
+               /* When xspi is in a busy condition, bytes may fail to send
+                * and the SPI controller stops working, so add a one-byte delay
+                */
+               if (cdns_spi_read(xspi, CDNS_SPI_ISR) &
+                   CDNS_SPI_IXR_TXFULL)
+                       usleep_range(10, 20);
+
                if (xspi->txbuf)
                        cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
                else
index 6f57592a7f95ad0980362dd69840d72846cbad77..a056ee88a960bf8e0cb33a871ef573d0d81e417e 100644 (file)
@@ -1701,7 +1701,7 @@ static struct platform_driver spi_imx_driver = {
 };
 module_platform_driver(spi_imx_driver);
 
-MODULE_DESCRIPTION("SPI Master Controller driver");
+MODULE_DESCRIPTION("SPI Controller driver");
 MODULE_AUTHOR("Sascha Hauer, Pengutronix");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRIVER_NAME);
index 513ec6c6e25b310b62d0024cd6c71a23c4890b5e..0ae7defd3492001dd096d8772bfde53db63226e7 100644 (file)
@@ -38,7 +38,7 @@ struct driver_data {
 
        /* SSP register addresses */
        void __iomem *ioaddr;
-       u32 ssdr_physical;
+       phys_addr_t ssdr_physical;
 
        /* SSP masks*/
        u32 dma_cr1;
index ae086aab57d51edc2c8014160f4abeab365ea9cc..8171eedbfc90033b5bf942667cfcfe5214453d4e 100644 (file)
@@ -283,6 +283,7 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
        }
 
        k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1);
+       brps = min_t(int, brps, 32);
 
        scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps);
        sh_msiof_write(p, TSCR, scr);
index 16cab40156ca6c3f5c779ece59ac9e7a4f1906e0..aeab05f682d92f24987e2eedfce6a6fb900ac500 100644 (file)
@@ -1799,7 +1799,7 @@ static int imx_csi_probe(struct platform_device *pdev)
        priv->dev->of_node = pdata->of_node;
        pinctrl = devm_pinctrl_get_select_default(priv->dev);
        if (IS_ERR(pinctrl)) {
-               ret = PTR_ERR(priv->vdev);
+               ret = PTR_ERR(pinctrl);
                dev_dbg(priv->dev,
                        "devm_pinctrl_get_select_default() failed: %d\n", ret);
                if (ret != -ENODEV)
index 6b5300ca44a67a8bcd75a5ad0149504220a2984e..885f5fcead777e7526f9c0e3b363bf9ca8584784 100644 (file)
@@ -1390,7 +1390,7 @@ static inline void host_int_parse_assoc_resp_info(struct wilc_vif *vif,
        }
 
        if (hif_drv->usr_conn_req.ies) {
-               conn_info.req_ies = kmemdup(conn_info.req_ies,
+               conn_info.req_ies = kmemdup(hif_drv->usr_conn_req.ies,
                                            hif_drv->usr_conn_req.ies_len,
                                            GFP_KERNEL);
                if (conn_info.req_ies)
index 07c814c42648faa00d7bca39ad339d7ef916ec99..60429011292a2c4c2fa4104a90414ae948f956a7 100644 (file)
@@ -427,8 +427,8 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
        struct scatterlist *sg = &cmd->t_data_sg[0];
-       unsigned char *buf, zero = 0x00, *p = &zero;
-       int rc, ret;
+       unsigned char *buf, *not_zero;
+       int ret;
 
        buf = kmap(sg_page(sg)) + sg->offset;
        if (!buf)
@@ -437,10 +437,10 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd)
         * Fall back to block_execute_write_same() slow-path if
         * incoming WRITE_SAME payload does not contain zeros.
         */
-       rc = memcmp(buf, p, cmd->data_length);
+       not_zero = memchr_inv(buf, 0x00, cmd->data_length);
        kunmap(sg_page(sg));
 
-       if (rc)
+       if (not_zero)
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
        ret = blkdev_issue_zeroout(bdev,
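
The iblock change also removes an out-of-bounds read: the old memcmp() compared data_length bytes against the single stack byte at &zero. memchr_inv() is the right primitive here, returning the first byte that differs from the given value, or NULL when the whole range matches:

    /* True iff all len bytes of buf are zero; a thin wrapper showing
     * the memchr_inv() idiom used above.
     */
    static bool buf_is_all_zero(const void *buf, size_t len)
    {
            return memchr_inv(buf, 0x00, len) == NULL;
    }
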
index 0d99b242e82e3f84da25a47564f96db60be4b5f5..6cb933ecc084029f420fb2e13784a01f9c97e434 100644 (file)
@@ -890,6 +890,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                        bytes = min(bytes, data_len);
 
                        if (!bio) {
+new_bio:
                                nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
                                nr_pages -= nr_vecs;
                                /*
@@ -931,6 +932,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                 * be allocated with pscsi_get_bio() above.
                                 */
                                bio = NULL;
+                               goto new_bio;
                        }
 
                        data_len -= bytes;
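
Before the added label, filling a bio mid-segment left bio == NULL and fell through to the length accounting, silently dropping the bytes that never made it into a bio; the goto retries the same chunk with a fresh bio. The generic shape of the pattern (all names below are placeholders, not the real pscsi helpers):

    for (i = 0; i < nr_items; i++) {
    new_container:
            if (!c) {
                    c = alloc_container();  /* fresh bio */
                    if (!c)
                            return -ENOMEM;
            }
            if (!add_item(c, &items[i])) {  /* container is full */
                    submit(c);              /* flush what we have */
                    c = NULL;
                    goto new_container;     /* retry item i, do not advance */
            }
    }
    if (c)
            submit(c);
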
index 0124a91c8d7139cd6840cfffeea64adc1cd1825c..dd46b758852aa9ba2866348e6f973da3447b3623 100644 (file)
@@ -238,6 +238,17 @@ static int params_from_user(struct tee_context *ctx, struct tee_param *params,
                        if (IS_ERR(shm))
                                return PTR_ERR(shm);
 
+                       /*
+                        * Ensure offset + size does not wrap around and
+                        * does not exceed the size of the referred
+                        * shared memory object.
+                        */
+                       if ((ip.a + ip.b) < ip.a ||
+                           (ip.a + ip.b) > shm->size) {
+                               tee_shm_put(shm);
+                               return -EINVAL;
+                       }
+
                        params[n].u.memref.shm_offs = ip.a;
                        params[n].u.memref.size = ip.b;
                        params[n].u.memref.shm = shm;
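
The added check is the standard unsigned wrap-around test: for unsigned a and b, a + b overflows exactly when the sum comes out smaller than a. A concrete u64 example of why both halves of the condition are needed:

    /* a = 0xffffffffffffff00, b = 0x200:
     *   a + b == 0x100 (mod 2^64), which is < a  -> rejected by the
     *   first test, even though 0x100 alone would pass the size check.
     */
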
index 556960a1bab3b4631b10a2803ab54167b06c376b..07d3be6f0780db209ac2be07354ac390c31d6be8 100644 (file)
@@ -360,9 +360,10 @@ int tee_shm_get_fd(struct tee_shm *shm)
        if (!(shm->flags & TEE_SHM_DMA_BUF))
                return -EINVAL;
 
+       get_dma_buf(shm->dmabuf);
        fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
-       if (fd >= 0)
-               get_dma_buf(shm->dmabuf);
+       if (fd < 0)
+               dma_buf_put(shm->dmabuf);
        return fd;
 }
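
The reorder matters because dma_buf_fd() makes the fd visible to userspace: another thread can close it immediately, dropping the reference the fd owns, so taking the extra reference only after a successful return races with that release. The fixed ordering, annotated:

    get_dma_buf(shm->dmabuf);                /* ref the new fd will own */
    fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); /* fd now visible to user  */
    if (fd < 0)
            dma_buf_put(shm->dmabuf);        /* no fd created: drop ref */
    return fd;
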
 
index 8a7f24dd9315e3be809ef98e4513506561be04fb..0c19fcd56a0da02713e93778afc78c84017aeeef 100644 (file)
@@ -194,6 +194,7 @@ static int int3403_cdev_add(struct int3403_priv *priv)
                return -EFAULT;
        }
 
+       priv->priv = obj;
        obj->max_state = p->package.count - 1;
        obj->cdev =
                thermal_cooling_device_register(acpi_device_bid(priv->adev),
@@ -201,8 +202,6 @@ static int int3403_cdev_add(struct int3403_priv *priv)
        if (IS_ERR(obj->cdev))
                result = PTR_ERR(obj->cdev);
 
-       priv->priv = obj;
-
        kfree(buf.pointer);
        /* TODO: add ACPI notification support */
 
index ed805c7c5ace3a121be27359bb4d08ee42634a4d..ac83f721db24d78cff24d9349ecf75edafe9f2d4 100644 (file)
  * @regulator: pointer to the TMU regulator structure.
  * @reg_conf: pointer to structure to register with core thermal.
  * @ntrip: number of supported trip points.
+ * @enabled: current status of TMU device
  * @tmu_initialize: SoC specific TMU initialization method
  * @tmu_control: SoC specific TMU control method
  * @tmu_read: SoC specific TMU temperature read method
@@ -205,6 +206,7 @@ struct exynos_tmu_data {
        struct regulator *regulator;
        struct thermal_zone_device *tzd;
        unsigned int ntrip;
+       bool enabled;
 
        int (*tmu_initialize)(struct platform_device *pdev);
        void (*tmu_control)(struct platform_device *pdev, bool on);
@@ -398,6 +400,7 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on)
        mutex_lock(&data->lock);
        clk_enable(data->clk);
        data->tmu_control(pdev, on);
+       data->enabled = on;
        clk_disable(data->clk);
        mutex_unlock(&data->lock);
 }
@@ -889,19 +892,24 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on)
 static int exynos_get_temp(void *p, int *temp)
 {
        struct exynos_tmu_data *data = p;
+       int value, ret = 0;
 
-       if (!data || !data->tmu_read)
+       if (!data || !data->tmu_read || !data->enabled)
                return -EINVAL;
 
        mutex_lock(&data->lock);
        clk_enable(data->clk);
 
-       *temp = code_to_temp(data, data->tmu_read(data)) * MCELSIUS;
+       value = data->tmu_read(data);
+       if (value < 0)
+               ret = value;
+       else
+               *temp = code_to_temp(data, value) * MCELSIUS;
 
        clk_disable(data->clk);
        mutex_unlock(&data->lock);
 
-       return 0;
+       return ret;
 }
 
 #ifdef CONFIG_THERMAL_EMULATION
index 3b3e1f6632d71c1aae24d690679b4b53673ce148..1dbe27c9946c16578a6ad0f82313c4adf797aef0 100644 (file)
@@ -121,6 +121,9 @@ struct gsm_dlci {
        struct mutex mutex;
 
        /* Link layer */
+       int mode;
+#define DLCI_MODE_ABM          0       /* Normal Asynchronous Balanced Mode */
+#define DLCI_MODE_ADM          1       /* Asynchronous Disconnected Mode */
        spinlock_t lock;        /* Protects the internal state */
        struct timer_list t1;   /* Retransmit timer for SABM and UA */
        int retries;
@@ -1364,7 +1367,13 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm,
        ctrl->data = data;
        ctrl->len = clen;
        gsm->pending_cmd = ctrl;
-       gsm->cretries = gsm->n2;
+
+       /* If DLCI0 is in ADM mode skip retries, it won't respond */
+       if (gsm->dlci[0]->mode == DLCI_MODE_ADM)
+               gsm->cretries = 1;
+       else
+               gsm->cretries = gsm->n2;
+
        mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);
        gsm_control_transmit(gsm, ctrl);
        spin_unlock_irqrestore(&gsm->control_lock, flags);
@@ -1472,6 +1481,7 @@ static void gsm_dlci_t1(struct timer_list *t)
                        if (debug & 8)
                                pr_info("DLCI %d opening in ADM mode.\n",
                                        dlci->addr);
+                       dlci->mode = DLCI_MODE_ADM;
                        gsm_dlci_open(dlci);
                } else {
                        gsm_dlci_close(dlci);
@@ -2861,11 +2871,22 @@ static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk)
 static int gsm_carrier_raised(struct tty_port *port)
 {
        struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port);
+       struct gsm_mux *gsm = dlci->gsm;
+
        /* Not yet open so no carrier info */
        if (dlci->state != DLCI_OPEN)
                return 0;
        if (debug & 2)
                return 1;
+
+       /*
+        * Basic mode with control channel in ADM mode may not respond
+        * to CMD_MSC at all and modem_rx is empty.
+        */
+       if (gsm->encoding == 0 && gsm->dlci[0]->mode == DLCI_MODE_ADM &&
+           !dlci->modem_rx)
+               return 1;
+
        return dlci->modem_rx & TIOCM_CD;
 }
 
index a24278380fec2a6a67b2513514f08ecd6688dfe2..22683393a0f2c3f7b6054d367823a3f99f4df103 100644 (file)
@@ -169,7 +169,7 @@ static int __init register_earlycon(char *buf, const struct earlycon_id *match)
  */
 int __init setup_earlycon(char *buf)
 {
-       const struct earlycon_id *match;
+       const struct earlycon_id **p_match;
 
        if (!buf || !buf[0])
                return -EINVAL;
@@ -177,7 +177,9 @@ int __init setup_earlycon(char *buf)
        if (early_con.flags & CON_ENABLED)
                return -EALREADY;
 
-       for (match = __earlycon_table; match < __earlycon_table_end; match++) {
+       for (p_match = __earlycon_table; p_match < __earlycon_table_end;
+            p_match++) {
+               const struct earlycon_id *match = *p_match;
                size_t len = strlen(match->name);
 
                if (strncmp(buf, match->name, len))
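
__earlycon_table now holds pointers to the earlycon_id entries rather than the entries themselves, presumably so the linker section has a fixed stride regardless of how each entry is aligned; the walk gains one dereference. The resulting loop shape, using the section symbols above:

    extern const struct earlycon_id *__earlycon_table[];
    extern const struct earlycon_id *__earlycon_table_end[];

    const struct earlycon_id **p_match;

    for (p_match = __earlycon_table; p_match < __earlycon_table_end;
         p_match++) {
            const struct earlycon_id *match = *p_match;

            /* compare buf against match->name as before */
    }
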
index 91f3a1a5cb7fa81ac82a2289e792fc5c908df9b2..c2fc6bef7a6f28a6fc3fe9f18782c555bdb35a35 100644 (file)
@@ -316,7 +316,7 @@ static u32 imx_uart_readl(struct imx_port *sport, u32 offset)
                 * differ from the value that was last written. As it only
                 * clears after being set, reread conditionally.
                 */
-               if (sport->ucr2 & UCR2_SRST)
+               if (!(sport->ucr2 & UCR2_SRST))
                        sport->ucr2 = readl(sport->port.membase + offset);
                return sport->ucr2;
                break;
@@ -1833,6 +1833,11 @@ static int imx_uart_rs485_config(struct uart_port *port,
                rs485conf->flags &= ~SER_RS485_ENABLED;
 
        if (rs485conf->flags & SER_RS485_ENABLED) {
+               /* Enable receiver if low-active RTS signal is requested */
+               if (sport->have_rtscts &&  !sport->have_rtsgpio &&
+                   !(rs485conf->flags & SER_RS485_RTS_ON_SEND))
+                       rs485conf->flags |= SER_RS485_RX_DURING_TX;
+
                /* disable transmitter */
                ucr2 = imx_uart_readl(sport, UCR2);
                if (rs485conf->flags & SER_RS485_RTS_AFTER_SEND)
@@ -2265,6 +2270,18 @@ static int imx_uart_probe(struct platform_device *pdev)
            (!sport->have_rtscts && !sport->have_rtsgpio))
                dev_err(&pdev->dev, "no RTS control, disabling rs485\n");
 
+       /*
+        * If using the i.MX UART RTS/CTS control, the RTS (CTS_B)
+        * signal cannot be driven low during transmission while the
+        * receiver is off (a limitation of the i.MX UART IP).
+        */
+       if (sport->port.rs485.flags & SER_RS485_ENABLED &&
+           sport->have_rtscts && !sport->have_rtsgpio &&
+           (!(sport->port.rs485.flags & SER_RS485_RTS_ON_SEND) &&
+            !(sport->port.rs485.flags & SER_RS485_RX_DURING_TX)))
+               dev_err(&pdev->dev,
+                       "low-active RTS not possible when receiver is off, enabling receiver\n");
+
        imx_uart_rs485_config(&sport->port, &sport->port.rs485);
 
        /* Disable interrupts before requesting them */
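
Both imx hunks implement one policy: with the native (non-GPIO) RTS pin
and a low-active RTS request, the receiver would otherwise sit disabled
during transmission, so SER_RS485_RX_DURING_TX is forced on. A compact
sketch of the flag fix-up (the bit values are illustrative, not the real
uapi constants):

    #include <stdio.h>

    #define SER_RS485_ENABLED       (1 << 0)
    #define SER_RS485_RTS_ON_SEND   (1 << 1)
    #define SER_RS485_RX_DURING_TX  (1 << 2)

    /* Native i.MX RTS + low-active RTS: keep the receiver on during TX. */
    static unsigned int fixup_rs485(unsigned int flags, int have_rtscts,
                                    int have_rtsgpio)
    {
            if ((flags & SER_RS485_ENABLED) && have_rtscts && !have_rtsgpio &&
                !(flags & SER_RS485_RTS_ON_SEND))
                    flags |= SER_RS485_RX_DURING_TX;
            return flags;
    }

    int main(void)
    {
            unsigned int f = fixup_rs485(SER_RS485_ENABLED, 1, 0);

            printf("rx_during_tx=%d\n", !!(f & SER_RS485_RX_DURING_TX));
            return 0;
    }
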
index 750e5645dc857c7ba63bd0a848e9f6055fac988c..f503fab1e2685f2451d540ed18d5f474a71581b4 100644 (file)
@@ -495,7 +495,6 @@ static void mvebu_uart_set_termios(struct uart_port *port,
                termios->c_iflag |= old->c_iflag & ~(INPCK | IGNPAR);
                termios->c_cflag &= CREAD | CBAUD;
                termios->c_cflag |= old->c_cflag & ~(CREAD | CBAUD);
-               termios->c_lflag = old->c_lflag;
        }
 
        spin_unlock_irqrestore(&port->lock, flags);
index 65ff669373d4a0325ffd97a04f070ee3e389217a..a1b3eb04cb32c78f17ccd32698ced4a459091869 100644 (file)
@@ -1022,6 +1022,7 @@ static int qcom_geni_serial_probe(struct platform_device *pdev)
        struct qcom_geni_serial_port *port;
        struct uart_port *uport;
        struct resource *res;
+       int irq;
 
        if (pdev->dev.of_node)
                line = of_alias_get_id(pdev->dev.of_node, "serial");
@@ -1061,11 +1062,12 @@ static int qcom_geni_serial_probe(struct platform_device *pdev)
        port->rx_fifo_depth = DEF_FIFO_DEPTH_WORDS;
        port->tx_fifo_width = DEF_FIFO_WIDTH_BITS;
 
-       uport->irq = platform_get_irq(pdev, 0);
-       if (uport->irq < 0) {
-               dev_err(&pdev->dev, "Failed to get IRQ %d\n", uport->irq);
-               return uport->irq;
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               dev_err(&pdev->dev, "Failed to get IRQ %d\n", irq);
+               return irq;
        }
+       uport->irq = irq;
 
        uport->private_data = &qcom_geni_console_driver;
        platform_set_drvdata(pdev, port);
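
The qcom_geni change is a classic signedness fix: uart_port's irq field is
an unsigned int, so "uport->irq < 0" can never catch a negative
platform_get_irq() return. Checking the result in a signed local first
avoids the silent conversion. A tiny sketch of the pitfall (fake_get_irq
stands in for platform_get_irq):

    #include <stdio.h>

    struct uart_port_like { unsigned int irq; };

    static int fake_get_irq(void) { return -6; /* e.g. -ENXIO */ }

    int main(void)
    {
            struct uart_port_like uport;
            int irq;

            /* Broken: the negative error becomes a huge unsigned value. */
            uport.irq = fake_get_irq();
            printf("buggy check fires: %d\n", uport.irq < 0); /* always 0 */

            /* Fixed: test in a signed variable before storing. */
            irq = fake_get_irq();
            if (irq < 0)
                    printf("correct check fires: %d\n", irq);
            else
                    uport.irq = irq;
            return 0;
    }
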
index abcb4d09a2d866d05b7af3c1ff5d3f68492514c1..bd72dd843338dc9a3aecaacaa04dc7b2fc2442db 100644 (file)
@@ -1181,7 +1181,7 @@ static int __init cdns_early_console_setup(struct earlycon_device *device,
        /* only set baud if specified on command line - otherwise
         * assume it has been initialized by a boot loader.
         */
-       if (device->baud) {
+       if (port->uartclk && device->baud) {
                u32 cd = 0, bdiv = 0;
                u32 mr;
                int div8;
index 63114ea35ec1b8f3620ec888155b929ed1915200..7c838b90a31d636865ce99a84466967d57b777aa 100644 (file)
@@ -2816,7 +2816,10 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx)
 
        kref_init(&tty->kref);
        tty->magic = TTY_MAGIC;
-       tty_ldisc_init(tty);
+       if (tty_ldisc_init(tty)) {
+               kfree(tty);
+               return NULL;
+       }
        tty->session = NULL;
        tty->pgrp = NULL;
        mutex_init(&tty->legacy_mutex);
index 050f4d650891763f96800244869821afb9d1586e..fb7329ab2b37a85f0682805d2fe996f6279df64b 100644 (file)
@@ -176,12 +176,11 @@ static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc)
                        return ERR_CAST(ldops);
        }
 
-       ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
-       if (ld == NULL) {
-               put_ldops(ldops);
-               return ERR_PTR(-ENOMEM);
-       }
-
+       /*
+        * There is no way to handle allocation failure of only 16 bytes.
+        * Let's simplify error handling and save more memory.
+        */
+       ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL | __GFP_NOFAIL);
        ld->ops = ldops;
        ld->tty = tty;
 
@@ -527,19 +526,16 @@ static int tty_ldisc_failto(struct tty_struct *tty, int ld)
 static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 {
        /* There is an outstanding reference here so this is safe */
-       old = tty_ldisc_get(tty, old->ops->num);
-       WARN_ON(IS_ERR(old));
-       tty->ldisc = old;
-       tty_set_termios_ldisc(tty, old->ops->num);
-       if (tty_ldisc_open(tty, old) < 0) {
-               tty_ldisc_put(old);
+       if (tty_ldisc_failto(tty, old->ops->num) < 0) {
+               const char *name = tty_name(tty);
+
+               pr_warn("Falling back ldisc for %s.\n", name);
                /* The traditional behaviour is to fall back to N_TTY, we
                   want to avoid falling back to N_NULL unless we have no
                   choice to avoid the risk of breaking anything */
                if (tty_ldisc_failto(tty, N_TTY) < 0 &&
                    tty_ldisc_failto(tty, N_NULL) < 0)
-                       panic("Couldn't open N_NULL ldisc for %s.",
-                             tty_name(tty));
+                       panic("Couldn't open N_NULL ldisc for %s.", name);
        }
 }
 
@@ -824,12 +820,13 @@ EXPORT_SYMBOL_GPL(tty_ldisc_release);
  *     the tty structure is not completely set up when this call is made.
  */
 
-void tty_ldisc_init(struct tty_struct *tty)
+int tty_ldisc_init(struct tty_struct *tty)
 {
        struct tty_ldisc *ld = tty_ldisc_get(tty, N_TTY);
        if (IS_ERR(ld))
-               panic("n_tty: init_tty");
+               return PTR_ERR(ld);
        tty->ldisc = ld;
+       return 0;
 }
 
 /**
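
The tty_ldisc hunks turn a boot-time panic into an error the caller can
unwind: tty_ldisc_init() now returns int, and alloc_tty_struct() frees the
half-built tty on failure instead of crashing. A standalone sketch of that
pattern (plain NULL/-ENOMEM in place of the kernel's ERR_PTR machinery;
the *_like names are hypothetical):

    #include <stdio.h>
    #include <stdlib.h>

    struct ldisc { int num; };
    struct tty   { struct ldisc *ld; };

    /* May fail, e.g. when the discipline cannot be set up. */
    static struct ldisc *ldisc_get(int num)
    {
            struct ldisc *ld = malloc(sizeof(*ld));

            if (!ld)
                    return NULL;
            ld->num = num;
            return ld;
    }

    /* Was "void ... panic()"; now reports failure to the caller. */
    static int tty_ldisc_init_like(struct tty *tty)
    {
            struct ldisc *ld = ldisc_get(0 /* N_TTY */);

            if (!ld)
                    return -12; /* -ENOMEM */
            tty->ld = ld;
            return 0;
    }

    int main(void)
    {
            struct tty t;

            if (tty_ldisc_init_like(&t)) {
                    /* the caller frees the tty instead of panicking */
                    fprintf(stderr, "init failed\n");
                    return 1;
            }
            printf("ldisc %d attached\n", t.ld->num);
            free(t.ld);
            return 0;
    }
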
index f695a7e8c314585c8b0df8ab88f44d897986839b..c690d100adcd075b2999b82443b08e4ce03c2b53 100644 (file)
@@ -19,7 +19,7 @@
  * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \
  *    > /sys/bus/vmbus/drivers/uio_hv_generic/bind
  */
-
+#define DEBUG 1
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/device.h>
@@ -94,10 +94,11 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
  */
 static void hv_uio_channel_cb(void *context)
 {
-       struct hv_uio_private_data *pdata = context;
-       struct hv_device *dev = pdata->device;
+       struct vmbus_channel *chan = context;
+       struct hv_device *hv_dev = chan->device_obj;
+       struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);
 
-       dev->channel->inbound.ring_buffer->interrupt_mask = 1;
+       chan->inbound.ring_buffer->interrupt_mask = 1;
        virt_mb();
 
        uio_event_notify(&pdata->info);
@@ -121,78 +122,46 @@ static void hv_uio_rescind(struct vmbus_channel *channel)
        uio_event_notify(&pdata->info);
 }
 
-/*
- * Handle fault when looking for sub channel ring buffer
- * Subchannel ring buffer is same as resource 0 which is main ring buffer
- * This is derived from uio_vma_fault
+/* Sysfs API to allow mmap of the ring buffers
+ * The ring buffer is allocated as contiguous memory by vmbus_open
  */
-static int hv_uio_vma_fault(struct vm_fault *vmf)
-{
-       struct vm_area_struct *vma = vmf->vma;
-       void *ring_buffer = vma->vm_private_data;
-       struct page *page;
-       void *addr;
-
-       addr = ring_buffer + (vmf->pgoff << PAGE_SHIFT);
-       page = virt_to_page(addr);
-       get_page(page);
-       vmf->page = page;
-       return 0;
-}
-
-static const struct vm_operations_struct hv_uio_vm_ops = {
-       .fault = hv_uio_vma_fault,
-};
-
-/* Sysfs API to allow mmap of the ring buffers */
 static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj,
                            struct bin_attribute *attr,
                            struct vm_area_struct *vma)
 {
        struct vmbus_channel *channel
                = container_of(kobj, struct vmbus_channel, kobj);
-       unsigned long requested_pages, actual_pages;
-
-       if (vma->vm_end < vma->vm_start)
-               return -EINVAL;
-
-       /* only allow 0 for now */
-       if (vma->vm_pgoff > 0)
-               return -EINVAL;
+       struct hv_device *dev = channel->primary_channel->device_obj;
+       u16 q_idx = channel->offermsg.offer.sub_channel_index;
 
-       requested_pages = vma_pages(vma);
-       actual_pages = 2 * HV_RING_SIZE;
-       if (requested_pages > actual_pages)
-               return -EINVAL;
+       dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
+               q_idx, vma_pages(vma), vma->vm_pgoff);
 
-       vma->vm_private_data = channel->ringbuffer_pages;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       vma->vm_ops = &hv_uio_vm_ops;
-       return 0;
+       return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages),
+                              channel->ringbuffer_pagecount << PAGE_SHIFT);
 }
 
-static struct bin_attribute ring_buffer_bin_attr __ro_after_init = {
+static const struct bin_attribute ring_buffer_bin_attr = {
        .attr = {
                .name = "ring",
                .mode = 0600,
-               /* size is set at init time */
        },
+       .size = 2 * HV_RING_SIZE * PAGE_SIZE,
        .mmap = hv_uio_ring_mmap,
 };
 
-/* Callback from VMBUS subystem when new channel created. */
+/* Callback from VMBUS subsystem when new channel created. */
 static void
 hv_uio_new_channel(struct vmbus_channel *new_sc)
 {
        struct hv_device *hv_dev = new_sc->primary_channel->device_obj;
        struct device *device = &hv_dev->device;
-       struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);
        const size_t ring_bytes = HV_RING_SIZE * PAGE_SIZE;
        int ret;
 
        /* Create host communication ring */
        ret = vmbus_open(new_sc, ring_bytes, ring_bytes, NULL, 0,
-                        hv_uio_channel_cb, pdata);
+                        hv_uio_channel_cb, new_sc);
        if (ret) {
                dev_err(device, "vmbus_open subchannel failed: %d\n", ret);
                return;
@@ -234,7 +203,7 @@ hv_uio_probe(struct hv_device *dev,
 
        ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE,
                         HV_RING_SIZE * PAGE_SIZE, NULL, 0,
-                        hv_uio_channel_cb, pdata);
+                        hv_uio_channel_cb, dev->channel);
        if (ret)
                goto fail;
 
@@ -326,6 +295,11 @@ hv_uio_probe(struct hv_device *dev,
        vmbus_set_chn_rescind_callback(dev->channel, hv_uio_rescind);
        vmbus_set_sc_create_callback(dev->channel, hv_uio_new_channel);
 
+       ret = sysfs_create_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr);
+       if (ret)
+               dev_notice(&dev->device,
+                          "sysfs create ring bin file failed; %d\n", ret);
+
        hv_set_drvdata(dev, pdata);
 
        return 0;
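
The uio_hv_generic rewrite maps the whole contiguous ring in a single
vm_iomap_memory() call instead of faulting pages in one by one, so what is
left to reason about is the pages-to-bytes arithmetic and the bounds check
the old fault path did by hand (vm_iomap_memory performs equivalent range
validation internally). A standalone sketch of that arithmetic, with an
illustrative ring size:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* What a single-shot mapping must hold: offset 0, within the ring. */
    static int ring_mmap_ok(unsigned long vma_pages, unsigned long vma_pgoff,
                            unsigned long ring_pagecount)
    {
            if (vma_pgoff != 0)
                    return 0;
            return vma_pages <= ring_pagecount;
    }

    int main(void)
    {
            unsigned long ring_pages = 2 * 128; /* 2 * HV_RING_SIZE, say */

            printf("full map ok: %d\n",
                   ring_mmap_ok(ring_pages, 0, ring_pages));
            printf("oversized:   %d\n",
                   ring_mmap_ok(ring_pages + 1, 0, ring_pages));
            printf("bytes mapped: %lu\n", ring_pages << PAGE_SHIFT);
            return 0;
    }
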
index 75f7fb151f713b737a5d9a3a5b07a77de68bdf25..987fc5ba63211bb5829a8f38629e86ab6022a094 100644 (file)
@@ -207,5 +207,6 @@ config USB_ULPI_BUS
 
 config USB_ROLE_SWITCH
        tristate
+       select USB_COMMON
 
 endif # USB_SUPPORT
index c821b4b9647e357468335fca83b3aa980e600315..7b5cb28ffb3578c8785ee4e0ca541c88846890be 100644 (file)
@@ -191,7 +191,9 @@ static const unsigned short full_speed_maxpacket_maxes[4] = {
 static const unsigned short high_speed_maxpacket_maxes[4] = {
        [USB_ENDPOINT_XFER_CONTROL] = 64,
        [USB_ENDPOINT_XFER_ISOC] = 1024,
-       [USB_ENDPOINT_XFER_BULK] = 512,
+
+       /* Bulk should be 512, but some devices use 1024: we will warn below */
+       [USB_ENDPOINT_XFER_BULK] = 1024,
        [USB_ENDPOINT_XFER_INT] = 1024,
 };
 static const unsigned short super_speed_maxpacket_maxes[4] = {
index 777036ae63674af94e845a78e0cf75e607f8ba68..0a42c5df3c0fb3971c538689697dd52644f443dd 100644 (file)
@@ -2262,7 +2262,8 @@ int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg)
                hcd->state = HC_STATE_SUSPENDED;
 
                if (!PMSG_IS_AUTO(msg))
-                       usb_phy_roothub_power_off(hcd->phy_roothub);
+                       usb_phy_roothub_suspend(hcd->self.sysdev,
+                                               hcd->phy_roothub);
 
                /* Did we race with a root-hub wakeup event? */
                if (rhdev->do_remote_wakeup) {
@@ -2302,7 +2303,8 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
        }
 
        if (!PMSG_IS_AUTO(msg)) {
-               status = usb_phy_roothub_power_on(hcd->phy_roothub);
+               status = usb_phy_roothub_resume(hcd->self.sysdev,
+                                               hcd->phy_roothub);
                if (status)
                        return status;
        }
@@ -2344,7 +2346,7 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
                }
        } else {
                hcd->state = old_state;
-               usb_phy_roothub_power_off(hcd->phy_roothub);
+               usb_phy_roothub_suspend(hcd->self.sysdev, hcd->phy_roothub);
                dev_dbg(&rhdev->dev, "bus %s fail, err %d\n",
                                "resume", status);
                if (status != -ESHUTDOWN)
@@ -2377,6 +2379,7 @@ void usb_hcd_resume_root_hub (struct usb_hcd *hcd)
 
        spin_lock_irqsave (&hcd_root_hub_lock, flags);
        if (hcd->rh_registered) {
+               pm_wakeup_event(&hcd->self.root_hub->dev, 0);
                set_bit(HCD_FLAG_WAKEUP_PENDING, &hcd->flags);
                queue_work(pm_wq, &hcd->wakeup_work);
        }
@@ -2758,12 +2761,16 @@ int usb_add_hcd(struct usb_hcd *hcd,
        }
 
        if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) {
-               hcd->phy_roothub = usb_phy_roothub_init(hcd->self.sysdev);
+               hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev);
                if (IS_ERR(hcd->phy_roothub)) {
                        retval = PTR_ERR(hcd->phy_roothub);
-                       goto err_phy_roothub_init;
+                       goto err_phy_roothub_alloc;
                }
 
+               retval = usb_phy_roothub_init(hcd->phy_roothub);
+               if (retval)
+                       goto err_phy_roothub_alloc;
+
                retval = usb_phy_roothub_power_on(hcd->phy_roothub);
                if (retval)
                        goto err_usb_phy_roothub_power_on;
@@ -2936,7 +2943,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
        usb_phy_roothub_power_off(hcd->phy_roothub);
 err_usb_phy_roothub_power_on:
        usb_phy_roothub_exit(hcd->phy_roothub);
-err_phy_roothub_init:
+err_phy_roothub_alloc:
        if (hcd->remove_phy && hcd->usb_phy) {
                usb_phy_shutdown(hcd->usb_phy);
                usb_put_phy(hcd->usb_phy);
index f6ea16e9f6bb975a09e296f2493a5a687828d920..aa9968d90a48c301e6af566d00a20139397e1742 100644 (file)
@@ -653,12 +653,17 @@ void usb_wakeup_notification(struct usb_device *hdev,
                unsigned int portnum)
 {
        struct usb_hub *hub;
+       struct usb_port *port_dev;
 
        if (!hdev)
                return;
 
        hub = usb_hub_to_struct_hub(hdev);
        if (hub) {
+               port_dev = hub->ports[portnum - 1];
+               if (port_dev && port_dev->child)
+                       pm_wakeup_event(&port_dev->child->dev, 0);
+
                set_bit(portnum, hub->wakeup_bits);
                kick_hub_wq(hub);
        }
@@ -3434,8 +3439,11 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 
        /* Skip the initial Clear-Suspend step for a remote wakeup */
        status = hub_port_status(hub, port1, &portstatus, &portchange);
-       if (status == 0 && !port_is_suspended(hub, portstatus))
+       if (status == 0 && !port_is_suspended(hub, portstatus)) {
+               if (portchange & USB_PORT_STAT_C_SUSPEND)
+                       pm_wakeup_event(&udev->dev, 0);
                goto SuspendCleared;
+       }
 
        /* see 7.1.7.7; affects power usage, but not budgeting */
        if (hub_is_superspeed(hub->hdev))
index 09b7c43c0ea4c69e365f22274bb8c36a984d49d2..9879767452a23e6a0c1ceeea65cfc0b8874fa65a 100644 (file)
@@ -19,19 +19,6 @@ struct usb_phy_roothub {
        struct list_head        list;
 };
 
-static struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev)
-{
-       struct usb_phy_roothub *roothub_entry;
-
-       roothub_entry = devm_kzalloc(dev, sizeof(*roothub_entry), GFP_KERNEL);
-       if (!roothub_entry)
-               return ERR_PTR(-ENOMEM);
-
-       INIT_LIST_HEAD(&roothub_entry->list);
-
-       return roothub_entry;
-}
-
 static int usb_phy_roothub_add_phy(struct device *dev, int index,
                                   struct list_head *list)
 {
@@ -45,9 +32,11 @@ static int usb_phy_roothub_add_phy(struct device *dev, int index,
                        return PTR_ERR(phy);
        }
 
-       roothub_entry = usb_phy_roothub_alloc(dev);
-       if (IS_ERR(roothub_entry))
-               return PTR_ERR(roothub_entry);
+       roothub_entry = devm_kzalloc(dev, sizeof(*roothub_entry), GFP_KERNEL);
+       if (!roothub_entry)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&roothub_entry->list);
 
        roothub_entry->phy = phy;
 
@@ -56,28 +45,44 @@ static int usb_phy_roothub_add_phy(struct device *dev, int index,
        return 0;
 }
 
-struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev)
+struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev)
 {
        struct usb_phy_roothub *phy_roothub;
-       struct usb_phy_roothub *roothub_entry;
-       struct list_head *head;
        int i, num_phys, err;
 
+       if (!IS_ENABLED(CONFIG_GENERIC_PHY))
+               return NULL;
+
        num_phys = of_count_phandle_with_args(dev->of_node, "phys",
                                              "#phy-cells");
        if (num_phys <= 0)
                return NULL;
 
-       phy_roothub = usb_phy_roothub_alloc(dev);
-       if (IS_ERR(phy_roothub))
-               return phy_roothub;
+       phy_roothub = devm_kzalloc(dev, sizeof(*phy_roothub), GFP_KERNEL);
+       if (!phy_roothub)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&phy_roothub->list);
 
        for (i = 0; i < num_phys; i++) {
                err = usb_phy_roothub_add_phy(dev, i, &phy_roothub->list);
                if (err)
-                       goto err_out;
+                       return ERR_PTR(err);
        }
 
+       return phy_roothub;
+}
+EXPORT_SYMBOL_GPL(usb_phy_roothub_alloc);
+
+int usb_phy_roothub_init(struct usb_phy_roothub *phy_roothub)
+{
+       struct usb_phy_roothub *roothub_entry;
+       struct list_head *head;
+       int err;
+
+       if (!phy_roothub)
+               return 0;
+
        head = &phy_roothub->list;
 
        list_for_each_entry(roothub_entry, head, list) {
@@ -86,14 +91,13 @@ struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev)
                        goto err_exit_phys;
        }
 
-       return phy_roothub;
+       return 0;
 
 err_exit_phys:
        list_for_each_entry_continue_reverse(roothub_entry, head, list)
                phy_exit(roothub_entry->phy);
 
-err_out:
-       return ERR_PTR(err);
+       return err;
 }
 EXPORT_SYMBOL_GPL(usb_phy_roothub_init);
 
@@ -111,7 +115,7 @@ int usb_phy_roothub_exit(struct usb_phy_roothub *phy_roothub)
        list_for_each_entry(roothub_entry, head, list) {
                err = phy_exit(roothub_entry->phy);
                if (err)
-                       ret = ret;
+                       ret = err;
        }
 
        return ret;
@@ -156,3 +160,38 @@ void usb_phy_roothub_power_off(struct usb_phy_roothub *phy_roothub)
                phy_power_off(roothub_entry->phy);
 }
 EXPORT_SYMBOL_GPL(usb_phy_roothub_power_off);
+
+int usb_phy_roothub_suspend(struct device *controller_dev,
+                           struct usb_phy_roothub *phy_roothub)
+{
+       usb_phy_roothub_power_off(phy_roothub);
+
+       /* keep the PHYs initialized so the device can wake up the system */
+       if (device_may_wakeup(controller_dev))
+               return 0;
+
+       return usb_phy_roothub_exit(phy_roothub);
+}
+EXPORT_SYMBOL_GPL(usb_phy_roothub_suspend);
+
+int usb_phy_roothub_resume(struct device *controller_dev,
+                          struct usb_phy_roothub *phy_roothub)
+{
+       int err;
+
+       /* if the device can't wake up the system, _exit was called */
+       if (!device_may_wakeup(controller_dev)) {
+               err = usb_phy_roothub_init(phy_roothub);
+               if (err)
+                       return err;
+       }
+
+       err = usb_phy_roothub_power_on(phy_roothub);
+
+       /* undo _init if _power_on failed */
+       if (err && !device_may_wakeup(controller_dev))
+               usb_phy_roothub_exit(phy_roothub);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(usb_phy_roothub_resume);
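
The new suspend/resume wrappers key everything off device_may_wakeup():
power the PHYs off in either case, but only exit (de-initialize) them when
the controller cannot wake the system, and mirror both decisions on
resume. A condensed state sketch of the same logic:

    #include <stdio.h>

    struct phy_state { int inited; int powered; };

    static void suspend_phys(struct phy_state *p, int may_wakeup)
    {
            p->powered = 0;          /* power_off always */
            if (!may_wakeup)
                    p->inited = 0;   /* exit only if no wakeup is needed */
    }

    static int resume_phys(struct phy_state *p, int may_wakeup)
    {
            if (!may_wakeup)
                    p->inited = 1;   /* re-init because _exit was called */
            if (!p->inited)
                    return -1;
            p->powered = 1;
            return 0;
    }

    int main(void)
    {
            struct phy_state p = { 1, 1 };

            suspend_phys(&p, 1);
            printf("suspended: inited=%d powered=%d\n", p.inited, p.powered);
            resume_phys(&p, 1);
            printf("resumed:   inited=%d powered=%d\n", p.inited, p.powered);
            return 0;
    }
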
index 6fde59bfbff8ace58bebb50bc88348eab03a1ca2..88a3c037e9df59194ba94d3ecd7ec3e2626ccb33 100644 (file)
@@ -1,7 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * USB roothub wrapper
+ *
+ * Copyright (C) 2018 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ */
+
+#ifndef __USB_CORE_PHY_H_
+#define __USB_CORE_PHY_H_
+
+struct device;
 struct usb_phy_roothub;
 
-struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev);
+struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev);
+
+int usb_phy_roothub_init(struct usb_phy_roothub *phy_roothub);
 int usb_phy_roothub_exit(struct usb_phy_roothub *phy_roothub);
 
 int usb_phy_roothub_power_on(struct usb_phy_roothub *phy_roothub);
 void usb_phy_roothub_power_off(struct usb_phy_roothub *phy_roothub);
+
+int usb_phy_roothub_suspend(struct device *controller_dev,
+                           struct usb_phy_roothub *phy_roothub);
+int usb_phy_roothub_resume(struct device *controller_dev,
+                          struct usb_phy_roothub *phy_roothub);
+
+#endif /* __USB_CORE_PHY_H_ */
index 920f48a49a87021a513b4e395a903044a558abf5..c55def2f1320f92c6c0c652fc94c7056165ee467 100644 (file)
@@ -186,6 +186,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x03f0, 0x0701), .driver_info =
                        USB_QUIRK_STRING_FETCH_255 },
 
+       /* HP v222w 16GB Mini USB Drive */
+       { USB_DEVICE(0x03f0, 0x3f40), .driver_info = USB_QUIRK_DELAY_INIT },
+
        /* Creative SB Audigy 2 NX */
        { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
 
index d83be5651f870a5a45f8194693c105dfb474394e..a666e0758a99a4a662918c57489ad1fff46a4683 100644 (file)
@@ -985,6 +985,7 @@ struct dwc2_hsotg {
 
        /* DWC OTG HW Release versions */
 #define DWC2_CORE_REV_2_71a    0x4f54271a
+#define DWC2_CORE_REV_2_72a     0x4f54272a
 #define DWC2_CORE_REV_2_80a    0x4f54280a
 #define DWC2_CORE_REV_2_90a    0x4f54290a
 #define DWC2_CORE_REV_2_91a    0x4f54291a
@@ -992,6 +993,7 @@ struct dwc2_hsotg {
 #define DWC2_CORE_REV_2_94a    0x4f54294a
 #define DWC2_CORE_REV_3_00a    0x4f54300a
 #define DWC2_CORE_REV_3_10a    0x4f54310a
+#define DWC2_CORE_REV_4_00a    0x4f54400a
 #define DWC2_FS_IOT_REV_1_00a  0x5531100a
 #define DWC2_HS_IOT_REV_1_00a  0x5532100a
 
index 6c32bf26e48e9660cf5415f0eb8029953fa0aa3c..83cb5577a52f118b13c58bfd68bb0532279bda6f 100644 (file)
@@ -3928,6 +3928,27 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep,
        if (index && !hs_ep->isochronous)
                epctrl |= DXEPCTL_SETD0PID;
 
+       /* Workaround for full-speed ISOC IN in DDMA mode.
+        * By clearing the EP's NAK status, the core will send a ZLP
+        * in response to an IN token and assert the NAK interrupt
+        * based on TxFIFO status only.
+        */
+
+       if (hsotg->gadget.speed == USB_SPEED_FULL &&
+           hs_ep->isochronous && dir_in) {
+               /* The workaround applies only to core versions from
+                * 2.72a to 4.00a, inclusive, and to FS_IOT_1.00a
+                * and HS_IOT_1.00a.
+                */
+               u32 gsnpsid = dwc2_readl(hsotg->regs + GSNPSID);
+
+               if ((gsnpsid >= DWC2_CORE_REV_2_72a &&
+                    gsnpsid <= DWC2_CORE_REV_4_00a) ||
+                    gsnpsid == DWC2_FS_IOT_REV_1_00a ||
+                    gsnpsid == DWC2_HS_IOT_REV_1_00a)
+                       epctrl |= DXEPCTL_CNAK;
+       }
+
        dev_dbg(hsotg->dev, "%s: write DxEPCTL=0x%08x\n",
                __func__, epctrl);
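
The quirk gates on the Synopsys ID falling between 2.72a and 4.00a or
matching the two IOT parts; since the revision macros are ordered hex
constants, a plain range compare suffices. A standalone sketch using the
constants from the hunks above:

    #include <stdio.h>

    #define REV_2_72a    0x4f54272a
    #define REV_4_00a    0x4f54400a
    #define FS_IOT_1_00a 0x5531100a
    #define HS_IOT_1_00a 0x5532100a

    static int needs_cnak_quirk(unsigned int gsnpsid)
    {
            return (gsnpsid >= REV_2_72a && gsnpsid <= REV_4_00a) ||
                    gsnpsid == FS_IOT_1_00a || gsnpsid == HS_IOT_1_00a;
    }

    int main(void)
    {
            printf("2.71a: %d\n", needs_cnak_quirk(0x4f54271a)); /* 0 */
            printf("3.10a: %d\n", needs_cnak_quirk(0x4f54310a)); /* 1 */
            return 0;
    }
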
 
index 190f959640007365083c239b0c3ea48ed133a85a..c51b73b3e048543860ac8e122121435b5676ab61 100644 (file)
@@ -358,9 +358,14 @@ static void dwc2_gusbcfg_init(struct dwc2_hsotg *hsotg)
 
 static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
 {
+       int ret;
+
        hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
-       if (IS_ERR(hsotg->vbus_supply))
-               return 0;
+       if (IS_ERR(hsotg->vbus_supply)) {
+               ret = PTR_ERR(hsotg->vbus_supply);
+               hsotg->vbus_supply = NULL;
+               return ret == -ENODEV ? 0 : ret;
+       }
 
        return regulator_enable(hsotg->vbus_supply);
 }
@@ -4342,9 +4347,7 @@ static int _dwc2_hcd_start(struct usb_hcd *hcd)
 
        spin_unlock_irqrestore(&hsotg->lock, flags);
 
-       dwc2_vbus_supply_init(hsotg);
-
-       return 0;
+       return dwc2_vbus_supply_init(hsotg);
 }
 
 /*
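
devm_regulator_get_optional() returning -ENODEV just means no vbus supply
is described, which is fine for an optional regulator; every other error,
notably -EPROBE_DEFER, must now reach the caller, and _dwc2_hcd_start()
propagates it. A sketch of that filter (errno values hard-coded for
illustration):

    #include <stdio.h>

    #define ENODEV       19
    #define EPROBE_DEFER 517

    /* -ENODEV means "no vbus supply described": treat as success. */
    static int vbus_supply_ret(int get_err)
    {
            if (get_err == 0)
                    return 0; /* regulator present: go enable it */
            return (get_err == -ENODEV) ? 0 : get_err;
    }

    int main(void)
    {
            printf("%d %d %d\n", vbus_supply_ret(0),
                   vbus_supply_ret(-ENODEV),        /* 0: optional */
                   vbus_supply_ret(-EPROBE_DEFER)); /* propagated */
            return 0;
    }
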
index 7f21747007f17d8a788d5ca0ab8cb5285b0e2e50..bea2e8ec036926f5f7fc1049735c04245e0120c5 100644 (file)
@@ -141,8 +141,10 @@ static int dwc2_pci_probe(struct pci_dev *pci,
                goto err;
 
        glue = devm_kzalloc(dev, sizeof(*glue), GFP_KERNEL);
-       if (!glue)
+       if (!glue) {
+               ret = -ENOMEM;
                goto err;
+       }
 
        ret = platform_device_add(dwc2);
        if (ret) {
index 8796a5ee9bb95fe2e9e0b0455d3ae55614ca9bef..0dedf8a799f454a896677f9e474bdc6397f94412 100644 (file)
@@ -166,7 +166,7 @@ static void dwc3_ep_inc_deq(struct dwc3_ep *dep)
        dwc3_ep_inc_trb(&dep->trb_dequeue);
 }
 
-void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
+static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
                struct dwc3_request *req, int status)
 {
        struct dwc3                     *dwc = dep->dwc;
@@ -1424,7 +1424,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
                                        dwc->lock);
 
                        if (!r->trb)
-                               goto out1;
+                               goto out0;
 
                        if (r->num_pending_sgs) {
                                struct dwc3_trb *trb;
index 7889bcc0509a31d82129ff801cdf569bfc376281..8b72b192c747688dcc974c0e58324c53c65fe3da 100644 (file)
@@ -221,7 +221,7 @@ static void pn_tx_complete(struct usb_ep *ep, struct usb_request *req)
        netif_wake_queue(dev);
 }
 
-static int pn_net_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t pn_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct phonet_port *port = netdev_priv(dev);
        struct f_phonet *fp;
index 4c6c08b675b5c70f53c3815a460cd1b90d929678..21307d862af674bfb995577f5046b713304d5ddc 100644 (file)
@@ -73,9 +73,10 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
        if (!qh)
                goto done;
        qh->hw = (struct ehci_qh_hw *)
-               dma_pool_zalloc(ehci->qh_pool, flags, &dma);
+               dma_pool_alloc(ehci->qh_pool, flags, &dma);
        if (!qh->hw)
                goto fail;
+       memset(qh->hw, 0, sizeof *qh->hw);
        qh->qh_dma = dma;
        // INIT_LIST_HEAD (&qh->qh_list);
        INIT_LIST_HEAD (&qh->qtd_list);
index 28e2a338b48153496485bbf29b448f63a404e9e4..e56db44708bccd86ac43a870d51fc039224305c8 100644 (file)
@@ -1287,7 +1287,7 @@ itd_urb_transaction(
                } else {
  alloc_itd:
                        spin_unlock_irqrestore(&ehci->lock, flags);
-                       itd = dma_pool_zalloc(ehci->itd_pool, mem_flags,
+                       itd = dma_pool_alloc(ehci->itd_pool, mem_flags,
                                        &itd_dma);
                        spin_lock_irqsave(&ehci->lock, flags);
                        if (!itd) {
@@ -1297,6 +1297,7 @@ itd_urb_transaction(
                        }
                }
 
+               memset(itd, 0, sizeof(*itd));
                itd->itd_dma = itd_dma;
                itd->frame = NO_FRAME;
                list_add(&itd->itd_list, &sched->td_list);
@@ -2080,7 +2081,7 @@ sitd_urb_transaction(
                } else {
  alloc_sitd:
                        spin_unlock_irqrestore(&ehci->lock, flags);
-                       sitd = dma_pool_zalloc(ehci->sitd_pool, mem_flags,
+                       sitd = dma_pool_alloc(ehci->sitd_pool, mem_flags,
                                        &sitd_dma);
                        spin_lock_irqsave(&ehci->lock, flags);
                        if (!sitd) {
@@ -2090,6 +2091,7 @@ sitd_urb_transaction(
                        }
                }
 
+               memset(sitd, 0, sizeof(*sitd));
                sitd->sitd_dma = sitd_dma;
                sitd->frame = NO_FRAME;
                list_add(&sitd->sitd_list, &iso_sched->td_list);
index 48779c44c361d98db9f25ce3609860d243beea9f..eb494ec547e806e9d5656f072160d58bf68a0d17 100644 (file)
@@ -320,9 +320,11 @@ int xhci_dbc_tty_register_driver(struct xhci_hcd *xhci)
 
 void xhci_dbc_tty_unregister_driver(void)
 {
-       tty_unregister_driver(dbc_tty_driver);
-       put_tty_driver(dbc_tty_driver);
-       dbc_tty_driver = NULL;
+       if (dbc_tty_driver) {
+               tty_unregister_driver(dbc_tty_driver);
+               put_tty_driver(dbc_tty_driver);
+               dbc_tty_driver = NULL;
+       }
 }
 
 static void dbc_rx_push(unsigned long _port)
index 72ebbc908e19f7ea9701ca0e831ee703874d1908..32cd52ca8318b6bcb6a9e12f7dbc5c0b18185672 100644 (file)
@@ -354,7 +354,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci,
 
        slot_id = 0;
        for (i = 0; i < MAX_HC_SLOTS; i++) {
-               if (!xhci->devs[i])
+               if (!xhci->devs[i] || !xhci->devs[i]->udev)
                        continue;
                speed = xhci->devs[i]->udev->speed;
                if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3))
index f17b7eab66cf9e51edec5b573a60ac909f6dc0b3..85ffda85f8ab39043c5323a3799420d14c98baa8 100644 (file)
@@ -126,7 +126,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
                xhci->quirks |= XHCI_AMD_PLL_FIX;
 
-       if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43bb)
+       if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+               (pdev->device == 0x15e0 ||
+                pdev->device == 0x15e1 ||
+                pdev->device == 0x43bb))
                xhci->quirks |= XHCI_SUSPEND_DELAY;
 
        if (pdev->vendor == PCI_VENDOR_ID_AMD)
index df327dcc2bac3b934399360ed32b504c7fbeabc1..c1b22fc64e387a3bf7f836729de1ddcd2676d8fc 100644 (file)
@@ -157,6 +157,7 @@ static int xhci_plat_probe(struct platform_device *pdev)
        struct resource         *res;
        struct usb_hcd          *hcd;
        struct clk              *clk;
+       struct clk              *reg_clk;
        int                     ret;
        int                     irq;
 
@@ -226,17 +227,27 @@ static int xhci_plat_probe(struct platform_device *pdev)
        hcd->rsrc_len = resource_size(res);
 
        /*
-        * Not all platforms have a clk so it is not an error if the
-        * clock does not exists.
+        * Not all platforms have clocks, so it is not an error if a
+        * clock does not exist.
         */
+       reg_clk = devm_clk_get(&pdev->dev, "reg");
+       if (!IS_ERR(reg_clk)) {
+               ret = clk_prepare_enable(reg_clk);
+               if (ret)
+                       goto put_hcd;
+       } else if (PTR_ERR(reg_clk) == -EPROBE_DEFER) {
+               ret = -EPROBE_DEFER;
+               goto put_hcd;
+       }
+
        clk = devm_clk_get(&pdev->dev, NULL);
        if (!IS_ERR(clk)) {
                ret = clk_prepare_enable(clk);
                if (ret)
-                       goto put_hcd;
+                       goto disable_reg_clk;
        } else if (PTR_ERR(clk) == -EPROBE_DEFER) {
                ret = -EPROBE_DEFER;
-               goto put_hcd;
+               goto disable_reg_clk;
        }
 
        xhci = hcd_to_xhci(hcd);
@@ -252,6 +263,7 @@ static int xhci_plat_probe(struct platform_device *pdev)
        device_wakeup_enable(hcd->self.controller);
 
        xhci->clk = clk;
+       xhci->reg_clk = reg_clk;
        xhci->main_hcd = hcd;
        xhci->shared_hcd = __usb_create_hcd(driver, sysdev, &pdev->dev,
                        dev_name(&pdev->dev), hcd);
@@ -320,8 +332,10 @@ static int xhci_plat_probe(struct platform_device *pdev)
        usb_put_hcd(xhci->shared_hcd);
 
 disable_clk:
-       if (!IS_ERR(clk))
-               clk_disable_unprepare(clk);
+       clk_disable_unprepare(clk);
+
+disable_reg_clk:
+       clk_disable_unprepare(reg_clk);
 
 put_hcd:
        usb_put_hcd(hcd);
@@ -338,6 +352,7 @@ static int xhci_plat_remove(struct platform_device *dev)
        struct usb_hcd  *hcd = platform_get_drvdata(dev);
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        struct clk *clk = xhci->clk;
+       struct clk *reg_clk = xhci->reg_clk;
 
        xhci->xhc_state |= XHCI_STATE_REMOVING;
 
@@ -347,8 +362,8 @@ static int xhci_plat_remove(struct platform_device *dev)
        usb_remove_hcd(hcd);
        usb_put_hcd(xhci->shared_hcd);
 
-       if (!IS_ERR(clk))
-               clk_disable_unprepare(clk);
+       clk_disable_unprepare(clk);
+       clk_disable_unprepare(reg_clk);
        usb_put_hcd(hcd);
 
        pm_runtime_set_suspended(&dev->dev);
@@ -420,7 +435,6 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match);
 static struct platform_driver usb_xhci_driver = {
        .probe  = xhci_plat_probe,
        .remove = xhci_plat_remove,
-       .shutdown       = usb_hcd_platform_shutdown,
        .driver = {
                .name = "xhci-hcd",
                .pm = &xhci_plat_pm_ops,
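
Both xhci-plat clocks follow the same "optional unless probing should
defer" pattern, and the error-path unwind mirrors acquisition order;
dropping the IS_ERR() guards on teardown leans on clk_disable_unprepare()
tolerating NULL and error pointers. A sketch of the unwind ordering only
(the *_err parameters are stand-ins for the real clk calls):

    #include <stdio.h>

    /* Unwind order mirrors acquisition: clk failed after reg_clk was on. */
    static int probe_like(int reg_clk_err, int clk_err)
    {
            if (reg_clk_err)
                    goto put_hcd;
            if (clk_err)
                    goto disable_reg_clk;

            return 0;

    disable_reg_clk:
            printf("disable reg_clk\n");
    put_hcd:
            printf("put hcd\n");
            return -1;
    }

    int main(void)
    {
            probe_like(0, 1); /* clk failed: both cleanup steps run */
            return 0;
    }
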
index 9b27798ecce5fa3dec8e25efe61b6407b9b1396b..711da3306b14d48b7b85fcb3d4a693a623a504ac 100644 (file)
@@ -3621,6 +3621,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
                del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
        }
        xhci_debugfs_remove_slot(xhci, udev->slot_id);
+       virt_dev->udev = NULL;
        ret = xhci_disable_slot(xhci, udev->slot_id);
        if (ret)
                xhci_free_virt_device(xhci, udev->slot_id);
index 05c909b04f14c544c0ac7a8fba3a8c1bdd3a41f2..6dfc4867dbcf23ec34cecfd0658f93dae242500d 100644 (file)
@@ -1729,8 +1729,9 @@ struct xhci_hcd {
        int             page_shift;
        /* msi-x vectors */
        int             msix_count;
-       /* optional clock */
+       /* optional clocks */
        struct clk              *clk;
+       struct clk              *reg_clk;
        /* data structures */
        struct xhci_device_context_array *dcbaa;
        struct xhci_ring        *cmd_ring;
index 05a679d5e3a2b5487a852330edafd58ccba60b32..6a60bc0490c5270e8434e4d94e36e4120db215f6 100644 (file)
@@ -451,7 +451,6 @@ static int dsps_musb_init(struct musb *musb)
        if (!rev)
                return -ENODEV;
 
-       usb_phy_init(musb->xceiv);
        if (IS_ERR(musb->phy))  {
                musb->phy = NULL;
        } else {
@@ -501,7 +500,6 @@ static int dsps_musb_exit(struct musb *musb)
        struct dsps_glue *glue = dev_get_drvdata(dev->parent);
 
        del_timer_sync(&musb->dev_timer);
-       usb_phy_shutdown(musb->xceiv);
        phy_power_off(musb->phy);
        phy_exit(musb->phy);
        debugfs_remove_recursive(glue->dbgfs_root);
index e564695c6c8da85f33f5e9ba79d3e0a73ba4379d..71c5835ea9cd087893979c62c90f840c5570f136 100644 (file)
@@ -417,7 +417,6 @@ void musb_g_tx(struct musb *musb, u8 epnum)
        req = next_request(musb_ep);
        request = &req->request;
 
-       trace_musb_req_tx(req);
        csr = musb_readw(epio, MUSB_TXCSR);
        musb_dbg(musb, "<== %s, txcsr %04x", musb_ep->end_point.name, csr);
 
@@ -456,6 +455,8 @@ void musb_g_tx(struct musb *musb, u8 epnum)
                u8      is_dma = 0;
                bool    short_packet = false;
 
+               trace_musb_req_tx(req);
+
                if (dma && (csr & MUSB_TXCSR_DMAENAB)) {
                        is_dma = 1;
                        csr |= MUSB_TXCSR_P_WZC_BITS;
index 3a8451a15f7f735acb90886a1ee3521ebbb7b39f..15a42cee0a9c27ad930fd28ae637fb6d1d9262f9 100644 (file)
@@ -990,7 +990,9 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep,
                        /* set tx_reinit and schedule the next qh */
                        ep->tx_reinit = 1;
                }
-               musb_start_urb(musb, is_in, next_qh);
+
+               if (next_qh)
+                       musb_start_urb(musb, is_in, next_qh);
        }
 }
 
@@ -2522,8 +2524,11 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
 {
        struct musb     *musb = hcd_to_musb(hcd);
        u8              devctl;
+       int             ret;
 
-       musb_port_suspend(musb, true);
+       ret = musb_port_suspend(musb, true);
+       if (ret)
+               return ret;
 
        if (!is_host_active(musb))
                return 0;
@@ -2754,6 +2759,7 @@ int musb_host_setup(struct musb *musb, int power_budget)
        hcd->self.otg_port = 1;
        musb->xceiv->otg->host = &hcd->self;
        hcd->power_budget = 2 * (power_budget ? : 250);
+       hcd->skip_phy_initialization = 1;
 
        ret = usb_add_hcd(hcd, 0, 0);
        if (ret < 0)
index 72392bbcd0a4b72bfead8dc2c26c7d40ea0947ee..2999845632cefb2fafa17d7da8622c06772dcda5 100644 (file)
@@ -67,7 +67,7 @@ extern void musb_host_rx(struct musb *, u8);
 extern void musb_root_disconnect(struct musb *musb);
 extern void musb_host_resume_root_hub(struct musb *musb);
 extern void musb_host_poke_root_hub(struct musb *musb);
-extern void musb_port_suspend(struct musb *musb, bool do_suspend);
+extern int musb_port_suspend(struct musb *musb, bool do_suspend);
 extern void musb_port_reset(struct musb *musb, bool do_reset);
 extern void musb_host_finish_resume(struct work_struct *work);
 #else
@@ -99,7 +99,10 @@ static inline void musb_root_disconnect(struct musb *musb)   {}
 static inline void musb_host_resume_root_hub(struct musb *musb)        {}
 static inline void musb_host_poll_rh_status(struct musb *musb) {}
 static inline void musb_host_poke_root_hub(struct musb *musb)  {}
-static inline void musb_port_suspend(struct musb *musb, bool do_suspend) {}
+static inline int musb_port_suspend(struct musb *musb, bool do_suspend)
+{
+       return 0;
+}
 static inline void musb_port_reset(struct musb *musb, bool do_reset) {}
 static inline void musb_host_finish_resume(struct work_struct *work) {}
 #endif
index 5165d2b07ade01985d2e9b104552b33d92850f47..2f8dd9826e9481a99e28f16a65be89e1e13a8955 100644 (file)
@@ -48,14 +48,14 @@ void musb_host_finish_resume(struct work_struct *work)
        spin_unlock_irqrestore(&musb->lock, flags);
 }
 
-void musb_port_suspend(struct musb *musb, bool do_suspend)
+int musb_port_suspend(struct musb *musb, bool do_suspend)
 {
        struct usb_otg  *otg = musb->xceiv->otg;
        u8              power;
        void __iomem    *mbase = musb->mregs;
 
        if (!is_host_active(musb))
-               return;
+               return 0;
 
        /* NOTE:  this doesn't necessarily put PHY into low power mode,
         * turning off its clock; that's a function of PHY integration and
@@ -66,16 +66,20 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
        if (do_suspend) {
                int retries = 10000;
 
-               power &= ~MUSB_POWER_RESUME;
-               power |= MUSB_POWER_SUSPENDM;
-               musb_writeb(mbase, MUSB_POWER, power);
+               if (power & MUSB_POWER_RESUME)
+                       return -EBUSY;
 
-               /* Needed for OPT A tests */
-               power = musb_readb(mbase, MUSB_POWER);
-               while (power & MUSB_POWER_SUSPENDM) {
+               if (!(power & MUSB_POWER_SUSPENDM)) {
+                       power |= MUSB_POWER_SUSPENDM;
+                       musb_writeb(mbase, MUSB_POWER, power);
+
+                       /* Needed for OPT A tests */
                        power = musb_readb(mbase, MUSB_POWER);
-                       if (retries-- < 1)
-                               break;
+                       while (power & MUSB_POWER_SUSPENDM) {
+                               power = musb_readb(mbase, MUSB_POWER);
+                               if (retries-- < 1)
+                                       break;
+                       }
                }
 
                musb_dbg(musb, "Root port suspended, power %02x", power);
@@ -111,6 +115,7 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
                schedule_delayed_work(&musb->finish_resume_work,
                                      msecs_to_jiffies(USB_RESUME_TIMEOUT));
        }
+       return 0;
 }
 
 void musb_port_reset(struct musb *musb, bool do_reset)
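
musb_port_suspend() now refuses to suspend while a resume is still
signaling and skips the register write when SUSPENDM is already set,
making the operation idempotent. A standalone sketch (the POWER bit
values follow the MUSB register layout but are hard-coded here):

    #include <stdio.h>

    #define POWER_RESUME   0x04
    #define POWER_SUSPENDM 0x02

    static int port_suspend(unsigned char *power)
    {
            if (*power & POWER_RESUME)
                    return -16; /* -EBUSY: resume still in progress */
            if (!(*power & POWER_SUSPENDM))
                    *power |= POWER_SUSPENDM;
            return 0;
    }

    int main(void)
    {
            unsigned char power = POWER_RESUME;

            printf("during resume: %d\n", port_suspend(&power));
            power = 0;
            printf("idle: %d power=0x%02x\n", port_suspend(&power), power);
            return 0;
    }
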
index a646820f5a78f0ae67551ff0b55b1b7e13c5f797..533f127c30ad846f236170f36a61a127ea9db6ab 100644 (file)
@@ -62,6 +62,7 @@ config USB_SERIAL_SIMPLE
                - Fundamental Software dongle.
                - Google USB serial devices
                - HP4x calculators
+               - Libtransistor USB console
                - a number of Motorola phones
                - Motorola Tetra devices
                - Novatel Wireless GPS receivers
index de1e759dd51220880ca364f9e1920af360f5e88d..eb6c26cbe5792b0e535c77b9e2e245b700071458 100644 (file)
@@ -214,6 +214,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */
        { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */
        { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */
+       { USB_DEVICE(0x3923, 0x7A0B) }, /* National Instruments USB Serial Console */
        { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
        { } /* Terminating Entry */
 };
index 87202ad5a50dfb4d8afe865ce57a7b5fbe0d6b8b..7ea221d42dbadbbef33bb1e02ae35775d5765d62 100644 (file)
@@ -1898,7 +1898,8 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial)
                return ftdi_jtag_probe(serial);
 
        if (udev->product &&
-               (!strcmp(udev->product, "BeagleBone/XDS100V2") ||
+               (!strcmp(udev->product, "Arrow USB Blaster") ||
+                !strcmp(udev->product, "BeagleBone/XDS100V2") ||
                 !strcmp(udev->product, "SNAP Connect E10")))
                return ftdi_jtag_probe(serial);
 
index c3f252283ab9df9a5132f7d297bd79bcbffad801..2058852a87faf4b946b0b4303715de0a0147ecfb 100644 (file)
@@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb);
 /* These Quectel products use Qualcomm's vendor ID */
 #define QUECTEL_PRODUCT_UC20                   0x9003
 #define QUECTEL_PRODUCT_UC15                   0x9090
+/* These u-blox products use Qualcomm's vendor ID */
+#define UBLOX_PRODUCT_R410M                    0x90b2
 /* These Yuga products use Qualcomm's vendor ID */
 #define YUGA_PRODUCT_CLM920_NC5                        0x9625
 
@@ -1065,6 +1067,9 @@ static const struct usb_device_id option_ids[] = {
        /* Yuga products use Qualcomm vendor ID */
        { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
          .driver_info = RSVD(1) | RSVD(4) },
+       /* u-blox products using Qualcomm vendor ID */
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
+         .driver_info = RSVD(1) | RSVD(3) },
        /* Quectel products using Quectel vendor ID */
        { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
          .driver_info = RSVD(4) },
index 4ef79e29cb26031f4ccf100e34de453d41c031d4..40864c2bd9dc0ad73e8c5dd0701f621c4302712c 100644 (file)
@@ -63,6 +63,11 @@ DEVICE(flashloader, FLASHLOADER_IDS);
                                        0x01) }
 DEVICE(google, GOOGLE_IDS);
 
+/* Libtransistor USB console */
+#define LIBTRANSISTOR_IDS()                    \
+       { USB_DEVICE(0x1209, 0x8b00) }
+DEVICE(libtransistor, LIBTRANSISTOR_IDS);
+
 /* ViVOpay USB Serial Driver */
 #define VIVOPAY_IDS()                  \
        { USB_DEVICE(0x1d5f, 0x1004) }  /* ViVOpay 8800 */
@@ -110,6 +115,7 @@ static struct usb_serial_driver * const serial_drivers[] = {
        &funsoft_device,
        &flashloader_device,
        &google_device,
+       &libtransistor_device,
        &vivopay_device,
        &moto_modem_device,
        &motorola_tetra_device,
@@ -126,6 +132,7 @@ static const struct usb_device_id id_table[] = {
        FUNSOFT_IDS(),
        FLASHLOADER_IDS(),
        GOOGLE_IDS(),
+       LIBTRANSISTOR_IDS(),
        VIVOPAY_IDS(),
        MOTO_IDS(),
        MOTOROLA_TETRA_IDS(),
index f5373ed2cd455dbb0e58fa81aba7338b6fbf22f0..8ddbecc25d89ac6fbb2bc1a385b569d00f2737b2 100644 (file)
@@ -335,47 +335,48 @@ static int palm_os_3_probe(struct usb_serial *serial,
                goto exit;
        }
 
-       if (retval == sizeof(*connection_info)) {
-                       connection_info = (struct visor_connection_info *)
-                                                       transfer_buffer;
-
-               num_ports = le16_to_cpu(connection_info->num_ports);
-               for (i = 0; i < num_ports; ++i) {
-                       switch (
-                          connection_info->connections[i].port_function_id) {
-                       case VISOR_FUNCTION_GENERIC:
-                               string = "Generic";
-                               break;
-                       case VISOR_FUNCTION_DEBUGGER:
-                               string = "Debugger";
-                               break;
-                       case VISOR_FUNCTION_HOTSYNC:
-                               string = "HotSync";
-                               break;
-                       case VISOR_FUNCTION_CONSOLE:
-                               string = "Console";
-                               break;
-                       case VISOR_FUNCTION_REMOTE_FILE_SYS:
-                               string = "Remote File System";
-                               break;
-                       default:
-                               string = "unknown";
-                               break;
-                       }
-                       dev_info(dev, "%s: port %d, is for %s use\n",
-                               serial->type->description,
-                               connection_info->connections[i].port, string);
-               }
+       if (retval != sizeof(*connection_info)) {
+               dev_err(dev, "Invalid connection information received from device\n");
+               retval = -ENODEV;
+               goto exit;
        }
-       /*
-       * Handle devices that report invalid stuff here.
-       */
+
+       connection_info = (struct visor_connection_info *)transfer_buffer;
+
+       num_ports = le16_to_cpu(connection_info->num_ports);
+
+       /* Handle devices that report invalid stuff here. */
        if (num_ports == 0 || num_ports > 2) {
                dev_warn(dev, "%s: No valid connect info available\n",
                        serial->type->description);
                num_ports = 2;
        }
 
+       for (i = 0; i < num_ports; ++i) {
+               switch (connection_info->connections[i].port_function_id) {
+               case VISOR_FUNCTION_GENERIC:
+                       string = "Generic";
+                       break;
+               case VISOR_FUNCTION_DEBUGGER:
+                       string = "Debugger";
+                       break;
+               case VISOR_FUNCTION_HOTSYNC:
+                       string = "HotSync";
+                       break;
+               case VISOR_FUNCTION_CONSOLE:
+                       string = "Console";
+                       break;
+               case VISOR_FUNCTION_REMOTE_FILE_SYS:
+                       string = "Remote File System";
+                       break;
+               default:
+                       string = "unknown";
+                       break;
+               }
+               dev_info(dev, "%s: port %d, is for %s use\n",
+                       serial->type->description,
+                       connection_info->connections[i].port, string);
+       }
        dev_info(dev, "%s: Number of ports: %d\n", serial->type->description,
                num_ports);
 
index 677d12138dbd99c4c40556b8c9882e6e8c6de46b..ded49e3bf2b02506b34b8d0b4272f0eec1b8d9d4 100644 (file)
@@ -3725,6 +3725,7 @@ void tcpm_unregister_port(struct tcpm_port *port)
        for (i = 0; i < ARRAY_SIZE(port->port_altmode); i++)
                typec_unregister_altmode(port->port_altmode[i]);
        typec_unregister_port(port->typec_port);
+       usb_role_switch_put(port->role_sw);
        tcpm_debugfs_exit(port);
        destroy_workqueue(port->wq);
 }
index 8b8406867c02f1500917ae570369282c5f9b1328..4b4c8d271b276f425c6b5a60658dcf6f7574cce9 100644 (file)
@@ -73,6 +73,7 @@ struct tps6598x {
        struct device *dev;
        struct regmap *regmap;
        struct mutex lock; /* device lock */
+       u8 i2c_protocol:1;
 
        struct typec_port *port;
        struct typec_partner *partner;
@@ -80,19 +81,39 @@ struct tps6598x {
        struct typec_capability typec_cap;
 };
 
+static int
+tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len)
+{
+       u8 data[len + 1];
+       int ret;
+
+       if (!tps->i2c_protocol)
+               return regmap_raw_read(tps->regmap, reg, val, len);
+
+       ret = regmap_raw_read(tps->regmap, reg, data, sizeof(data));
+       if (ret)
+               return ret;
+
+       if (data[0] < len)
+               return -EIO;
+
+       memcpy(val, &data[1], len);
+       return 0;
+}
+
 static inline int tps6598x_read16(struct tps6598x *tps, u8 reg, u16 *val)
 {
-       return regmap_raw_read(tps->regmap, reg, val, sizeof(u16));
+       return tps6598x_block_read(tps, reg, val, sizeof(u16));
 }
 
 static inline int tps6598x_read32(struct tps6598x *tps, u8 reg, u32 *val)
 {
-       return regmap_raw_read(tps->regmap, reg, val, sizeof(u32));
+       return tps6598x_block_read(tps, reg, val, sizeof(u32));
 }
 
 static inline int tps6598x_read64(struct tps6598x *tps, u8 reg, u64 *val)
 {
-       return regmap_raw_read(tps->regmap, reg, val, sizeof(u64));
+       return tps6598x_block_read(tps, reg, val, sizeof(u64));
 }
 
 static inline int tps6598x_write16(struct tps6598x *tps, u8 reg, u16 val)
@@ -121,8 +142,8 @@ static int tps6598x_read_partner_identity(struct tps6598x *tps)
        struct tps6598x_rx_identity_reg id;
        int ret;
 
-       ret = regmap_raw_read(tps->regmap, TPS_REG_RX_IDENTITY_SOP,
-                             &id, sizeof(id));
+       ret = tps6598x_block_read(tps, TPS_REG_RX_IDENTITY_SOP,
+                                 &id, sizeof(id));
        if (ret)
                return ret;
 
@@ -224,13 +245,13 @@ static int tps6598x_exec_cmd(struct tps6598x *tps, const char *cmd,
        } while (val);
 
        if (out_len) {
-               ret = regmap_raw_read(tps->regmap, TPS_REG_DATA1,
-                                     out_data, out_len);
+               ret = tps6598x_block_read(tps, TPS_REG_DATA1,
+                                         out_data, out_len);
                if (ret)
                        return ret;
                val = out_data[0];
        } else {
-               ret = regmap_read(tps->regmap, TPS_REG_DATA1, &val);
+               ret = tps6598x_block_read(tps, TPS_REG_DATA1, &val, sizeof(u8));
                if (ret)
                        return ret;
        }
@@ -385,6 +406,16 @@ static int tps6598x_probe(struct i2c_client *client)
        if (!vid)
                return -ENODEV;
 
+       /*
+        * Check whether the adapter can handle the SMBus protocol. If it
+        * cannot, the driver needs to take care of block reads separately.
+        *
+        * FIXME: Testing with I2C_FUNC_I2C. regmap-i2c uses the I2C protocol
+        * unconditionally if the adapter has I2C_FUNC_I2C set.
+        */
+       if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               tps->i2c_protocol = true;
+
        ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
        if (ret < 0)
                return ret;
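
In I2C mode the TPS6598x prepends a length byte to every register read, so
the driver fetches len + 1 bytes, checks that the reported length covers
the request, and copies out the payload. A standalone sketch of that
framing:

    #include <stdio.h>
    #include <string.h>

    /* Frame as seen on the wire in I2C mode: [len][payload...] */
    static int block_read(const unsigned char *frame, size_t frame_len,
                          void *val, size_t len)
    {
            if (frame_len < len + 1 || frame[0] < len)
                    return -5; /* -EIO: device returned fewer bytes */
            memcpy(val, &frame[1], len);
            return 0;
    }

    int main(void)
    {
            unsigned char frame[5] = { 4, 0xAA, 0xCE, 0x00, 0x00 };
            unsigned short vid;

            if (!block_read(frame, sizeof(frame), &vid, sizeof(vid)))
                    printf("vid=0x%04x\n", vid); /* 0xceaa on LE hosts */
            return 0;
    }
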
index b57891c1fd31a7e70b14ada774f824d30b36196e..7afbea5122077b3dd0cbe217ad7c839837f499b4 100644 (file)
@@ -5,6 +5,6 @@ obj-$(CONFIG_TYPEC_UCSI)        += typec_ucsi.o
 
 typec_ucsi-y                   := ucsi.o
 
-typec_ucsi-$(CONFIG_FTRACE)    += trace.o
+typec_ucsi-$(CONFIG_TRACING)   += trace.o
 
 obj-$(CONFIG_UCSI_ACPI)                += ucsi_acpi.o
index bf0977fbd100a72b2384d52a91c114dbbf694813..bd5cca5632b395def6384ec233d8ba5926e81c93 100644 (file)
@@ -28,7 +28,7 @@
  * difficult to estimate the time it takes for the system to process the command
  * before it is actually passed to the PPM.
  */
-#define UCSI_TIMEOUT_MS                1000
+#define UCSI_TIMEOUT_MS                5000
 
 /*
  * UCSI_SWAP_TIMEOUT_MS - Timeout for role swap requests
index 14a72357800ac2b10d067da9b36b7180ba5f70df..35618ceb279134bc02c6d8ff83671f55b25c82a8 100644 (file)
@@ -73,6 +73,7 @@ struct bus_id_priv {
        struct stub_device *sdev;
        struct usb_device *udev;
        char shutdown_busid;
+       spinlock_t busid_lock;
 };
 
 /* stub_priv is allocated from stub_priv_cache */
@@ -83,6 +84,7 @@ extern struct usb_device_driver stub_driver;
 
 /* stub_main.c */
 struct bus_id_priv *get_busid_priv(const char *busid);
+void put_busid_priv(struct bus_id_priv *bid);
 int del_match_busid(char *busid);
 void stub_device_cleanup_urbs(struct stub_device *sdev);
 
index dd8ef36ab10ec7d612bdb2358017f142a43906d8..c0d6ff1baa721754d42d1cae3b076685dcd86fe2 100644 (file)
@@ -300,9 +300,9 @@ static int stub_probe(struct usb_device *udev)
        struct stub_device *sdev = NULL;
        const char *udev_busid = dev_name(&udev->dev);
        struct bus_id_priv *busid_priv;
-       int rc;
+       int rc = 0;
 
-       dev_dbg(&udev->dev, "Enter\n");
+       dev_dbg(&udev->dev, "Enter probe\n");
 
        /* check we should claim or not by busid_table */
        busid_priv = get_busid_priv(udev_busid);
@@ -317,13 +317,15 @@ static int stub_probe(struct usb_device *udev)
                 * other matched drivers by the driver core.
                 * See driver_probe_device() in driver/base/dd.c
                 */
-               return -ENODEV;
+               rc = -ENODEV;
+               goto call_put_busid_priv;
        }
 
        if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) {
                dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n",
                         udev_busid);
-               return -ENODEV;
+               rc = -ENODEV;
+               goto call_put_busid_priv;
        }
 
        if (!strcmp(udev->bus->bus_name, "vhci_hcd")) {
@@ -331,13 +333,16 @@ static int stub_probe(struct usb_device *udev)
                        "%s is attached on vhci_hcd... skip!\n",
                        udev_busid);
 
-               return -ENODEV;
+               rc = -ENODEV;
+               goto call_put_busid_priv;
        }
 
        /* ok, this is my device */
        sdev = stub_device_alloc(udev);
-       if (!sdev)
-               return -ENOMEM;
+       if (!sdev) {
+               rc = -ENOMEM;
+               goto call_put_busid_priv;
+       }
 
        dev_info(&udev->dev,
                "usbip-host: register new device (bus %u dev %u)\n",
@@ -369,7 +374,9 @@ static int stub_probe(struct usb_device *udev)
        }
        busid_priv->status = STUB_BUSID_ALLOC;
 
-       return 0;
+       rc = 0;
+       goto call_put_busid_priv;
+
 err_files:
        usb_hub_release_port(udev->parent, udev->portnum,
                             (struct usb_dev_state *) udev);
@@ -379,6 +386,9 @@ static int stub_probe(struct usb_device *udev)
 
        busid_priv->sdev = NULL;
        stub_device_free(sdev);
+
+call_put_busid_priv:
+       put_busid_priv(busid_priv);
        return rc;
 }
 
@@ -404,7 +414,7 @@ static void stub_disconnect(struct usb_device *udev)
        struct bus_id_priv *busid_priv;
        int rc;
 
-       dev_dbg(&udev->dev, "Enter\n");
+       dev_dbg(&udev->dev, "Enter disconnect\n");
 
        busid_priv = get_busid_priv(udev_busid);
        if (!busid_priv) {
@@ -417,7 +427,7 @@ static void stub_disconnect(struct usb_device *udev)
        /* get stub_device */
        if (!sdev) {
                dev_err(&udev->dev, "could not get device");
-               return;
+               goto call_put_busid_priv;
        }
 
        dev_set_drvdata(&udev->dev, NULL);
@@ -432,12 +442,12 @@ static void stub_disconnect(struct usb_device *udev)
                                  (struct usb_dev_state *) udev);
        if (rc) {
                dev_dbg(&udev->dev, "unable to release port\n");
-               return;
+               goto call_put_busid_priv;
        }
 
        /* If usb reset is called from event handler */
        if (usbip_in_eh(current))
-               return;
+               goto call_put_busid_priv;
 
        /* shutdown the current connection */
        shutdown_busid(busid_priv);
@@ -448,12 +458,11 @@ static void stub_disconnect(struct usb_device *udev)
        busid_priv->sdev = NULL;
        stub_device_free(sdev);
 
-       if (busid_priv->status == STUB_BUSID_ALLOC) {
+       if (busid_priv->status == STUB_BUSID_ALLOC)
                busid_priv->status = STUB_BUSID_ADDED;
-       } else {
-               busid_priv->status = STUB_BUSID_OTHER;
-               del_match_busid((char *)udev_busid);
-       }
+
+call_put_busid_priv:
+       put_busid_priv(busid_priv);
 }
 
 #ifdef CONFIG_PM
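
The probe and disconnect reworks above exist to balance the new busid_lock: get_busid_priv() now returns with the per-entry lock held, so every early "return" becomes a jump to a single unlock label. A condensed sketch of the pattern (device_should_be_claimed() is a hypothetical stand-in for the hub/vhci/busid checks in the real function):

	static int stub_probe_skeleton(struct usb_device *udev)
	{
		struct bus_id_priv *busid_priv;
		int rc = 0;

		busid_priv = get_busid_priv(dev_name(&udev->dev));
		if (!busid_priv)
			return -ENODEV;	/* nothing locked yet, plain return is fine */

		if (!device_should_be_claimed(udev)) {
			rc = -ENODEV;
			goto call_put_busid_priv;	/* never return with busid_lock held */
		}

		/* ... claim the device, set up sysfs files ... */

	call_put_busid_priv:
		put_busid_priv(busid_priv);	/* drops busid_priv->busid_lock */
		return rc;
	}
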
index c31c8402a0c55ddd2f4b463ebabd28be6438f490..bf8a5feb0ee937a35ccd7a478e7c3dd01770b4e1 100644 (file)
@@ -14,6 +14,7 @@
 #define DRIVER_DESC "USB/IP Host Driver"
 
 struct kmem_cache *stub_priv_cache;
+
 /*
  * busid_tables defines matching busids that usbip can grab. A user can change
  * dynamically what device is locally used and what device is exported to a
@@ -25,6 +26,8 @@ static spinlock_t busid_table_lock;
 
 static void init_busid_table(void)
 {
+       int i;
+
        /*
         * This also sets the bus_table[i].status to
         * STUB_BUSID_OTHER, which is 0.
@@ -32,6 +35,9 @@ static void init_busid_table(void)
        memset(busid_table, 0, sizeof(busid_table));
 
        spin_lock_init(&busid_table_lock);
+
+       for (i = 0; i < MAX_BUSID; i++)
+               spin_lock_init(&busid_table[i].busid_lock);
 }
 
 /*
@@ -43,15 +49,20 @@ static int get_busid_idx(const char *busid)
        int i;
        int idx = -1;
 
-       for (i = 0; i < MAX_BUSID; i++)
+       for (i = 0; i < MAX_BUSID; i++) {
+               spin_lock(&busid_table[i].busid_lock);
                if (busid_table[i].name[0])
                        if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) {
                                idx = i;
+                               spin_unlock(&busid_table[i].busid_lock);
                                break;
                        }
+               spin_unlock(&busid_table[i].busid_lock);
+       }
        return idx;
 }
 
+/* Returns holding busid_lock. Should call put_busid_priv() to unlock */
 struct bus_id_priv *get_busid_priv(const char *busid)
 {
        int idx;
@@ -59,13 +70,22 @@ struct bus_id_priv *get_busid_priv(const char *busid)
 
        spin_lock(&busid_table_lock);
        idx = get_busid_idx(busid);
-       if (idx >= 0)
+       if (idx >= 0) {
                bid = &(busid_table[idx]);
+               /* get busid_lock before returning */
+               spin_lock(&bid->busid_lock);
+       }
        spin_unlock(&busid_table_lock);
 
        return bid;
 }
 
+void put_busid_priv(struct bus_id_priv *bid)
+{
+       if (bid)
+               spin_unlock(&bid->busid_lock);
+}
+
 static int add_match_busid(char *busid)
 {
        int i;
@@ -78,15 +98,19 @@ static int add_match_busid(char *busid)
                goto out;
        }
 
-       for (i = 0; i < MAX_BUSID; i++)
+       for (i = 0; i < MAX_BUSID; i++) {
+               spin_lock(&busid_table[i].busid_lock);
                if (!busid_table[i].name[0]) {
                        strlcpy(busid_table[i].name, busid, BUSID_SIZE);
                        if ((busid_table[i].status != STUB_BUSID_ALLOC) &&
                            (busid_table[i].status != STUB_BUSID_REMOV))
                                busid_table[i].status = STUB_BUSID_ADDED;
                        ret = 0;
+                       spin_unlock(&busid_table[i].busid_lock);
                        break;
                }
+               spin_unlock(&busid_table[i].busid_lock);
+       }
 
 out:
        spin_unlock(&busid_table_lock);
@@ -107,6 +131,8 @@ int del_match_busid(char *busid)
        /* found */
        ret = 0;
 
+       spin_lock(&busid_table[idx].busid_lock);
+
        if (busid_table[idx].status == STUB_BUSID_OTHER)
                memset(busid_table[idx].name, 0, BUSID_SIZE);
 
@@ -114,6 +140,7 @@ int del_match_busid(char *busid)
            (busid_table[idx].status != STUB_BUSID_ADDED))
                busid_table[idx].status = STUB_BUSID_REMOV;
 
+       spin_unlock(&busid_table[idx].busid_lock);
 out:
        spin_unlock(&busid_table_lock);
 
@@ -126,9 +153,12 @@ static ssize_t match_busid_show(struct device_driver *drv, char *buf)
        char *out = buf;
 
        spin_lock(&busid_table_lock);
-       for (i = 0; i < MAX_BUSID; i++)
+       for (i = 0; i < MAX_BUSID; i++) {
+               spin_lock(&busid_table[i].busid_lock);
                if (busid_table[i].name[0])
                        out += sprintf(out, "%s ", busid_table[i].name);
+               spin_unlock(&busid_table[i].busid_lock);
+       }
        spin_unlock(&busid_table_lock);
        out += sprintf(out, "\n");
 
@@ -169,6 +199,51 @@ static ssize_t match_busid_store(struct device_driver *dev, const char *buf,
 }
 static DRIVER_ATTR_RW(match_busid);
 
+static int do_rebind(char *busid, struct bus_id_priv *busid_priv)
+{
+       int ret;
+
+       /* device_attach() callers should hold parent lock for USB */
+       if (busid_priv->udev->dev.parent)
+               device_lock(busid_priv->udev->dev.parent);
+       ret = device_attach(&busid_priv->udev->dev);
+       if (busid_priv->udev->dev.parent)
+               device_unlock(busid_priv->udev->dev.parent);
+       if (ret < 0) {
+               dev_err(&busid_priv->udev->dev, "rebind failed\n");
+               return ret;
+       }
+       return 0;
+}
+
+static void stub_device_rebind(void)
+{
+#if IS_MODULE(CONFIG_USBIP_HOST)
+       struct bus_id_priv *busid_priv;
+       int i;
+
+       /* update status to STUB_BUSID_OTHER so probe ignores the device */
+       spin_lock(&busid_table_lock);
+       for (i = 0; i < MAX_BUSID; i++) {
+               if (busid_table[i].name[0] &&
+                   busid_table[i].shutdown_busid) {
+                       busid_priv = &(busid_table[i]);
+                       busid_priv->status = STUB_BUSID_OTHER;
+               }
+       }
+       spin_unlock(&busid_table_lock);
+
+       /* now run rebind - no need to hold locks. driver files are removed */
+       for (i = 0; i < MAX_BUSID; i++) {
+               if (busid_table[i].name[0] &&
+                   busid_table[i].shutdown_busid) {
+                       busid_priv = &(busid_table[i]);
+                       do_rebind(busid_table[i].name, busid_priv);
+               }
+       }
+#endif
+}
+
 static ssize_t rebind_store(struct device_driver *dev, const char *buf,
                                 size_t count)
 {
@@ -186,11 +261,17 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf,
        if (!bid)
                return -ENODEV;
 
-       ret = device_attach(&bid->udev->dev);
-       if (ret < 0) {
-               dev_err(&bid->udev->dev, "rebind failed\n");
+       /* mark the device for deletion so probe ignores it during rescan */
+       bid->status = STUB_BUSID_OTHER;
+       /* release the busid lock */
+       put_busid_priv(bid);
+
+       ret = do_rebind((char *) buf, bid);
+       if (ret < 0)
                return ret;
-       }
+
+       /* delete device from busid_table */
+       del_match_busid((char *) buf);
 
        return count;
 }
@@ -312,6 +393,9 @@ static void __exit usbip_host_exit(void)
         */
        usb_deregister_device_driver(&stub_driver);
 
+       /* initiate scan to attach devices */
+       stub_device_rebind();
+
        kmem_cache_destroy(stub_priv_cache);
 }
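
The busid table now uses two lock levels: busid_table_lock still serialises scans of the array, while each entry's busid_lock protects that entry's fields, so a holder of one entry cannot stall unrelated lookups. A minimal sketch of the iteration pattern used by get_busid_idx(), add_match_busid() and match_busid_show() above:

	int i;

	spin_lock(&busid_table_lock);		/* orders the table scan */
	for (i = 0; i < MAX_BUSID; i++) {
		spin_lock(&busid_table[i].busid_lock);
		if (busid_table[i].name[0])
			/* ... examine or update entry i ... */;
		spin_unlock(&busid_table[i].busid_lock);
	}
	spin_unlock(&busid_table_lock);
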
 
index 473fb8a872893caa3494fe9a4a85b60600bdfd28..bf8afe9b5883850325fb70fc3873bff20237040b 100644 (file)
@@ -243,7 +243,7 @@ enum usbip_side {
 #define        VUDC_EVENT_ERROR_USB    (USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
 #define        VUDC_EVENT_ERROR_MALLOC (USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
 
-#define        VDEV_EVENT_REMOVED      (USBIP_EH_SHUTDOWN | USBIP_EH_BYE)
+#define        VDEV_EVENT_REMOVED (USBIP_EH_SHUTDOWN | USBIP_EH_RESET | USBIP_EH_BYE)
 #define        VDEV_EVENT_DOWN         (USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
 #define        VDEV_EVENT_ERROR_TCP    (USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
 #define        VDEV_EVENT_ERROR_MALLOC (USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
index 5b4c0864ad92ae6ac40e19b5b10f30667bc09207..5d88917c963149b0ba1daa02cc4da4e84ca3c106 100644 (file)
@@ -91,10 +91,6 @@ static void event_handler(struct work_struct *work)
                        unset_event(ud, USBIP_EH_UNUSABLE);
                }
 
-               /* Stop the error handler. */
-               if (ud->event & USBIP_EH_BYE)
-                       usbip_dbg_eh("removed %p\n", ud);
-
                wake_up(&ud->eh_waitq);
        }
 }
index 20e3d4609583848f8a14271d5c4bf3ffb8776123..d11f3f8dad4045e9c51bce1789b9473b60237f61 100644 (file)
@@ -354,6 +354,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                usbip_dbg_vhci_rh(" ClearHubFeature\n");
                break;
        case ClearPortFeature:
+               if (rhport < 0)
+                       goto error;
                switch (wValue) {
                case USB_PORT_FEAT_SUSPEND:
                        if (hcd->speed == HCD_USB3) {
@@ -511,11 +513,16 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                goto error;
                        }
 
+                       if (rhport < 0)
+                               goto error;
+
                        vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND;
                        break;
                case USB_PORT_FEAT_POWER:
                        usbip_dbg_vhci_rh(
                                " SetPortFeature: USB_PORT_FEAT_POWER\n");
+                       if (rhport < 0)
+                               goto error;
                        if (hcd->speed == HCD_USB3)
                                vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER;
                        else
@@ -524,6 +531,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                case USB_PORT_FEAT_BH_PORT_RESET:
                        usbip_dbg_vhci_rh(
                                " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n");
+                       if (rhport < 0)
+                               goto error;
                        /* Applicable only for USB3.0 hub */
                        if (hcd->speed != HCD_USB3) {
                                pr_err("USB_PORT_FEAT_BH_PORT_RESET req not "
@@ -534,6 +543,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                case USB_PORT_FEAT_RESET:
                        usbip_dbg_vhci_rh(
                                " SetPortFeature: USB_PORT_FEAT_RESET\n");
+                       if (rhport < 0)
+                               goto error;
                        /* if it's already enabled, disable */
                        if (hcd->speed == HCD_USB3) {
                                vhci_hcd->port_status[rhport] = 0;
@@ -554,6 +565,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                default:
                        usbip_dbg_vhci_rh(" SetPortFeature: default %d\n",
                                          wValue);
+                       if (rhport < 0)
+                               goto error;
                        if (hcd->speed == HCD_USB3) {
                                if ((vhci_hcd->port_status[rhport] &
                                     USB_SS_PORT_STAT_POWER) != 0) {
index bbf38befefb28d9153c94b01eaba1f2a6a785279..c4b49fca487154be53ba2637d19284b4fab8c277 100644 (file)
@@ -46,8 +46,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
 #define VHOST_NET_WEIGHT 0x80000
 
 /* Max number of packets transferred before requeueing the job.
- * Using this limit prevents one virtqueue from starving rx. */
-#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
+ * Using this limit prevents one virtqueue from starving others with small
+ * pkts.
+ */
+#define VHOST_NET_PKT_WEIGHT 256
 
 /* MAX number of TX used buffers for outstanding zerocopy */
 #define VHOST_MAX_PEND 128
@@ -587,7 +589,7 @@ static void handle_tx(struct vhost_net *net)
                        vhost_zerocopy_signal_used(net, vq);
                vhost_net_tx_packet(net);
                if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
-                   unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
+                   unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) {
                        vhost_poll_queue(&vq->poll);
                        break;
                }
@@ -769,6 +771,7 @@ static void handle_rx(struct vhost_net *net)
        struct socket *sock;
        struct iov_iter fixup;
        __virtio16 num_buffers;
+       int recv_pkts = 0;
 
        mutex_lock_nested(&vq->mutex, 0);
        sock = vq->private_data;
@@ -872,7 +875,8 @@ static void handle_rx(struct vhost_net *net)
                if (unlikely(vq_log))
                        vhost_log_write(vq, vq_log, log, vhost_len);
                total_len += vhost_len;
-               if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+               if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+                   unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) {
                        vhost_poll_queue(&vq->poll);
                        goto out;
                }
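
The vhost-net change replaces the per-virtqueue packet budget with a fixed VHOST_NET_PKT_WEIGHT and applies it to both handle_tx() and handle_rx(): the worker now yields after either too many bytes or too many packets, so a flood of tiny packets can no longer starve the other direction. A condensed sketch of the dual quota (process_one_packet() is a hypothetical stand-in for the per-packet work):

	size_t total_len = 0;
	int pkts = 0;

	for (;;) {
		size_t len = process_one_packet();

		total_len += len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
		    unlikely(++pkts >= VHOST_NET_PKT_WEIGHT)) {
			vhost_poll_queue(&vq->poll);	/* requeue and yield */
			break;
		}
	}
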
index 190dbf8cfcb564f3c36c5504472b4dfec8070f51..2f3856a95856be43bcbfc6a4c3865f7099045368 100644 (file)
@@ -114,7 +114,7 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev)
        }
 
 out:
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
        kfree(pages);
 }
 
@@ -144,7 +144,7 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev)
 
        rc = vbg_req_perform(gdev, req);
 
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
 
        if (rc < 0) {
                vbg_err("%s error: %d\n", __func__, rc);
@@ -214,8 +214,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev)
        ret = vbg_status_code_to_errno(rc);
 
 out_free:
-       kfree(req2);
-       kfree(req1);
+       vbg_req_free(req2, sizeof(*req2));
+       vbg_req_free(req1, sizeof(*req1));
        return ret;
 }
 
@@ -245,7 +245,7 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active)
        if (rc == VERR_NOT_IMPLEMENTED) /* Compatibility with older hosts. */
                rc = VINF_SUCCESS;
 
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
 
        return vbg_status_code_to_errno(rc);
 }
@@ -431,7 +431,7 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled)
        rc = vbg_req_perform(gdev, req);
        do_div(req->interval_ns, 1000000); /* ns -> ms */
        gdev->heartbeat_interval_ms = req->interval_ns;
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
 
        return vbg_status_code_to_errno(rc);
 }
@@ -454,12 +454,6 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev)
        if (ret < 0)
                return ret;
 
-       /*
-        * Preallocate the request to use it from the timer callback because:
-        *    1) on Windows vbg_req_alloc must be called at IRQL <= APC_LEVEL
-        *       and the timer callback runs at DISPATCH_LEVEL;
-        *    2) avoid repeated allocations.
-        */
        gdev->guest_heartbeat_req = vbg_req_alloc(
                                        sizeof(*gdev->guest_heartbeat_req),
                                        VMMDEVREQ_GUEST_HEARTBEAT);
@@ -481,8 +475,8 @@ static void vbg_heartbeat_exit(struct vbg_dev *gdev)
 {
        del_timer_sync(&gdev->heartbeat_timer);
        vbg_heartbeat_host_config(gdev, false);
-       kfree(gdev->guest_heartbeat_req);
-
+       vbg_req_free(gdev->guest_heartbeat_req,
+                    sizeof(*gdev->guest_heartbeat_req));
 }
 
 /**
@@ -543,7 +537,7 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev,
        if (rc < 0)
                vbg_err("%s error, rc: %d\n", __func__, rc);
 
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
        return vbg_status_code_to_errno(rc);
 }
 
@@ -617,7 +611,7 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev,
 
 out:
        mutex_unlock(&gdev->session_mutex);
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
 
        return ret;
 }
@@ -642,7 +636,7 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev)
        if (rc < 0)
                vbg_err("%s error, rc: %d\n", __func__, rc);
 
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
        return vbg_status_code_to_errno(rc);
 }
 
@@ -712,7 +706,7 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev,
 
 out:
        mutex_unlock(&gdev->session_mutex);
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
 
        return ret;
 }
@@ -733,8 +727,10 @@ static int vbg_query_host_version(struct vbg_dev *gdev)
 
        rc = vbg_req_perform(gdev, req);
        ret = vbg_status_code_to_errno(rc);
-       if (ret)
+       if (ret) {
+               vbg_err("%s error: %d\n", __func__, rc);
                goto out;
+       }
 
        snprintf(gdev->host_version, sizeof(gdev->host_version), "%u.%u.%ur%u",
                 req->major, req->minor, req->build, req->revision);
@@ -749,7 +745,7 @@ static int vbg_query_host_version(struct vbg_dev *gdev)
        }
 
 out:
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
        return ret;
 }
 
@@ -847,11 +843,16 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events)
        return 0;
 
 err_free_reqs:
-       kfree(gdev->mouse_status_req);
-       kfree(gdev->ack_events_req);
-       kfree(gdev->cancel_req);
-       kfree(gdev->mem_balloon.change_req);
-       kfree(gdev->mem_balloon.get_req);
+       vbg_req_free(gdev->mouse_status_req,
+                    sizeof(*gdev->mouse_status_req));
+       vbg_req_free(gdev->ack_events_req,
+                    sizeof(*gdev->ack_events_req));
+       vbg_req_free(gdev->cancel_req,
+                    sizeof(*gdev->cancel_req));
+       vbg_req_free(gdev->mem_balloon.change_req,
+                    sizeof(*gdev->mem_balloon.change_req));
+       vbg_req_free(gdev->mem_balloon.get_req,
+                    sizeof(*gdev->mem_balloon.get_req));
        return ret;
 }
 
@@ -872,11 +873,16 @@ void vbg_core_exit(struct vbg_dev *gdev)
        vbg_reset_host_capabilities(gdev);
        vbg_core_set_mouse_status(gdev, 0);
 
-       kfree(gdev->mouse_status_req);
-       kfree(gdev->ack_events_req);
-       kfree(gdev->cancel_req);
-       kfree(gdev->mem_balloon.change_req);
-       kfree(gdev->mem_balloon.get_req);
+       vbg_req_free(gdev->mouse_status_req,
+                    sizeof(*gdev->mouse_status_req));
+       vbg_req_free(gdev->ack_events_req,
+                    sizeof(*gdev->ack_events_req));
+       vbg_req_free(gdev->cancel_req,
+                    sizeof(*gdev->cancel_req));
+       vbg_req_free(gdev->mem_balloon.change_req,
+                    sizeof(*gdev->mem_balloon.change_req));
+       vbg_req_free(gdev->mem_balloon.get_req,
+                    sizeof(*gdev->mem_balloon.get_req));
 }
 
 /**
@@ -1415,7 +1421,7 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev,
        req->flags = dump->u.in.flags;
        dump->hdr.rc = vbg_req_perform(gdev, req);
 
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
        return 0;
 }
 
@@ -1513,7 +1519,7 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features)
        if (rc < 0)
                vbg_err("%s error, rc: %d\n", __func__, rc);
 
-       kfree(req);
+       vbg_req_free(req, sizeof(*req));
        return vbg_status_code_to_errno(rc);
 }
 
index 6c784bf4fa6d50c23ff4c32067d5892aca7b5adb..7ad9ec45bfa9d649627f45e9410aebff43cd22c7 100644 (file)
@@ -171,4 +171,13 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id);
 
 void vbg_linux_mouse_event(struct vbg_dev *gdev);
 
+/* Private (non-exported) functions from vboxguest_utils.c */
+/* Private (non-exported) functions from vboxguest_utils.c */
+void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type);
+void vbg_req_free(void *req, size_t len);
+int vbg_req_perform(struct vbg_dev *gdev, void *req);
+int vbg_hgcm_call32(
+       struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
+       struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
+       int *vbox_status);
+
 #endif
index 82e280d38cc2e1bd1da2b2e6847b3174b5854548..398d2269323471305088762ba92ed9da3d53d9af 100644 (file)
@@ -87,6 +87,7 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req,
        struct vbg_session *session = filp->private_data;
        size_t returned_size, size;
        struct vbg_ioctl_hdr hdr;
+       bool is_vmmdev_req;
        int ret = 0;
        void *buf;
 
@@ -106,8 +107,17 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req,
        if (size > SZ_16M)
                return -E2BIG;
 
-       /* __GFP_DMA32 because IOCTL_VMMDEV_REQUEST passes this to the host */
-       buf = kmalloc(size, GFP_KERNEL | __GFP_DMA32);
+       /*
+        * IOCTL_VMMDEV_REQUEST needs the buffer to be below 4G to avoid
+        * the need for a bounce-buffer and another copy later on.
+        */
+       is_vmmdev_req = (req & ~IOCSIZE_MASK) == VBG_IOCTL_VMMDEV_REQUEST(0) ||
+                        req == VBG_IOCTL_VMMDEV_REQUEST_BIG;
+
+       if (is_vmmdev_req)
+               buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT);
+       else
+               buf = kmalloc(size, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
@@ -132,7 +142,10 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req,
                ret = -EFAULT;
 
 out:
-       kfree(buf);
+       if (is_vmmdev_req)
+               vbg_req_free(buf, size);
+       else
+               kfree(buf);
 
        return ret;
 }
index 0f0dab8023cf6dfd5cd47ecc507cae0a2df550d8..bf4474214b4d31bb708c3d9c302d6ce415e17c7a 100644 (file)
@@ -65,8 +65,9 @@ VBG_LOG(vbg_debug, pr_debug);
 void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type)
 {
        struct vmmdev_request_header *req;
+       int order = get_order(PAGE_ALIGN(len));
 
-       req = kmalloc(len, GFP_KERNEL | __GFP_DMA32);
+       req = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, order);
        if (!req)
                return NULL;
 
@@ -82,6 +83,14 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type)
        return req;
 }
 
+void vbg_req_free(void *req, size_t len)
+{
+       if (!req)
+               return;
+
+       free_pages((unsigned long)req, get_order(PAGE_ALIGN(len)));
+}
+
 /* Note this function returns a VBox status code, not a negative errno!! */
 int vbg_req_perform(struct vbg_dev *gdev, void *req)
 {
@@ -137,7 +146,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev,
                rc = hgcm_connect->header.result;
        }
 
-       kfree(hgcm_connect);
+       vbg_req_free(hgcm_connect, sizeof(*hgcm_connect));
 
        *vbox_status = rc;
        return 0;
@@ -166,7 +175,7 @@ int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status)
        if (rc >= 0)
                rc = hgcm_disconnect->header.result;
 
-       kfree(hgcm_disconnect);
+       vbg_req_free(hgcm_disconnect, sizeof(*hgcm_disconnect));
 
        *vbox_status = rc;
        return 0;
@@ -623,7 +632,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
        }
 
        if (!leak_it)
-               kfree(call);
+               vbg_req_free(call, size);
 
 free_bounce_bufs:
        if (bounce_bufs) {
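
vbg_req_alloc() now takes whole pages via __get_free_pages() (GFP_DMA32 keeps the buffer below 4G for the host), so the buffer can no longer be released with kfree(); that is why every kfree(req) in this series becomes vbg_req_free(req, sizeof(*req)). A minimal sketch of the alloc/free pairing, assuming a known length:

	int order = get_order(PAGE_ALIGN(len));
	void *req = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, order);

	/* ... use the request ... */

	free_pages((unsigned long)req, order);	/* order must match the allocation */
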
index a5b8eb21201fd0cefb9918acee38cbdf49307ed3..1abe4d021fd27171bae9fb7a289c72916f041486 100644 (file)
@@ -55,6 +55,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
 #define   WDT_CTRL_WDT_INTR            BIT(2)
 #define   WDT_CTRL_RESET_SYSTEM                BIT(1)
 #define   WDT_CTRL_ENABLE              BIT(0)
+#define WDT_TIMEOUT_STATUS     0x10
+#define   WDT_TIMEOUT_STATUS_BOOT_SECONDARY    BIT(1)
 
 /*
  * WDT_RESET_WIDTH controls the characteristics of the external pulse (if
@@ -192,6 +194,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
        struct device_node *np;
        const char *reset_type;
        u32 duration;
+       u32 status;
        int ret;
 
        wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
@@ -307,6 +310,10 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
                writel(duration - 1, wdt->base + WDT_RESET_WIDTH);
        }
 
+       status = readl(wdt->base + WDT_TIMEOUT_STATUS);
+       if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)
+               wdt->wdd.bootstatus = WDIOF_CARDRESET;
+
        ret = devm_watchdog_register_device(&pdev->dev, &wdt->wdd);
        if (ret) {
                dev_err(&pdev->dev, "failed to register\n");
index 6b8c6ddfe30b3185fccaa91c05a8d4f49925ab21..514db5cc159511254f62c2eb86714a2ecddb978f 100644 (file)
@@ -121,7 +121,8 @@ static int rwdt_restart(struct watchdog_device *wdev, unsigned long action,
 }
 
 static const struct watchdog_info rwdt_ident = {
-       .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+       .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT |
+               WDIOF_CARDRESET,
        .identity = "Renesas WDT Watchdog",
 };
 
@@ -197,9 +198,10 @@ static int rwdt_probe(struct platform_device *pdev)
                return PTR_ERR(clk);
 
        pm_runtime_enable(&pdev->dev);
-
        pm_runtime_get_sync(&pdev->dev);
        priv->clk_rate = clk_get_rate(clk);
+       priv->wdev.bootstatus = (readb_relaxed(priv->base + RWTCSRA) &
+                               RWTCSRA_WOVF) ? WDIOF_CARDRESET : 0;
        pm_runtime_put(&pdev->dev);
 
        if (!priv->clk_rate) {
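
Both watchdog hunks above follow the same pattern: sample the hardware's "last reset was caused by the watchdog" flag once at probe time (WDT_TIMEOUT_STATUS on aspeed, RWTCSRA_WOVF on the Renesas WDT) and latch it into bootstatus, which the watchdog core reports to userspace. A sketch using the aspeed names:

	u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS);

	if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)
		wdt->wdd.bootstatus = WDIOF_CARDRESET;	/* visible via WDIOC_GETBOOTSTATUS */
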
index 43d0cbb7ba0b9dfe8c662df42332c212f8997932..814cdf539b0f6ac6eb17cc4e5ab5a4208ea7837c 100644 (file)
@@ -299,7 +299,7 @@ static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd,
                if (sch311x_wdt_set_heartbeat(new_timeout))
                        return -EINVAL;
                sch311x_wdt_keepalive();
-               /* Fall */
+               /* Fall through */
        case WDIOC_GETTIMEOUT:
                return put_user(timeout, p);
        default:
index 20e2bba10400910cfbd86a68d3d6f988b9d77dec..672b61a7f9a363a1b4b403b66c505e6d2d0978e0 100644 (file)
@@ -427,7 +427,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        return -EINVAL;
 
                wdt_keepalive();
-               /* Fall */
+               /* Fall through */
 
        case WDIOC_GETTIMEOUT:
                return put_user(timeout, uarg.i);
index db0da7ea4fd8d8c12514f1f8552b1f425c84d9bc..93c5b610e2648ad9f2e87a8612b4f85389bd253c 100644 (file)
@@ -178,7 +178,7 @@ static long wafwdt_ioctl(struct file *file, unsigned int cmd,
                timeout = new_timeout;
                wafwdt_stop();
                wafwdt_start();
-               /* Fall */
+               /* Fall through */
        case WDIOC_GETTIMEOUT:
                return put_user(timeout, p);
 
index 3bedfed608a22eb77b1addd59f7a263f1bd722b8..7587fb665ff189b088b1a9bb9e8c1b7c7005c083 100644 (file)
@@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
        p = text;
        do {
                struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
-               char tdelim = delim;
+               const char *q, *stop;
 
                if (*p == delim) {
                        p++;
@@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
                if (*p == '[') {
                        p++;
-                       tdelim = ']';
+                       q = memchr(p, ']', end - p);
+               } else {
+                       for (q = p; q < end; q++)
+                               if (*q == '+' || *q == delim)
+                                       break;
                }
 
-               if (in4_pton(p, end - p,
+               if (in4_pton(p, q - p,
                             (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-                            tdelim, &p)) {
+                            -1, &stop)) {
                        srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
                        srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
                        srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-               } else if (in6_pton(p, end - p,
+               } else if (in6_pton(p, q - p,
                                    srx->transport.sin6.sin6_addr.s6_addr,
-                                   tdelim, &p)) {
+                                   -1, &stop)) {
                        /* Nothing to do */
                } else {
                        goto bad_address;
                }
 
-               if (tdelim == ']') {
-                       if (p == end || *p != ']')
-                               goto bad_address;
+               if (stop != q)
+                       goto bad_address;
+
+               p = q;
+               if (q < end && *q == ']')
                        p++;
-               }
 
                if (p < end) {
                        if (*p == '+') {
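
The address-list rewrite above stops asking in4_pton()/in6_pton() to find the terminator: the end of each token is located first (']' for bracketed addresses, '+' or the list delimiter otherwise) and an exact-length span is passed with -1 as the delimiter, after which "stop != q" rejects any junk inside the span. A condensed sketch (addr4/addr6 are hypothetical destination buffers):

	const char *q, *stop;

	if (*p == '[') {			/* "[ipv6]" form */
		p++;
		q = memchr(p, ']', end - p);
		if (!q)
			goto bad_address;
	} else {
		for (q = p; q < end; q++)	/* token ends at '+' or delim */
			if (*q == '+' || *q == delim)
				break;
	}

	if (!in4_pton(p, q - p, addr4, -1, &stop) &&
	    !in6_pton(p, q - p, addr6, -1, &stop))
		goto bad_address;
	if (stop != q)				/* junk inside the token */
		goto bad_address;
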
index abd9a84f4e88a6dbded5eb02aa75e46a239a4c56..571437dcb252842578b92a6f3b5b60a574703b94 100644 (file)
 /*
  * Set up an interest-in-callbacks record for a volume on a server and
  * register it with the server.
- * - Called with volume->server_sem held.
+ * - Called with vnode->io_lock held.
  */
 int afs_register_server_cb_interest(struct afs_vnode *vnode,
-                                   struct afs_server_entry *entry)
+                                   struct afs_server_list *slist,
+                                   unsigned int index)
 {
-       struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+       struct afs_server_entry *entry = &slist->servers[index];
+       struct afs_cb_interest *cbi, *vcbi, *new, *old;
        struct afs_server *server = entry->server;
 
 again:
+       if (vnode->cb_interest &&
+           likely(vnode->cb_interest == entry->cb_interest))
+               return 0;
+
+       read_lock(&slist->lock);
+       cbi = afs_get_cb_interest(entry->cb_interest);
+       read_unlock(&slist->lock);
+
        vcbi = vnode->cb_interest;
        if (vcbi) {
-               if (vcbi == cbi)
+               if (vcbi == cbi) {
+                       afs_put_cb_interest(afs_v2net(vnode), cbi);
                        return 0;
+               }
 
+               /* Use a new interest in the server list for the same server
+                * rather than an old one that's still attached to a vnode.
+                */
                if (cbi && vcbi->server == cbi->server) {
                        write_seqlock(&vnode->cb_lock);
-                       vnode->cb_interest = afs_get_cb_interest(cbi);
+                       old = vnode->cb_interest;
+                       vnode->cb_interest = cbi;
                        write_sequnlock(&vnode->cb_lock);
-                       afs_put_cb_interest(afs_v2net(vnode), cbi);
+                       afs_put_cb_interest(afs_v2net(vnode), old);
                        return 0;
                }
 
+               /* Re-use the one attached to the vnode. */
                if (!cbi && vcbi->server == server) {
-                       afs_get_cb_interest(vcbi);
-                       x = cmpxchg(&entry->cb_interest, cbi, vcbi);
-                       if (x != cbi) {
-                               cbi = x;
-                               afs_put_cb_interest(afs_v2net(vnode), vcbi);
+                       write_lock(&slist->lock);
+                       if (entry->cb_interest) {
+                               write_unlock(&slist->lock);
+                               afs_put_cb_interest(afs_v2net(vnode), cbi);
                                goto again;
                        }
+
+                       entry->cb_interest = cbi;
+                       write_unlock(&slist->lock);
                        return 0;
                }
        }
@@ -72,13 +91,16 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
                list_add_tail(&new->cb_link, &server->cb_interests);
                write_unlock(&server->cb_break_lock);
 
-               x = cmpxchg(&entry->cb_interest, cbi, new);
-               if (x == cbi) {
+               write_lock(&slist->lock);
+               if (!entry->cb_interest) {
+                       entry->cb_interest = afs_get_cb_interest(new);
                        cbi = new;
+                       new = NULL;
                } else {
-                       cbi = x;
-                       afs_put_cb_interest(afs_v2net(vnode), new);
+                       cbi = afs_get_cb_interest(entry->cb_interest);
                }
+               write_unlock(&slist->lock);
+               afs_put_cb_interest(afs_v2net(vnode), new);
        }
 
        ASSERT(cbi);
@@ -88,11 +110,14 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
         */
        write_seqlock(&vnode->cb_lock);
 
-       vnode->cb_interest = afs_get_cb_interest(cbi);
+       old = vnode->cb_interest;
+       vnode->cb_interest = cbi;
        vnode->cb_s_break = cbi->server->cb_s_break;
+       vnode->cb_v_break = vnode->volume->cb_v_break;
        clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 
        write_sequnlock(&vnode->cb_lock);
+       afs_put_cb_interest(afs_v2net(vnode), old);
        return 0;
 }
 
@@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server,
                if (cbi->vid != fid->vid)
                        continue;
 
-               data.volume = NULL;
-               data.fid = *fid;
-               inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
-               if (inode) {
-                       vnode = AFS_FS_I(inode);
-                       afs_break_callback(vnode);
-                       iput(inode);
+               if (fid->vnode == 0 && fid->unique == 0) {
+                       /* The callback break applies to an entire volume. */
+                       struct afs_super_info *as = AFS_FS_S(cbi->sb);
+                       struct afs_volume *volume = as->volume;
+
+                       write_lock(&volume->cb_break_lock);
+                       volume->cb_v_break++;
+                       write_unlock(&volume->cb_break_lock);
+               } else {
+                       data.volume = NULL;
+                       data.fid = *fid;
+                       inode = ilookup5_nowait(cbi->sb, fid->vnode,
+                                               afs_iget5_test, &data);
+                       if (inode) {
+                               vnode = AFS_FS_I(inode);
+                               afs_break_callback(vnode);
+                               iput(inode);
+                       }
                }
        }
 
@@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
        ASSERT(server != NULL);
        ASSERTCMP(count, <=, AFSCBMAX);
 
+       /* TODO: Sort the callback break list by volume ID */
+
        for (; count > 0; callbacks++, count--) {
                _debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
                       callbacks->fid.vid,
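
The key addition in afs_break_one_callback() above: a callback-break item whose vnode and unique fields are both zero addresses the entire volume, so instead of hunting for a single inode the per-volume break counter is bumped and every vnode in that volume revalidates lazily. In sketch form:

	if (fid->vnode == 0 && fid->unique == 0) {
		/* whole-volume break */
		write_lock(&volume->cb_break_lock);
		volume->cb_v_break++;	/* folded into afs_calc_vnode_cb_break() */
		write_unlock(&volume->cb_break_lock);
	} else {
		/* break one vnode: look up the inode and invalidate it */
	}
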
index 357de908df3ab258c6b0d2d3fbf8cdf94e7b1575..c332c95a6940f50fe3a9e220ebbb6928b0f5d27c 100644 (file)
@@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call)
 }
 
 /*
- * clean up a cache manager call
+ * Clean up a cache manager call.
  */
 static void afs_cm_destructor(struct afs_call *call)
 {
-       _enter("");
-
-       /* Break the callbacks here so that we do it after the final ACK is
-        * received.  The step number here must match the final number in
-        * afs_deliver_cb_callback().
-        */
-       if (call->unmarshall == 5) {
-               ASSERT(call->cm_server && call->count && call->request);
-               afs_break_callbacks(call->cm_server, call->count, call->request);
-       }
-
        kfree(call->buffer);
        call->buffer = NULL;
 }
@@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 
        _enter("");
 
-       /* be sure to send the reply *before* attempting to spam the AFS server
-        * with FSFetchStatus requests on the vnodes with broken callbacks lest
-        * the AFS server get into a vicious cycle of trying to break further
-        * callbacks because it hadn't received completion of the CBCallBack op
-        * yet */
-       afs_send_empty_reply(call);
+       /* We need to break the callbacks before sending the reply as the
+        * server holds up change visibility till it receives our reply so as
+        * to maintain cache coherency.
+        */
+       if (call->cm_server)
+               afs_break_callbacks(call->cm_server, call->count, call->request);
 
-       afs_break_callbacks(call->cm_server, call->count, call->request);
+       afs_send_empty_reply(call);
        afs_put_call(call);
        _leave("");
 }
@@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 {
        struct afs_callback_break *cb;
        struct sockaddr_rxrpc srx;
-       struct afs_server *server;
        __be32 *bp;
        int ret, loop;
 
@@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
                call->offset = 0;
                call->unmarshall++;
-
-               /* Record that the message was unmarshalled successfully so
-                * that the call destructor can know do the callback breaking
-                * work, even if the final ACK isn't received.
-                *
-                * If the step number changes, then afs_cm_destructor() must be
-                * updated also.
-                */
-               call->unmarshall++;
        case 5:
                break;
        }
@@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
        rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-       server = afs_find_server(call->net, &srx);
-       if (!server)
-               return -ENOTCONN;
-       call->cm_server = server;
+       call->cm_server = afs_find_server(call->net, &srx);
+       if (!call->cm_server)
+               trace_afs_cm_no_server(call, &srx);
 
        return afs_queue_call_work(call);
 }
@@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 
        _enter("{%p}", call->cm_server);
 
-       afs_init_callback_state(call->cm_server);
+       if (call->cm_server)
+               afs_init_callback_state(call->cm_server);
        afs_send_empty_reply(call);
        afs_put_call(call);
        _leave("");
@@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
        struct sockaddr_rxrpc srx;
-       struct afs_server *server;
        int ret;
 
        _enter("");
@@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       server = afs_find_server(call->net, &srx);
-       if (!server)
-               return -ENOTCONN;
-       call->cm_server = server;
+       call->cm_server = afs_find_server(call->net, &srx);
+       if (!call->cm_server)
+               trace_afs_cm_no_server(call, &srx);
 
        return afs_queue_call_work(call);
 }
@@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
  */
 static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
-       struct sockaddr_rxrpc srx;
-       struct afs_server *server;
        struct afs_uuid *r;
        unsigned loop;
        __be32 *b;
@@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-       server = afs_find_server(call->net, &srx);
-       if (!server)
-               return -ENOTCONN;
-       call->cm_server = server;
+       rcu_read_lock();
+       call->cm_server = afs_find_server_by_uuid(call->net, call->request);
+       rcu_read_unlock();
+       if (!call->cm_server)
+               trace_afs_cm_no_server_u(call, call->request);
 
        return afs_queue_call_work(call);
 }
index 5889f70d4d273a8622aefbd32148e8ff8e1b7d55..7d623008157ffaf9c77ac6a614a0e329f769c35e 100644 (file)
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
  * get reclaimed during the iteration.
  */
 static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+       __acquires(&dvnode->validate_lock)
 {
        struct afs_read *req;
        loff_t i_size;
@@ -261,18 +262,21 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
        /* If we're going to reload, we need to lock all the pages to prevent
         * races.
         */
-       if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
-               ret = -ERESTARTSYS;
-               for (i = 0; i < req->nr_pages; i++)
-                       if (lock_page_killable(req->pages[i]) < 0)
-                               goto error_unlock;
+       ret = -ERESTARTSYS;
+       if (down_read_killable(&dvnode->validate_lock) < 0)
+               goto error;
 
-               if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
-                       goto success;
+       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+               goto success;
+
+       up_read(&dvnode->validate_lock);
+       if (down_write_killable(&dvnode->validate_lock) < 0)
+               goto error;
 
+       if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
                ret = afs_fetch_data(dvnode, key, req);
                if (ret < 0)
-                       goto error_unlock_all;
+                       goto error_unlock;
 
                task_io_account_read(PAGE_SIZE * req->nr_pages);
 
@@ -284,33 +288,26 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
                for (i = 0; i < req->nr_pages; i++)
                        if (!afs_dir_check_page(dvnode, req->pages[i],
                                                req->actual_len))
-                               goto error_unlock_all;
+                               goto error_unlock;
 
                // TODO: Trim excess pages
 
                set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
        }
 
+       downgrade_write(&dvnode->validate_lock);
 success:
-       i = req->nr_pages;
-       while (i > 0)
-               unlock_page(req->pages[--i]);
        return req;
 
-error_unlock_all:
-       i = req->nr_pages;
 error_unlock:
-       while (i > 0)
-               unlock_page(req->pages[--i]);
+       up_write(&dvnode->validate_lock);
 error:
        afs_put_read(req);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
 
 content_has_grown:
-       i = req->nr_pages;
-       while (i > 0)
-               unlock_page(req->pages[--i]);
+       up_write(&dvnode->validate_lock);
        afs_put_read(req);
        goto retry;
 }
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
        }
 
 out:
+       up_read(&dvnode->validate_lock);
        afs_put_read(req);
        _leave(" = %d", ret);
        return ret;
@@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                        afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
                                      &newfid, &newstatus, &newcb);
                }
@@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                        afs_fs_remove(&fc, dentry->d_name.name, true,
                                      data_version);
                }
@@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                        afs_fs_remove(&fc, dentry->d_name.name, false,
                                      data_version);
                }
@@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                        afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
                                      &newfid, &newstatus, &newcb);
                }
@@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
                }
 
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-                       fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+                       fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
                        afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
                }
 
@@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(dvnode);
                        afs_fs_symlink(&fc, dentry->d_name.name,
                                       content, data_version,
                                       &newfid, &newstatus);
@@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        }
                }
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
-                       fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
+                       fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
                        afs_fs_rename(&fc, old_dentry->d_name.name,
                                      new_dvnode, new_dentry->d_name.name,
                                      orig_data_version, new_data_version);
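
afs_read_dir() above converts validate_lock into an rw_semaphore so concurrent readers can share a valid directory image; rwsems cannot be upgraded in place, so the reload path drops the read lock, retakes it for writing, re-checks the VALID bit (another task may have reloaded it meanwhile) and downgrades back to read for the caller. A condensed sketch (reload_dir_contents() is a hypothetical stand-in for the fetch-and-check steps):

	if (down_read_killable(&dvnode->validate_lock) < 0)
		return ERR_PTR(-ERESTARTSYS);
	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
		return req;			/* fast path: stay a reader */

	up_read(&dvnode->validate_lock);
	if (down_write_killable(&dvnode->validate_lock) < 0)
		return ERR_PTR(-ERESTARTSYS);
	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
		reload_dir_contents(dvnode);
	downgrade_write(&dvnode->validate_lock);	/* hand back a read lock */
	return req;
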
index c24c08016dd96e2309ce9e8052d643b2af6da2a6..7d4f26198573d7f6a4dffb7ff4a82ee0f8fbb573 100644 (file)
@@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_fetch_data(&fc, desc);
                }
 
index 7a0e017070ecede45ed1eb622ac89f6f5a33a5d1..dc62d15a964b8809d7028d33a393c41b6963242b 100644 (file)
@@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_set_lock(&fc, type);
                }
 
@@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                while (afs_select_current_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_extend_lock(&fc);
                }
 
@@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                while (afs_select_current_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_release_lock(&fc);
                }
 
index efacdb7c1dee59ad78e0c600e73468ea90285638..b273e1d60478c3c9f89a84a7b85fb6325a9a2c84 100644 (file)
@@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
                                     struct afs_read *read_req)
 {
        const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+       bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
        u64 data_version, size;
        u32 type, abort_code;
        u8 flags = 0;
@@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
        if (vnode)
                write_seqlock(&vnode->cb_lock);
 
+       abort_code = ntohl(xdr->abort_code);
+
        if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+               if (xdr->if_version == htonl(0) &&
+                   abort_code != 0 &&
+                   inline_error) {
+                       /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus
+                        * whereby it doesn't set the interface version in the error
+                        * case.
+                        */
+                       status->abort_code = abort_code;
+                       ret = 0;
+                       goto out;
+               }
+
                pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
                goto bad;
        }
 
+       if (abort_code != 0 && inline_error) {
+               status->abort_code = abort_code;
+               ret = 0;
+               goto out;
+       }
+
        type = ntohl(xdr->type);
-       abort_code = ntohl(xdr->abort_code);
        switch (type) {
        case AFS_FTYPE_FILE:
        case AFS_FTYPE_DIR:
@@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
                }
                status->type = type;
                break;
-       case AFS_FTYPE_INVALID:
-               if (abort_code != 0) {
-                       status->abort_code = abort_code;
-                       ret = 0;
-                       goto out;
-               }
-               /* Fall through */
        default:
                goto bad;
        }
@@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
 
        write_seqlock(&vnode->cb_lock);
 
-       if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
+       if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
                vnode->cb_version       = ntohl(*bp++);
                cb_expiry               = ntohl(*bp++);
                vnode->cb_type          = ntohl(*bp++);
index 06194cfe9724ca8bfa9ce7e0cad9d44bbe06a262..479b7fdda1244f5bf210694e275826cba99b5553 100644 (file)
@@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_fetch_file_status(&fc, NULL, new_inode);
                }
 
@@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        read_seqlock_excl(&vnode->cb_lock);
 
        if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-               if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+               if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
+                   vnode->cb_v_break != vnode->volume->cb_v_break) {
                        vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+                       vnode->cb_v_break = vnode->volume->cb_v_break;
+                       valid = false;
                } else if (vnode->status.type == AFS_FTYPE_DIR &&
                           test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
                           vnode->cb_expires_at - 10 > now) {
-                               valid = true;
+                       valid = true;
                } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
                           vnode->cb_expires_at - 10 > now) {
-                               valid = true;
+                       valid = true;
                }
        } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
                valid = true;
@@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        if (valid)
                goto valid;
 
-       mutex_lock(&vnode->validate_lock);
+       down_write(&vnode->validate_lock);
 
        /* if the promise has expired, we need to check the server again to get
         * a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
         * different */
        if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
                afs_zap_data(vnode);
-       mutex_unlock(&vnode->validate_lock);
+       up_write(&vnode->validate_lock);
 valid:
        _leave(" = 0");
        return 0;
 
 error_unlock:
-       mutex_unlock(&vnode->validate_lock);
+       up_write(&vnode->validate_lock);
        _leave(" = %d", ret);
        return ret;
 }
@@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_setattr(&fc, attr);
                }
 
index f8086ec95e24161eb9b9900745275e1cc4c1cfc6..e3f8a46663dbade0149d44e0d543bb7f171170a7 100644 (file)
@@ -396,6 +396,7 @@ struct afs_server {
 #define AFS_SERVER_FL_PROBED   5               /* The fileserver has been probed */
 #define AFS_SERVER_FL_PROBING  6               /* Fileserver is being probed */
 #define AFS_SERVER_FL_NO_IBULK 7               /* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8            /* May have callbacks on this fileserver */
        atomic_t                usage;
        u32                     addr_version;   /* Address list version */
 
@@ -433,6 +434,7 @@ struct afs_server_list {
        unsigned short          index;          /* Server currently in use */
        unsigned short          vnovol_mask;    /* Servers to be skipped due to VNOVOL */
        unsigned int            seq;            /* Set to ->servers_seq when installed */
+       rwlock_t                lock;
        struct afs_server_entry servers[];
 };
 
@@ -459,6 +461,9 @@ struct afs_volume {
        rwlock_t                servers_lock;   /* Lock for ->servers */
        unsigned int            servers_seq;    /* Incremented each time ->servers changes */
 
+       unsigned                cb_v_break;     /* Break-everything counter. */
+       rwlock_t                cb_break_lock;
+
        afs_voltype_t           type;           /* type of volume */
        short                   error;
        char                    type_force;     /* force volume type (suppress R/O -> R/W) */
@@ -494,7 +499,7 @@ struct afs_vnode {
 #endif
        struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
        struct mutex            io_lock;        /* Lock for serialising I/O on this mutex */
-       struct mutex            validate_lock;  /* lock for validating this vnode */
+       struct rw_semaphore     validate_lock;  /* lock for validating this vnode */
        spinlock_t              wb_lock;        /* lock for wb_keys */
        spinlock_t              lock;           /* waitqueue/flags lock */
        unsigned long           flags;
@@ -519,6 +524,7 @@ struct afs_vnode {
        /* outstanding callback notification on this file */
        struct afs_cb_interest  *cb_interest;   /* Server on which this resides */
        unsigned int            cb_s_break;     /* Mass break counter on ->server */
+       unsigned int            cb_v_break;     /* Mass break counter on ->volume */
        unsigned int            cb_break;       /* Break counter on vnode */
        seqlock_t               cb_lock;        /* Lock for ->cb_interest, ->status, ->cb_*break */
 
@@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *);
 extern void afs_break_callback(struct afs_vnode *);
 extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
 
-extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
+extern int afs_register_server_cb_interest(struct afs_vnode *,
+                                          struct afs_server_list *, unsigned int);
 extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
 extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
 
 static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
 {
-       refcount_inc(&cbi->usage);
+       if (cbi)
+               refcount_inc(&cbi->usage);
        return cbi;
 }
 
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+       return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+}
+
+static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
+                                           struct afs_cb_interest *cbi)
+{
+       return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+}
+
 /*
  * cell.c
  */
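
The new helpers above sum three independent break counters (per-vnode,
per-server, per-volume). A caller snapshots the sum before issuing an RPC and
compares it afterwards: if any counter ticked, a callback break happened in
the window and the fetched status may already be stale. A standalone model of
that scheme, with simplified hypothetical types:

/* Sketch only: models the cb_break counter summing. */
#include <stdbool.h>

struct server { unsigned int cb_s_break; };
struct volume { unsigned int cb_v_break; };
struct vnode {
	struct server *server;
	struct volume *volume;
	unsigned int cb_break;
};

/* Mirrors afs_calc_vnode_cb_break(): unsigned wrap-around is fine since
 * only equality of the sums is ever tested. */
static unsigned int calc_cb_break(const struct vnode *vn)
{
	return vn->cb_break + vn->server->cb_s_break + vn->volume->cb_v_break;
}

static bool status_still_fresh(const struct vnode *vn, unsigned int snapshot)
{
	/* Any counter advancing while the RPC was in flight changes the
	 * sum, so the status fetched by that RPC cannot be trusted. */
	return calc_cb_break(vn) == snapshot;
}
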
index ac0feac9d7468cfeb48b4161ed70961b2088b663..e065bc0768e6a5068ef762e3d3967fc54d50e2b2 100644 (file)
@@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         */
                        if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
                                fc->ac.error = -EREMOTEIO;
-                               goto failed;
+                               goto next_server;
                        }
 
                        write_lock(&vnode->volume->servers_lock);
@@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         */
                        if (vnode->volume->servers == fc->server_list) {
                                fc->ac.error = -EREMOTEIO;
-                               goto failed;
+                               goto next_server;
                        }
 
                        /* Try again */
@@ -350,8 +350,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
         * break request before we've finished decoding the reply and
         * installing the vnode.
         */
-       fc->ac.error = afs_register_server_cb_interest(
-               vnode, &fc->server_list->servers[fc->index]);
+       fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
+                                                      fc->index);
        if (fc->ac.error < 0)
                goto failed;
 
@@ -369,8 +369,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
        if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
                fc->ac.alist = afs_get_addrlist(alist);
 
-               if (!afs_probe_fileserver(fc))
-                       goto failed;
+               if (!afs_probe_fileserver(fc)) {
+                       switch (fc->ac.error) {
+                       case -ENOMEM:
+                       case -ERESTARTSYS:
+                       case -EINTR:
+                               goto failed;
+                       default:
+                               goto next_server;
+                       }
+               }
        }
 
        if (!fc->ac.alist)
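
The probe-failure handling added above separates errors that are fatal to the
caller (-ENOMEM, -ERESTARTSYS, -EINTR) from server-side failures, which should
only advance the cursor to the next server. A sketch of that triage as a
standalone function, using a hypothetical enum where the kernel jumps to goto
labels:

/* Sketch only: classifies a probe error the way the hunk above does. */
#include <errno.h>

enum rotate_action { ROTATE_FAIL, ROTATE_NEXT_SERVER };

static enum rotate_action classify_probe_error(int error)
{
	switch (error) {
	case -ENOMEM:		/* local resource shortage */
	case -ERESTARTSYS:	/* interrupted by a signal */
	case -EINTR:
		return ROTATE_FAIL;	/* retrying another server won't help */
	default:
		/* Anything else is specific to this server: rotate on. */
		return ROTATE_NEXT_SERVER;
	}
}
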
index 5c6263972ec9a5651a28fd7ea7c80398e737a090..08735948f15d4caec78be59d5e1c4623591e92d4 100644 (file)
@@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net)
 {
        struct sockaddr_rxrpc srx;
        struct socket *socket;
+       unsigned int min_level;
        int ret;
 
        _enter("");
@@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net)
        srx.transport.sin6.sin6_family  = AF_INET6;
        srx.transport.sin6.sin6_port    = htons(AFS_CM_PORT);
 
+       min_level = RXRPC_SECURITY_ENCRYPT;
+       ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+                               (void *)&min_level, sizeof(min_level));
+       if (ret < 0)
+               goto error_2;
+
        ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
        if (ret == -EADDRINUSE) {
                srx.transport.sin6.sin6_port = 0;
@@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call)
                state = READ_ONCE(call->state);
                switch (ret) {
                case 0:
-                       if (state == AFS_CALL_CL_PROC_REPLY)
+                       if (state == AFS_CALL_CL_PROC_REPLY) {
+                               if (call->cbi)
+                                       set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+                                               &call->cbi->server->flags);
                                goto call_complete;
+                       }
                        ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
                        goto done;
                case -EINPROGRESS:
@@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call)
                case -ECONNABORTED:
                        ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
                        goto done;
-               case -ENOTCONN:
-                       abort_code = RX_CALL_DEAD;
-                       rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-                                               abort_code, ret, "KNC");
-                       goto local_abort;
                case -ENOTSUPP:
                        abort_code = RXGEN_OPCODE;
                        rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
index cea2fff313dc6e3a1efb3c3030a8a59f92136933..1992b0ffa54374da648efbd7ab5b9a87ae3c1889 100644 (file)
@@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                                        break;
                                }
 
-                               if (cb_break != (vnode->cb_break +
-                                                vnode->cb_interest->server->cb_s_break)) {
+                               if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
                                        changed = true;
                                        break;
                                }
@@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                }
        }
 
-       if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break))
+       if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
                goto someone_else_changed_it;
 
        /* We need a ref on any permits list we want to copy as we'll have to
@@ -257,7 +256,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
 
        spin_lock(&vnode->lock);
        zap = rcu_access_pointer(vnode->permit_cache);
-       if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+       if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
            zap == permits)
                rcu_assign_pointer(vnode->permit_cache, replacement);
        else
index 629c74986cff4ad1535fcc3c814b6150011c5dec..3af4625e2f8cc7049185048a602826c53a169c03 100644 (file)
@@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
                                                              sizeof(struct in6_addr));
                                        if (diff == 0)
                                                goto found;
-                                       if (diff < 0) {
-                                               // TODO: Sort the list
-                                               //if (i == alist->nr_ipv4)
-                                               //      goto not_found;
-                                               break;
-                                       }
                                }
                        }
                } else {
@@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net,
                                                        (u32 __force)b->sin6_addr.s6_addr32[3]);
                                        if (diff == 0)
                                                goto found;
-                                       if (diff < 0) {
-                                               // TODO: Sort the list
-                                               //if (i == 0)
-                                               //      goto not_found;
-                                               break;
-                                       }
                                }
                        }
                }
 
-       //not_found:
                server = NULL;
        found:
                if (server && !atomic_inc_not_zero(&server->usage))
@@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
        struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
        struct afs_addr_cursor ac = {
                .alist  = alist,
-               .addr   = &alist->addrs[0],
                .start  = alist->index,
-               .index  = alist->index,
+               .index  = 0,
+               .addr   = &alist->addrs[alist->index],
                .error  = 0,
        };
        _enter("%p", server);
 
-       afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+       if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+               afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+
        call_rcu(&server->rcu, afs_server_rcu);
        afs_dec_servers_outstanding(net);
 }
index 0f8dc4c8f07c43b3efb0f899b8a40b8f1a697e6c..8a5760aa583213a608d686b60f0782ecbed648e1 100644 (file)
@@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
                goto error;
 
        refcount_set(&slist->usage, 1);
+       rwlock_init(&slist->lock);
 
        /* Make sure a record exists for each server in the list. */
        for (i = 0; i < vldb->nr_servers; i++) {
@@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
                        goto error_2;
                }
 
-               /* Insertion-sort by server pointer */
+               /* Insertion-sort by UUID */
                for (j = 0; j < slist->nr_servers; j++)
-                       if (slist->servers[j].server >= server)
+                       if (memcmp(&slist->servers[j].server->uuid,
+                                  &server->uuid,
+                                  sizeof(server->uuid)) >= 0)
                                break;
                if (j < slist->nr_servers) {
                        if (slist->servers[j].server == server) {
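
Sorting by UUID via memcmp() gives a stable order that doesn't depend on
allocation addresses, so two lists built from the same servers always compare
element by element. A self-contained sketch of the insertion step, assuming a
plain array with spare capacity and a 16-byte UUID:

/* Sketch only: insertion sort keyed by memcmp() over a fixed-size UUID. */
#include <string.h>

#define UUID_LEN 16

struct entry { unsigned char uuid[UUID_LEN]; };

/* Insert *new_e into entries[0..n) keeping memcmp() order; the caller
 * must ensure room for n + 1 elements.  Returns the new count. */
static int insert_sorted(struct entry *entries, int n, const struct entry *new_e)
{
	int i, j;

	for (i = 0; i < n; i++)
		if (memcmp(entries[i].uuid, new_e->uuid, UUID_LEN) >= 0)
			break;
	for (j = n; j > i; j--)		/* shift the tail up by one slot */
		entries[j] = entries[j - 1];
	entries[i] = *new_e;
	return n + 1;
}
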
index 65081ec3c36e572c5822d756b86abb52ed757f90..9e5d7966621c4abaa5cdc51f278a1a05a7e6afac 100644 (file)
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
        memset(vnode, 0, sizeof(*vnode));
        inode_init_once(&vnode->vfs_inode);
        mutex_init(&vnode->io_lock);
-       mutex_init(&vnode->validate_lock);
+       init_rwsem(&vnode->validate_lock);
        spin_lock_init(&vnode->wb_lock);
        spin_lock_init(&vnode->lock);
        INIT_LIST_HEAD(&vnode->wb_keys);
@@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
        if (afs_begin_vnode_operation(&fc, vnode, key)) {
                fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_get_volume_status(&fc, &vs);
                }
 
index c164698dc30481156eb9738e0c3f1b91d5ab5108..8b39e6ebb40bc17905cf2eca24b1eacadf933584 100644 (file)
@@ -351,7 +351,7 @@ static int afs_store_data(struct address_space *mapping,
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
                while (afs_select_fileserver(&fc)) {
-                       fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+                       fc.cb_break = afs_calc_vnode_cb_break(vnode);
                        afs_fs_store_data(&fc, mapping, first, last, offset, to);
                }
 
index 82e8f6edfb48d0e8670dd58e3fbdcfb4b5ceb85d..b12e37f275307f04e4465d1c7b80d7342b4b9f15 100644 (file)
@@ -749,7 +749,7 @@ static int autofs4_dir_mkdir(struct inode *dir,
 
        autofs4_del_active(dentry);
 
-       inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555);
+       inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode);
        if (!inode)
                return -ENOMEM;
        d_add(dentry, inode);
index 41e04183e4ce84a38e18a3997eb6708c3e8234f6..4ad6f669fe34b21ec592cdd25b7b284fb0f74398 100644 (file)
@@ -377,10 +377,10 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);
 
-       if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
-               pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
-                               task_pid_nr(current), current->comm,
-                               (void *)addr);
+       if ((type & MAP_FIXED_NOREPLACE) &&
+           PTR_ERR((void *)map_addr) == -EEXIST)
+               pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
+                       task_pid_nr(current), current->comm, (void *)addr);
 
        return(map_addr);
 }
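
The binfmt_elf fix above stops treating every bad address as "already mapped"
and instead checks for the specific -EEXIST that a clashing
MAP_FIXED_NOREPLACE mapping encodes into the returned address. A small
userspace model of the kernel's pointer-encoded-errno convention this relies
on:

/* Sketch only: ERR_PTR()/PTR_ERR() keep errors in the top page of the
 * address space, so a single return value can carry a pointer or an errno. */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	/* What a clashing MAP_FIXED_NOREPLACE mapping would hand back. */
	void *map_addr = ERR_PTR(-EEXIST);

	if (IS_ERR(map_addr) && PTR_ERR(map_addr) == -EEXIST)
		printf("segment requested but memory is mapped already\n");
	return 0;
}
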
index 3fd44835b3869effbfe6ced6c3b8b373617698e5..8c68961925b1482517bfe2c96635f5ee1cc79694 100644 (file)
@@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
        if (p->reada != READA_NONE)
                reada_for_search(fs_info, p, level, slot, key->objectid);
 
-       btrfs_release_path(p);
-
        ret = -EAGAIN;
-       tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+       tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
                              &first_key);
        if (!IS_ERR(tmp)) {
                /*
@@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
        } else {
                ret = PTR_ERR(tmp);
        }
+
+       btrfs_release_path(p);
        return ret;
 }
 
@@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        down_read(&fs_info->commit_root_sem);
        left_level = btrfs_header_level(left_root->commit_root);
        left_root_level = left_level;
-       left_path->nodes[left_level] = left_root->commit_root;
+       left_path->nodes[left_level] =
+                       btrfs_clone_extent_buffer(left_root->commit_root);
+       if (!left_path->nodes[left_level]) {
+               up_read(&fs_info->commit_root_sem);
+               ret = -ENOMEM;
+               goto out;
+       }
        extent_buffer_get(left_path->nodes[left_level]);
 
        right_level = btrfs_header_level(right_root->commit_root);
        right_root_level = right_level;
-       right_path->nodes[right_level] = right_root->commit_root;
+       right_path->nodes[right_level] =
+                       btrfs_clone_extent_buffer(right_root->commit_root);
+       if (!right_path->nodes[right_level]) {
+               up_read(&fs_info->commit_root_sem);
+               ret = -ENOMEM;
+               goto out;
+       }
        extent_buffer_get(right_path->nodes[right_level]);
        up_read(&fs_info->commit_root_sem);
 
index 5474ef14d6e6797c8bdf3699c9d209fb5760498c..0d422c9908b8085f531a752a4bd6d5bf1b430e02 100644 (file)
@@ -459,6 +459,25 @@ struct btrfs_block_rsv {
        unsigned short full;
        unsigned short type;
        unsigned short failfast;
+
+       /*
+        * Qgroup equivalent for @size @reserved
+        *
+        * Unlike normal @size/@reserved for an inode rsv, qgroup doesn't care
+        * about things like csum size or how many tree blocks it will need to
+        * reserve.
+        *
+        * Qgroup cares more about the net change in extent usage.
+        *
+        * So for one newly inserted file extent, even if in the worst case it
+        * causes a leaf split and a level increase, one nodesize per file
+        * extent is already more than enough.
+        *
+        * In short, qgroup_rsv_size/reserved is the upper limit of the
+        * qgroup metadata reservation that may be needed.
+        */
+       u64 qgroup_rsv_size;
+       u64 qgroup_rsv_reserved;
 };
 
 /*
@@ -714,6 +733,12 @@ struct btrfs_delayed_root;
  */
 #define BTRFS_FS_EXCL_OP                       16
 
+/*
+ * To inform transaction_kthread that we need an immediate commit, so it
+ * doesn't have to wait for commit_interval
+ */
+#define BTRFS_FS_NEED_ASYNC_COMMIT             17
+
 struct btrfs_fs_info {
        u8 fsid[BTRFS_FSID_SIZE];
        u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -3157,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
                              u64 *ram_bytes);
 
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+                               struct btrfs_inode *inode);
 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
 int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
index 06ec8ab6d9ba593cc63dd58b6edea72584116e24..a8d492dbd3e7c100715011a9ddad8a22d82932dd 100644 (file)
@@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
        dst_rsv = &fs_info->delayed_block_rsv;
 
        num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+
+       /*
+        * Here we migrate space rsv from the transaction rsv, since we have
+        * already reserved space when starting the transaction.  So there is
+        * no need to reserve qgroup space here.
+        */
        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
        if (!ret) {
                trace_btrfs_space_reservation(fs_info, "delayed_item",
@@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
                return;
 
        rsv = &fs_info->delayed_block_rsv;
-       btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
+       /*
+        * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
+        * to release/reserve qgroup space.
+        */
        trace_btrfs_space_reservation(fs_info, "delayed_item",
                                      item->key.objectid, item->bytes_reserved,
                                      0);
@@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata(
 
        num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
-       ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
-       if (ret < 0)
-               return ret;
        /*
         * btrfs_dirty_inode will update the inode under btrfs_join_transaction
         * which doesn't reserve space for speed.  This is a problem since we
@@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata(
         */
        if (!src_rsv || (!trans->bytes_reserved &&
                         src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
+               ret = btrfs_qgroup_reserve_meta_prealloc(root,
+                               fs_info->nodesize, true);
+               if (ret < 0)
+                       return ret;
                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
                                          BTRFS_RESERVE_NO_FLUSH);
                /*
@@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata(
                                                      "delayed_inode",
                                                      btrfs_ino(inode),
                                                      num_bytes, 1);
+               } else {
+                       btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
                }
                return ret;
        }
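
The delayed-inode path now takes its qgroup reservation only on the branch
that actually adds to the block rsv, and frees it again if that add fails.
The shape of this reserve-then-roll-back pattern, with hypothetical stubs
standing in for the btrfs calls:

/* Sketch only: two-phase reservation with rollback on failure. */
#include <errno.h>

static int qgroup_reserve(long bytes) { (void)bytes; return 0; }
static void qgroup_free(long bytes) { (void)bytes; }
static int block_rsv_add(long bytes) { (void)bytes; return -ENOSPC; }

static int reserve_metadata(long nodesize, long num_bytes)
{
	int ret;

	ret = qgroup_reserve(nodesize);		/* phase 1 */
	if (ret < 0)
		return ret;

	ret = block_rsv_add(num_bytes);		/* phase 2 */
	if (ret < 0)
		qgroup_free(nodesize);		/* roll back phase 1 */
	return ret;
}
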
index 9e98295de7ce2f24fb009c99102081c49f2c4ba2..e1b0651686f7c4e988766d927c669306560c468d 100644 (file)
@@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_qgroup_extent_record *qrecord,
                     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-                    int action, int is_data, int *qrecord_inserted_ret,
+                    int action, int is_data, int is_system,
+                    int *qrecord_inserted_ret,
                     int *old_ref_mod, int *new_ref_mod)
+
 {
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_root *delayed_refs;
@@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref->ref_mod = count_mod;
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
+       head_ref->is_system = is_system;
        head_ref->ref_tree = RB_ROOT;
        INIT_LIST_HEAD(&head_ref->ref_add_list);
        RB_CLEAR_NODE(&head_ref->href_node);
@@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;
+       int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
 
        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         */
        head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
                                        bytenr, num_bytes, 0, 0, action, 0,
-                                       &qrecord_inserted, old_ref_mod,
-                                       new_ref_mod);
+                                       is_system, &qrecord_inserted,
+                                       old_ref_mod, new_ref_mod);
 
        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, level, action);
@@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         */
        head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
                                        bytenr, num_bytes, ref_root, reserved,
-                                       action, 1, &qrecord_inserted,
+                                       action, 1, 0, &qrecord_inserted,
                                        old_ref_mod, new_ref_mod);
 
        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
 
+       /*
+        * extent_ops just modify the flags of an extent; they don't result
+        * in ref count changes, so it's safe to pass false/0 for the
+        * is_system argument
+        */
        add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
                             num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-                            extent_op->is_data, NULL, NULL, NULL);
+                            extent_op->is_data, 0, NULL, NULL, NULL);
 
        spin_unlock(&delayed_refs->lock);
        return 0;
index 741869dbc316b7593851355b55f532d217aa22aa..7f00db50bd242d0a29f35c430339950b90588697 100644 (file)
@@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head {
         */
        unsigned int must_insert_reserved:1;
        unsigned int is_data:1;
+       unsigned int is_system:1;
        unsigned int processing:1;
 };
 
index 4ac8b1d21bafd07e00a686d6c15228f6be6aef72..c3504b4d281b5cd76bb0861b781e228bcec4b3bb 100644 (file)
@@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg)
 
                now = get_seconds();
                if (cur->state < TRANS_STATE_BLOCKED &&
+                   !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
                    (now < cur->start_time ||
                     now - cur->start_time < fs_info->commit_interval)) {
                        spin_unlock(&fs_info->trans_lock);
@@ -3817,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
        set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
 
        btrfs_free_qgroup_config(fs_info);
+       ASSERT(list_empty(&fs_info->delalloc_roots));
 
        if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
                btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -4124,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
 
 static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
 {
+       /* cleanup FS via transaction */
+       btrfs_cleanup_transaction(fs_info);
+
        mutex_lock(&fs_info->cleaner_mutex);
        btrfs_run_delayed_iputs(fs_info);
        mutex_unlock(&fs_info->cleaner_mutex);
 
        down_write(&fs_info->cleanup_work_sem);
        up_write(&fs_info->cleanup_work_sem);
-
-       /* cleanup FS via transaction */
-       btrfs_cleanup_transaction(fs_info);
 }
 
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4257,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
        list_splice_init(&root->delalloc_inodes, &splice);
 
        while (!list_empty(&splice)) {
+               struct inode *inode = NULL;
                btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
                                               delalloc_inodes);
-
-               list_del_init(&btrfs_inode->delalloc_inodes);
-               clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                         &btrfs_inode->runtime_flags);
+               __btrfs_del_delalloc_inode(root, btrfs_inode);
                spin_unlock(&root->delalloc_lock);
 
-               btrfs_invalidate_inodes(btrfs_inode->root);
-
+               /*
+                * Make sure we get a live inode and that it won't disappear
+                * while we work on it.
+                */
+               inode = igrab(&btrfs_inode->vfs_inode);
+               if (inode) {
+                       invalidate_inode_pages2(inode->i_mapping);
+                       iput(inode);
+               }
                spin_lock(&root->delalloc_lock);
        }
-
        spin_unlock(&root->delalloc_lock);
 }
 
@@ -4285,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
        while (!list_empty(&splice)) {
                root = list_first_entry(&splice, struct btrfs_root,
                                         delalloc_root);
-               list_del_init(&root->delalloc_root);
                root = btrfs_grab_fs_root(root);
                BUG_ON(!root);
                spin_unlock(&fs_info->delalloc_root_lock);
index 75cfb80d25518692c2b159fee0df477adb71d39a..51b5e2da708c4e77566cb686aea81b4a1cfa3497 100644 (file)
@@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
        trace_run_delayed_ref_head(fs_info, head, 0);
 
        if (head->total_ref_mod < 0) {
-               struct btrfs_block_group_cache *cache;
+               struct btrfs_space_info *space_info;
+               u64 flags;
 
-               cache = btrfs_lookup_block_group(fs_info, head->bytenr);
-               ASSERT(cache);
-               percpu_counter_add(&cache->space_info->total_bytes_pinned,
+               if (head->is_data)
+                       flags = BTRFS_BLOCK_GROUP_DATA;
+               else if (head->is_system)
+                       flags = BTRFS_BLOCK_GROUP_SYSTEM;
+               else
+                       flags = BTRFS_BLOCK_GROUP_METADATA;
+               space_info = __find_space_info(fs_info, flags);
+               ASSERT(space_info);
+               percpu_counter_add(&space_info->total_bytes_pinned,
                                   -head->num_bytes);
-               btrfs_put_block_group(cache);
 
                if (head->is_data) {
                        spin_lock(&delayed_refs->lock);
@@ -3136,7 +3142,11 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        struct rb_node *node;
        int ret = 0;
 
+       spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
+       if (cur_trans)
+               refcount_inc(&cur_trans->use_count);
+       spin_unlock(&root->fs_info->trans_lock);
        if (!cur_trans)
                return 0;
 
@@ -3145,6 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (!head) {
                spin_unlock(&delayed_refs->lock);
+               btrfs_put_transaction(cur_trans);
                return 0;
        }
 
@@ -3161,6 +3172,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
                mutex_lock(&head->mutex);
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref_head(head);
+               btrfs_put_transaction(cur_trans);
                return -EAGAIN;
        }
        spin_unlock(&delayed_refs->lock);
@@ -3193,6 +3205,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        }
        spin_unlock(&head->lock);
        mutex_unlock(&head->mutex);
+       btrfs_put_transaction(cur_trans);
        return ret;
 }
 
@@ -5559,14 +5572,18 @@ static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
 
 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
-                                   struct btrfs_block_rsv *dest, u64 num_bytes)
+                                   struct btrfs_block_rsv *dest, u64 num_bytes,
+                                   u64 *qgroup_to_release_ret)
 {
        struct btrfs_space_info *space_info = block_rsv->space_info;
+       u64 qgroup_to_release = 0;
        u64 ret;
 
        spin_lock(&block_rsv->lock);
-       if (num_bytes == (u64)-1)
+       if (num_bytes == (u64)-1) {
                num_bytes = block_rsv->size;
+               qgroup_to_release = block_rsv->qgroup_rsv_size;
+       }
        block_rsv->size -= num_bytes;
        if (block_rsv->reserved >= block_rsv->size) {
                num_bytes = block_rsv->reserved - block_rsv->size;
@@ -5575,6 +5592,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
        } else {
                num_bytes = 0;
        }
+       if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+               qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+                                   block_rsv->qgroup_rsv_size;
+               block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+       } else {
+               qgroup_to_release = 0;
+       }
        spin_unlock(&block_rsv->lock);
 
        ret = num_bytes;
@@ -5597,6 +5621,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                        space_info_add_old_bytes(fs_info, space_info,
                                                 num_bytes);
        }
+       if (qgroup_to_release_ret)
+               *qgroup_to_release_ret = qgroup_to_release;
        return ret;
 }
 
@@ -5738,17 +5764,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
        struct btrfs_root *root = inode->root;
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 num_bytes = 0;
+       u64 qgroup_num_bytes = 0;
        int ret = -ENOSPC;
 
        spin_lock(&block_rsv->lock);
        if (block_rsv->reserved < block_rsv->size)
                num_bytes = block_rsv->size - block_rsv->reserved;
+       if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+               qgroup_num_bytes = block_rsv->qgroup_rsv_size -
+                                  block_rsv->qgroup_rsv_reserved;
        spin_unlock(&block_rsv->lock);
 
        if (num_bytes == 0)
                return 0;
 
-       ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+       ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
        if (ret)
                return ret;
        ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
@@ -5756,7 +5786,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
                block_rsv_add_bytes(block_rsv, num_bytes, 0);
                trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                              btrfs_ino(inode), num_bytes, 1);
-       }
+
+               /* Don't forget to increase qgroup_rsv_reserved */
+               spin_lock(&block_rsv->lock);
+               block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+               spin_unlock(&block_rsv->lock);
+       } else
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
        return ret;
 }
 
@@ -5777,20 +5813,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 released = 0;
+       u64 qgroup_to_release = 0;
 
        /*
         * Since we statically set the block_rsv->size we just want to say we
         * are releasing 0 bytes, and then we'll just get the reservation over
         * the size freed.
         */
-       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
+       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
+                                          &qgroup_to_release);
        if (released > 0)
                trace_btrfs_space_reservation(fs_info, "delalloc",
                                              btrfs_ino(inode), released, 0);
        if (qgroup_free)
-               btrfs_qgroup_free_meta_prealloc(inode->root, released);
+               btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
        else
-               btrfs_qgroup_convert_reserved_meta(inode->root, released);
+               btrfs_qgroup_convert_reserved_meta(inode->root,
+                                                  qgroup_to_release);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5802,7 +5841,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
        if (global_rsv == block_rsv ||
            block_rsv->space_info != global_rsv->space_info)
                global_rsv = NULL;
-       block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
+       block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
 }
 
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5882,7 +5921,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
        block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
-                               (u64)-1);
+                               (u64)-1, NULL);
        WARN_ON(fs_info->trans_block_rsv.size > 0);
        WARN_ON(fs_info->trans_block_rsv.reserved > 0);
        WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5906,7 +5945,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
        WARN_ON_ONCE(!list_empty(&trans->new_bgs));
 
        block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
-                               trans->chunk_bytes_reserved);
+                               trans->chunk_bytes_reserved, NULL);
        trans->chunk_bytes_reserved = 0;
 }
 
@@ -6011,6 +6050,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 reserve_size = 0;
+       u64 qgroup_rsv_size = 0;
        u64 csum_leaves;
        unsigned outstanding_extents;
 
@@ -6023,9 +6063,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
                                                 inode->csum_bytes);
        reserve_size += btrfs_calc_trans_metadata_size(fs_info,
                                                       csum_leaves);
+       /*
+        * For qgroup rsv, the calculation is very simple:
+        * account one nodesize for each outstanding extent
+        *
+        * This overestimates in most cases.
+        */
+       qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
 
        spin_lock(&block_rsv->lock);
        block_rsv->size = reserve_size;
+       block_rsv->qgroup_rsv_size = qgroup_rsv_size;
        spin_unlock(&block_rsv->lock);
 }
 
@@ -8403,7 +8451,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
                            struct btrfs_block_rsv *block_rsv, u32 blocksize)
 {
        block_rsv_add_bytes(block_rsv, blocksize, 0);
-       block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
+       block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
 }
 
 /*
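
block_rsv_release_bytes() now also reports, through an out-parameter, how much
qgroup reservation became excess alongside the byte excess, since the two
pools are sized independently. A minimal model of the dual-pool trim (the
struct is a simplified stand-in; the real function additionally migrates the
freed bytes to a destination rsv):

/* Sketch only: trim two parallel reservation pools to their targets. */
struct rsv {
	unsigned long long size, reserved;
	unsigned long long qgroup_rsv_size, qgroup_rsv_reserved;
};

static unsigned long long rsv_release(struct rsv *r,
				      unsigned long long *qgroup_released)
{
	unsigned long long bytes = 0, qbytes = 0;

	if (r->reserved > r->size) {
		bytes = r->reserved - r->size;
		r->reserved = r->size;
	}
	if (r->qgroup_rsv_reserved > r->qgroup_rsv_size) {
		qbytes = r->qgroup_rsv_reserved - r->qgroup_rsv_size;
		r->qgroup_rsv_reserved = r->qgroup_rsv_size;
	}
	if (qgroup_released)
		*qgroup_released = qbytes;	/* NULL allowed, as in the patch */
	return bytes;
}
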
index 0167a9c97c9c2933ccaf8f2456b22a525915f18d..f660ba1e5e58ef30f5c72555d7408b686e507e0e 100644 (file)
@@ -1748,7 +1748,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             lockstart, lockend, &cached_state);
                btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
-                                              (ret != 0));
+                                              true);
                if (ret) {
                        btrfs_drop_pages(pages, num_pages);
                        break;
index e064c49c9a9a01a49a9a862bbacd6d16de25594e..8e604e7071f14cf166652369e6f167fab5b5f462 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/uio.h>
 #include <linux/magic.h>
 #include <linux/iversion.h>
+#include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -1741,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
        spin_unlock(&root->delalloc_lock);
 }
 
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
-                                    struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+                               struct btrfs_inode *inode)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 
-       spin_lock(&root->delalloc_lock);
        if (!list_empty(&inode->delalloc_inodes)) {
                list_del_init(&inode->delalloc_inodes);
                clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
@@ -1759,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
                        spin_unlock(&fs_info->delalloc_root_lock);
                }
        }
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+                                    struct btrfs_inode *inode)
+{
+       spin_lock(&root->delalloc_lock);
+       __btrfs_del_delalloc_inode(root, inode);
        spin_unlock(&root->delalloc_lock);
 }
 
@@ -5905,11 +5913,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
                struct dir_entry *entry = addr;
                char *name = (char *)(entry + 1);
 
-               ctx->pos = entry->offset;
-               if (!dir_emit(ctx, name, entry->name_len, entry->ino,
-                             entry->type))
+               ctx->pos = get_unaligned(&entry->offset);
+               if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
+                                        get_unaligned(&entry->ino),
+                                        get_unaligned(&entry->type)))
                        return 1;
-               addr += sizeof(struct dir_entry) + entry->name_len;
+               addr += sizeof(struct dir_entry) +
+                       get_unaligned(&entry->name_len);
                ctx->pos++;
        }
        return 0;
@@ -5999,14 +6009,15 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
                }
 
                entry = addr;
-               entry->name_len = name_len;
+               put_unaligned(name_len, &entry->name_len);
                name_ptr = (char *)(entry + 1);
                read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
                                   name_len);
-               entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+               put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+                               &entry->type);
                btrfs_dir_item_key_to_cpu(leaf, di, &location);
-               entry->ino = location.objectid;
-               entry->offset = found_key.offset;
+               put_unaligned(location.objectid, &entry->ino);
+               put_unaligned(found_key.offset, &entry->offset);
                entries++;
                addr += sizeof(struct dir_entry) + name_len;
                total_len += sizeof(struct dir_entry) + name_len;
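
The readdir buffer packs struct dir_entry headers back to back with their
variable-length names, so fields can land at unaligned addresses;
get_unaligned()/put_unaligned() make those accesses safe on strict-alignment
architectures. A userspace sketch of the packing, with memcpy-based helpers
approximating what the kernel macros do (entry layout simplified):

/* Sketch only: unaligned field access in a packed, variable-stride buffer. */
#include <stdint.h>
#include <string.h>

struct dir_entry {
	uint64_t ino;
	uint64_t offset;
	uint16_t name_len;
	uint8_t type;
};				/* name bytes follow immediately */

#define put_unaligned(val, ptr) do {		\
	__typeof__(*(ptr)) __v = (val);		\
	memcpy((ptr), &__v, sizeof(__v));	\
} while (0)

#define get_unaligned(ptr) ({			\
	__typeof__(*(ptr)) __v;			\
	memcpy(&__v, (ptr), sizeof(__v));	\
	__v;					\
})

/* Append one entry and return the (arbitrarily aligned) next position. */
static char *pack_entry(char *addr, uint64_t ino, const char *name,
			uint16_t name_len)
{
	struct dir_entry *entry = (struct dir_entry *)addr;

	put_unaligned(ino, &entry->ino);
	put_unaligned(name_len, &entry->name_len);
	memcpy((char *)(entry + 1), name, name_len);
	return addr + sizeof(*entry) + name_len;
}
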
index 124276bba8cf310074e9b1d1134c6463e3a98853..21a831d3d087418bc2e4db884f4fd6a25e9df4e5 100644 (file)
@@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l)
        fs_info = l->fs_info;
        nr = btrfs_header_nritems(l);
 
-       btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d",
-                  btrfs_header_bytenr(l), nr,
-                  btrfs_leaf_free_space(fs_info, l));
+       btrfs_info(fs_info,
+                  "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
+                  btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
+                  btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
        for (i = 0 ; i < nr ; i++) {
                item = btrfs_item_nr(i);
                btrfs_item_key_to_cpu(l, &key, i);
@@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
        }
 }
 
-void btrfs_print_tree(struct extent_buffer *c)
+void btrfs_print_tree(struct extent_buffer *c, bool follow)
 {
        struct btrfs_fs_info *fs_info;
        int i; u32 nr;
@@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c)
                return;
        }
        btrfs_info(fs_info,
-                  "node %llu level %d total ptrs %d free spc %u",
-                  btrfs_header_bytenr(c), level, nr,
-                  (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr);
+                  "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu",
+                  btrfs_header_bytenr(c), level, btrfs_header_generation(c),
+                  nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
+                  btrfs_header_owner(c));
        for (i = 0; i < nr; i++) {
                btrfs_node_key_to_cpu(c, &key, i);
-               pr_info("\tkey %d (%llu %u %llu) block %llu\n",
+               pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
                       i, key.objectid, key.type, key.offset,
-                      btrfs_node_blockptr(c, i));
+                      btrfs_node_blockptr(c, i),
+                      btrfs_node_ptr_generation(c, i));
        }
+       if (!follow)
+               return;
        for (i = 0; i < nr; i++) {
                struct btrfs_key first_key;
                struct extent_buffer *next;
@@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c)
                if (btrfs_header_level(next) !=
                       level - 1)
                        BUG();
-               btrfs_print_tree(next);
+               btrfs_print_tree(next, follow);
                free_extent_buffer(next);
        }
 }
index 4a98481688f4398e728886be775a0a2a128d8044..e6bb38fd75ad88b38681055c542c73c0c76ba132 100644 (file)
@@ -7,6 +7,6 @@
 #define BTRFS_PRINT_TREE_H
 
 void btrfs_print_leaf(struct extent_buffer *l);
-void btrfs_print_tree(struct extent_buffer *c);
+void btrfs_print_tree(struct extent_buffer *c, bool follow);
 
 #endif
index 53a8c95828e33a537525a193dfaf20e030505bbc..dc6140013ae8194739a8aa6a387f12c35794bdf9 100644 (file)
@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
                                  const char *value,
                                  size_t len)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int type;
 
        if (len == 0) {
@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
                return 0;
        }
 
-       if (!strncmp("lzo", value, 3))
+       if (!strncmp("lzo", value, 3)) {
                type = BTRFS_COMPRESS_LZO;
-       else if (!strncmp("zlib", value, 4))
+               btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+       } else if (!strncmp("zlib", value, 4)) {
                type = BTRFS_COMPRESS_ZLIB;
-       else if (!strncmp("zstd", value, len))
+       } else if (!strncmp("zstd", value, len)) {
                type = BTRFS_COMPRESS_ZSTD;
-       else
+               btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+       } else {
                return -EINVAL;
+       }
 
        BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
        BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
index 09c7e4fd550f4d10b8febcc745e0ba4138d9f35b..9fb758d5077a8441e994df53652fe32f771c2a49 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
+#include <linux/sizes.h>
 
 #include "ctree.h"
 #include "transaction.h"
@@ -2375,8 +2376,21 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+/*
+ * Two thresholds for committing a transaction in advance.
+ *
+ * For RATIO, the threshold is 1/RATIO of the remaining limit
+ * (excluding data and prealloc meta).
+ * For SIZE, the threshold is given directly in bytes.
+ */
+#define QGROUP_PERTRANS_RATIO          32
+#define QGROUP_PERTRANS_SIZE           SZ_32M
+static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
+                               const struct btrfs_qgroup *qg, u64 num_bytes)
 {
+       u64 limit;
+       u64 threshold;
+
        if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
            qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
                return false;
@@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
            qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
                return false;
 
+       /*
+        * Even if we passed the check, it's better to check whether the
+        * meta_pertrans reservation is pushing us near the limit.
+        * If the pertrans reservation is too large or near the limit, try
+        * to commit the transaction to free some of it, using
+        * transaction_kthread.
+        */
+       if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
+                             BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
+               if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+                       limit = qg->max_excl;
+               else
+                       limit = qg->max_rfer;
+               threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
+                           qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
+                           QGROUP_PERTRANS_RATIO;
+               threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
+
+               /*
+                * Use transaction_kthread to commit the transaction, so we
+                * don't need to worry about nested transactions or the
+                * locking context.
+                */
+               if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
+                       btrfs_commit_transaction_locksafe(fs_info);
+       }
+
        return true;
 }
 
@@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
 
                qg = unode_aux_to_qgroup(unode);
 
-               if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+               if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
                        ret = -EDQUOT;
                        goto out;
                }
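
The early-commit heuristic above computes its threshold as the smaller of
1/32 of the remaining limit (after subtracting the data and prealloc-meta
reservations) and 32MiB, then requests an async commit once the pertrans
reservation crosses it. The arithmetic as a standalone sketch (field names
are simplified stand-ins, and an underflow guard is added):

/* Sketch only: the QGROUP_PERTRANS_RATIO / QGROUP_PERTRANS_SIZE check. */
#include <stdbool.h>
#include <stdint.h>

#define PERTRANS_RATIO	32
#define PERTRANS_SIZE	(32ULL * 1024 * 1024)	/* SZ_32M */

static bool should_commit_early(uint64_t limit, uint64_t rsv_data,
				uint64_t rsv_meta_prealloc,
				uint64_t rsv_meta_pertrans)
{
	uint64_t used = rsv_data + rsv_meta_prealloc;
	uint64_t threshold;

	if (used >= limit)
		return true;			/* nothing left: commit now */
	threshold = (limit - used) / PERTRANS_RATIO;
	if (threshold > PERTRANS_SIZE)
		threshold = PERTRANS_SIZE;	/* min_t(u64, ...) */

	/* Commit in advance once the pertrans pool crosses the line. */
	return rsv_meta_pertrans > threshold;
}
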
index 00b7d3231821eba9e352381c5b7508df9bd4e226..b041b945a7ae81d8d49272871f8c0009d3ea4ae2 100644 (file)
@@ -1841,7 +1841,7 @@ int replace_path(struct btrfs_trans_handle *trans,
                old_bytenr = btrfs_node_blockptr(parent, slot);
                blocksize = fs_info->nodesize;
                old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
-               btrfs_node_key_to_cpu(parent, &key, slot);
+               btrfs_node_key_to_cpu(parent, &first_key, slot);
 
                if (level <= max_level) {
                        eb = path->nodes[level];
index 221e5cdb060be341d86cb0bb792b8601a5a81437..c0074d2d7d6d2788d1909f5a945addaa7fc065c8 100644 (file)
@@ -5236,6 +5236,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
                len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
        }
 
+       if (offset >= sctx->cur_inode_size) {
+               ret = 0;
+               goto out;
+       }
        if (offset + len > sctx->cur_inode_size)
                len = sctx->cur_inode_size - offset;
        if (len == 0) {
index 63fdcab64b016061c9d42cc86f47e8079f3b4634..c944b4769e3c7c5a966e421b5c9c9b4635cab2ac 100644 (file)
@@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         */
        cur_trans->state = TRANS_STATE_COMPLETED;
        wake_up(&cur_trans->commit_wait);
+       clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
 
        spin_lock(&fs_info->trans_lock);
        list_del_init(&cur_trans->list);
index c88fccd80bc5c940228a00cc2ce1f88e3d1c9806..d8c0826bc2c7e9aee6ff263884e4621d492f9c1f 100644 (file)
@@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
                                   int wait_for_unblock);
+
+/*
+ * Try to commit the transaction asynchronously; this makes it safe to
+ * call even while holding a spinlock.
+ *
+ * It works by telling transaction_kthread to commit the transaction
+ * without waiting for the commit interval.
+ */
+static inline void btrfs_commit_transaction_locksafe(
+               struct btrfs_fs_info *fs_info)
+{
+       set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
+       wake_up_process(fs_info->transaction_kthread);
+}
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
 void btrfs_throttle(struct btrfs_fs_info *fs_info);
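
btrfs_commit_transaction_locksafe() only sets a bit and wakes
transaction_kthread, both non-sleeping operations, which is what makes it safe
under a spinlock. A pthread model of the same flag-and-wake pattern (the mutex
and condvar are userspace stand-ins for set_bit() and wake_up_process()):

/* Sketch only: ask a worker thread to commit without committing inline. */
#include <pthread.h>
#include <stdbool.h>

static bool need_async_commit;		/* guarded by kthread_lock */
static pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kthread_wake = PTHREAD_COND_INITIALIZER;

static void commit_transaction_locksafe(void)
{
	pthread_mutex_lock(&kthread_lock);
	need_async_commit = true;
	pthread_cond_signal(&kthread_wake);	/* wake the "kthread" */
	pthread_mutex_unlock(&kthread_lock);
}

/* The "transaction_kthread": commits whenever it is flagged. */
static void *transaction_kthread(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&kthread_lock);
		while (!need_async_commit)
			pthread_cond_wait(&kthread_wake, &kthread_lock);
		need_async_commit = false;
		pthread_mutex_unlock(&kthread_lock);
		/* ... commit the transaction here ... */
	}
	return NULL;
}
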
index 43758e30aa7a97cb240c7b12146f040f5ce8d3d9..8f23a94dab770c00933f39a4ed1fb845edac22dd 100644 (file)
@@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+ * lose them after doing a fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen due to the fact that extent maps are merged when they
+ * are not in the extent map tree's list of modified extents.
+ */
+static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+                                     struct btrfs_inode *inode,
+                                     struct btrfs_path *path)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_key key;
+       const u64 i_size = i_size_read(&inode->vfs_inode);
+       const u64 ino = btrfs_ino(inode);
+       struct btrfs_path *dst_path = NULL;
+       u64 last_extent = (u64)-1;
+       int ins_nr = 0;
+       int start_slot;
+       int ret;
+
+       if (!(inode->flags & BTRFS_INODE_PREALLOC))
+               return 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = i_size;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+
+       while (true) {
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       if (ins_nr > 0) {
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, start_slot,
+                                                ins_nr, 1, 0);
+                               if (ret < 0)
+                                       goto out;
+                               ins_nr = 0;
+                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid > ino)
+                       break;
+               if (WARN_ON_ONCE(key.objectid < ino) ||
+                   key.type < BTRFS_EXTENT_DATA_KEY ||
+                   key.offset < i_size) {
+                       path->slots[0]++;
+                       continue;
+               }
+               if (last_extent == (u64)-1) {
+                       last_extent = key.offset;
+                       /*
+                        * Avoid logging extent items logged in past fsync calls
+                        * and leading to duplicate keys in the log tree.
+                        */
+                       do {
+                               ret = btrfs_truncate_inode_items(trans,
+                                                        root->log_root,
+                                                        &inode->vfs_inode,
+                                                        i_size,
+                                                        BTRFS_EXTENT_DATA_KEY);
+                       } while (ret == -EAGAIN);
+                       if (ret)
+                               goto out;
+               }
+               if (ins_nr == 0)
+                       start_slot = slot;
+               ins_nr++;
+               path->slots[0]++;
+               if (!dst_path) {
+                       dst_path = btrfs_alloc_path();
+                       if (!dst_path) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+               }
+       }
+       if (ins_nr > 0) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                start_slot, ins_nr, 1, 0);
+               if (ret > 0)
+                       ret = 0;
+       }
+out:
+       btrfs_release_path(path);
+       btrfs_free_path(dst_path);
+       return ret;
+}
+
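
The ins_nr/start_slot pair above batches runs of consecutive leaf slots so that copy_items() is called once per run rather than once per item, flushing whenever the scan crosses into the next leaf. A minimal user-space sketch of that batching idea (toy copy_items() and a fixed leaf size, not the btrfs API):

#include <stdio.h>

#define LEAF_SIZE 4     /* items per leaf in this toy model */

/* Stand-in for copy_items(): log one contiguous run of slots. */
static void copy_items(int leaf, int start_slot, int ins_nr)
{
        printf("leaf %d: copy %d item(s) from slot %d\n",
               leaf, ins_nr, start_slot);
}

int main(void)
{
        int total = 10;                 /* items spread across 3 leaves */
        int start_slot = 0, ins_nr = 0;
        int i;

        for (i = 0; i < total; i++) {
                int leaf = i / LEAF_SIZE;
                int slot = i % LEAF_SIZE;

                if (ins_nr == 0)
                        start_slot = slot;
                ins_nr++;

                /* about to move past the leaf's last slot: flush the run */
                if (slot == LEAF_SIZE - 1) {
                        copy_items(leaf, start_slot, ins_nr);
                        ins_nr = 0;
                }
        }
        if (ins_nr > 0)                 /* final partial run, as after the loop above */
                copy_items(total / LEAF_SIZE, start_slot, ins_nr);
        return 0;
}
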
 static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_inode *inode,
@@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                if (em->generation <= test_gen)
                        continue;
 
+               /* We log prealloc extents beyond eof later. */
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
+                   em->start >= i_size_read(&inode->vfs_inode))
+                       continue;
+
                if (em->start < logged_start)
                        logged_start = em->start;
                if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                num++;
        }
 
-       /*
-        * Add all prealloc extents beyond the inode's i_size to make sure we
-        * don't lose them after doing a fast fsync and replaying the log.
-        */
-       if (inode->flags & BTRFS_INODE_PREALLOC) {
-               struct rb_node *node;
-
-               for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
-                       em = rb_entry(node, struct extent_map, rb_node);
-                       if (em->start < i_size_read(&inode->vfs_inode))
-                               break;
-                       if (!list_empty(&em->list))
-                               continue;
-                       /* Same as above loop. */
-                       if (++num > 32768) {
-                               list_del_init(&tree->modified_extents);
-                               ret = -EFBIG;
-                               goto process;
-                       }
-                       refcount_inc(&em->refs);
-                       set_bit(EXTENT_FLAG_LOGGING, &em->flags);
-                       list_add_tail(&em->list, &extents);
-               }
-       }
-
        list_sort(NULL, &extents, extent_cmp);
        btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
        /*
@@ -4443,6 +4527,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        up_write(&inode->dio_sem);
 
        btrfs_release_path(path);
+       if (!ret)
+               ret = btrfs_log_prealloc_extents(trans, inode, path);
+
        return ret;
 }
 
@@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct extent_map_tree *em_tree = &inode->extent_tree;
        u64 logged_isize = 0;
        bool need_log_inode_item = true;
+       bool xattrs_logged = false;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -5128,6 +5216,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
        if (err)
                goto out_unlock;
+       xattrs_logged = true;
        if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
                btrfs_release_path(path);
                btrfs_release_path(dst_path);
@@ -5140,6 +5229,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        btrfs_release_path(dst_path);
        if (need_log_inode_item) {
                err = log_inode_item(trans, log, dst_path, inode);
+               if (!err && !xattrs_logged) {
+                       err = btrfs_log_all_xattrs(trans, root, inode, path,
+                                                  dst_path);
+                       btrfs_release_path(path);
+               }
                if (err)
                        goto out_unlock;
        }
index 292266f6ab9c9d8dfa18422998ec1884ae57369d..be3fc701f38948e37e5d776ee93413e0d06e85ff 100644 (file)
@@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
                return 0;
        }
 
+       /*
+        * A ro->rw remount sequence should continue with a paused balance
+        * regardless of who paused it (the system or, as of now, the user),
+        * so set the resume flag.
+        */
+       spin_lock(&fs_info->balance_lock);
+       fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
+       spin_unlock(&fs_info->balance_lock);
+
        tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
        return PTR_ERR_OR_ZERO(tsk);
 }
index f85040d73e3dcaa0214196ca0c242eadd0db475d..cf0e45b10121aa8323ca7d6a86a109d6f8675c08 100644 (file)
@@ -70,69 +70,104 @@ static __le32 ceph_flags_sys2wire(u32 flags)
  */
 
 /*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
+ * How many pages to get in one call to iov_iter_get_pages().  This
+ * determines the size of the on-stack array used as a buffer.
  */
-static size_t dio_get_pagev_size(const struct iov_iter *it)
+#define ITER_GET_BVECS_PAGES   64
+
+static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
+                               struct bio_vec *bvecs)
 {
-    const struct iovec *iov = it->iov;
-    const struct iovec *iovend = iov + it->nr_segs;
-    size_t size;
-
-    size = iov->iov_len - it->iov_offset;
-    /*
-     * An iov can be page vectored when both the current tail
-     * and the next base are page aligned.
-     */
-    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
-           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
-        size += iov->iov_len;
-    }
-    dout("dio_get_pagevlen len = %zu\n", size);
-    return size;
+       size_t size = 0;
+       int bvec_idx = 0;
+
+       if (maxsize > iov_iter_count(iter))
+               maxsize = iov_iter_count(iter);
+
+       while (size < maxsize) {
+               struct page *pages[ITER_GET_BVECS_PAGES];
+               ssize_t bytes;
+               size_t start;
+               int idx = 0;
+
+               bytes = iov_iter_get_pages(iter, pages, maxsize - size,
+                                          ITER_GET_BVECS_PAGES, &start);
+               if (bytes < 0)
+                       return size ?: bytes;
+
+               iov_iter_advance(iter, bytes);
+               size += bytes;
+
+               for ( ; bytes; idx++, bvec_idx++) {
+                       struct bio_vec bv = {
+                               .bv_page = pages[idx],
+                               .bv_len = min_t(int, bytes, PAGE_SIZE - start),
+                               .bv_offset = start,
+                       };
+
+                       bvecs[bvec_idx] = bv;
+                       bytes -= bv.bv_len;
+                       start = 0;
+               }
+       }
+
+       return size;
 }
 
 /*
- * Allocate a page vector based on (@it, @nbytes).
- * The return value is the tuple describing a page vector,
- * that is (@pages, @page_align, @num_pages).
+ * iov_iter_get_pages() only considers one iov_iter segment, no matter
+ * what maxsize or maxpages are given.  For ITER_BVEC that is a single
+ * page.
+ *
+ * Attempt to get up to @maxsize bytes worth of pages from @iter.
+ * Return the number of bytes in the created bio_vec array, or an error.
  */
-static struct page **
-dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
-                   size_t *page_align, int *num_pages)
+static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
+                                   struct bio_vec **bvecs, int *num_bvecs)
 {
-       struct iov_iter tmp_it = *it;
-       size_t align;
-       struct page **pages;
-       int ret = 0, idx, npages;
+       struct bio_vec *bv;
+       size_t orig_count = iov_iter_count(iter);
+       ssize_t bytes;
+       int npages;
 
-       align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
-               (PAGE_SIZE - 1);
-       npages = calc_pages_for(align, nbytes);
-       pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL);
-       if (!pages)
-               return ERR_PTR(-ENOMEM);
+       iov_iter_truncate(iter, maxsize);
+       npages = iov_iter_npages(iter, INT_MAX);
+       iov_iter_reexpand(iter, orig_count);
 
-       for (idx = 0; idx < npages; ) {
-               size_t start;
-               ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes,
-                                        npages - idx, &start);
-               if (ret < 0)
-                       goto fail;
+       /*
+        * __iter_get_bvecs() may populate only part of the array -- zero it
+        * out.
+        */
+       bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO);
+       if (!bv)
+               return -ENOMEM;
 
-               iov_iter_advance(&tmp_it, ret);
-               nbytes -= ret;
-               idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;
+       bytes = __iter_get_bvecs(iter, maxsize, bv);
+       if (bytes < 0) {
+               /*
+                * No pages were pinned -- just free the array.
+                */
+               kvfree(bv);
+               return bytes;
        }
 
-       BUG_ON(nbytes != 0);
-       *num_pages = npages;
-       *page_align = align;
-       dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align);
-       return pages;
-fail:
-       ceph_put_page_vector(pages, idx, false);
-       return ERR_PTR(ret);
+       *bvecs = bv;
+       *num_bvecs = npages;
+       return bytes;
+}
+
+static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty)
+{
+       int i;
+
+       for (i = 0; i < num_bvecs; i++) {
+               if (bvecs[i].bv_page) {
+                       if (should_dirty)
+                               set_page_dirty_lock(bvecs[i].bv_page);
+                       put_page(bvecs[i].bv_page);
+               }
+       }
+       kvfree(bvecs);
 }
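
Each pass of the inner loop in __iter_get_bvecs() turns one pinned page into a bio_vec whose length is capped by the remaining byte count and by the space left in the page after the initial offset; only the first page carries a nonzero offset. A standalone sketch of that arithmetic (hypothetical sizes, no actual page pinning):

#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
        long bytes = 10000;     /* bytes the pinning call reported */
        long start = 300;       /* offset into the first pinned page */
        int bvec_idx = 0;

        while (bytes > 0) {
                long len = bytes < PAGE_SIZE - start ? bytes
                                                     : PAGE_SIZE - start;

                printf("bvec %d: bv_offset %ld, bv_len %ld\n",
                       bvec_idx++, start, len);
                bytes -= len;
                start = 0;      /* pages after the first start at offset 0 */
        }
        return 0;
}
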
 
 /*
@@ -746,11 +781,12 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
        struct inode *inode = req->r_inode;
        struct ceph_aio_request *aio_req = req->r_priv;
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
-       int num_pages = calc_pages_for((u64)osd_data->alignment,
-                                      osd_data->length);
 
-       dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
-            inode, rc, osd_data->length);
+       BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
+       BUG_ON(!osd_data->num_bvecs);
+
+       dout("ceph_aio_complete_req %p rc %d bytes %u\n",
+            inode, rc, osd_data->bvec_pos.iter.bi_size);
 
        if (rc == -EOLDSNAPC) {
                struct ceph_aio_work *aio_work;
@@ -768,9 +804,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
        } else if (!aio_req->write) {
                if (rc == -ENOENT)
                        rc = 0;
-               if (rc >= 0 && osd_data->length > rc) {
-                       int zoff = osd_data->alignment + rc;
-                       int zlen = osd_data->length - rc;
+               if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) {
+                       struct iov_iter i;
+                       int zlen = osd_data->bvec_pos.iter.bi_size - rc;
+
                        /*
                         * If read is satisfied by single OSD request,
                         * it can pass EOF. Otherwise read is within
@@ -785,13 +822,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
                                aio_req->total_len = rc + zlen;
                        }
 
-                       if (zlen > 0)
-                               ceph_zero_page_vector_range(zoff, zlen,
-                                                           osd_data->pages);
+                       iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+                                     osd_data->num_bvecs,
+                                     osd_data->bvec_pos.iter.bi_size);
+                       iov_iter_advance(&i, rc);
+                       iov_iter_zero(zlen, &i);
                }
        }
 
-       ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
+       put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
+                 aio_req->should_dirty);
        ceph_osdc_put_request(req);
 
        if (rc < 0)
@@ -879,7 +919,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_vino vino;
        struct ceph_osd_request *req;
-       struct page **pages;
+       struct bio_vec *bvecs;
        struct ceph_aio_request *aio_req = NULL;
        int num_pages = 0;
        int flags;
@@ -914,10 +954,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        }
 
        while (iov_iter_count(iter) > 0) {
-               u64 size = dio_get_pagev_size(iter);
-               size_t start = 0;
+               u64 size = iov_iter_count(iter);
                ssize_t len;
 
+               if (write)
+                       size = min_t(u64, size, fsc->mount_options->wsize);
+               else
+                       size = min_t(u64, size, fsc->mount_options->rsize);
+
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &size, 0,
@@ -933,18 +977,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                        break;
                }
 
-               if (write)
-                       size = min_t(u64, size, fsc->mount_options->wsize);
-               else
-                       size = min_t(u64, size, fsc->mount_options->rsize);
-
-               len = size;
-               pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
-               if (IS_ERR(pages)) {
+               len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
+               if (len < 0) {
                        ceph_osdc_put_request(req);
-                       ret = PTR_ERR(pages);
+                       ret = len;
                        break;
                }
+               if (len != size)
+                       osd_req_op_extent_update(req, 0, len);
 
                /*
                 * To simplify error handling, allow AIO when IO within i_size
@@ -977,8 +1017,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                        req->r_mtime = mtime;
                }
 
-               osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
-                                                false, false);
+               osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
 
                if (aio_req) {
                        aio_req->total_len += len;
@@ -991,7 +1030,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                        list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
 
                        pos += len;
-                       iov_iter_advance(iter, len);
                        continue;
                }
 
@@ -1004,25 +1042,26 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                        if (ret == -ENOENT)
                                ret = 0;
                        if (ret >= 0 && ret < len && pos + ret < size) {
+                               struct iov_iter i;
                                int zlen = min_t(size_t, len - ret,
                                                 size - pos - ret);
-                               ceph_zero_page_vector_range(start + ret, zlen,
-                                                           pages);
+
+                               iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
+                                             len);
+                               iov_iter_advance(&i, ret);
+                               iov_iter_zero(zlen, &i);
                                ret += zlen;
                        }
                        if (ret >= 0)
                                len = ret;
                }
 
-               ceph_put_page_vector(pages, num_pages, should_dirty);
-
+               put_bvecs(bvecs, num_pages, should_dirty);
                ceph_osdc_put_request(req);
                if (ret < 0)
                        break;
 
                pos += len;
-               iov_iter_advance(iter, len);
-
                if (!write && pos >= size)
                        break;
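
With the old page-vector size heuristic gone, each iteration of the loop above simply clamps the remaining iterator bytes to the wsize/rsize mount options and issues one OSD request per chunk. Roughly, with made-up sizes:

#include <stdio.h>

int main(void)
{
        unsigned long long count = 10ULL << 20; /* bytes left in the iter */
        unsigned long long wsize = 4ULL << 20;  /* mount option cap */
        unsigned long long pos = 0;

        while (count > 0) {
                unsigned long long size = count < wsize ? count : wsize;

                /* one OSD request per clamped chunk, as in the loop above */
                printf("request: pos=%llu len=%llu\n", pos, size);
                pos += size;
                count -= size;
        }
        return 0;
}
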
 
index 7e72348639e4bcbf523ec4bec3a589fbcd643c7c..315f7e63e7cca3b64eed882e8dc0c5fefbebe645 100644 (file)
@@ -228,7 +228,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 
 static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
 {
-       return (ci->i_max_files || ci->i_max_bytes);
+       bool ret = false;
+       spin_lock(&ci->i_ceph_lock);
+       if ((ci->i_max_files || ci->i_max_bytes) &&
+           ci->i_vino.snap == CEPH_NOSNAP &&
+           ci->i_snap_realm &&
+           ci->i_snap_realm->ino == ci->i_vino.ino)
+               ret = true;
+       spin_unlock(&ci->i_ceph_lock);
+       return ret;
 }
 
 static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
@@ -1008,14 +1016,19 @@ int __ceph_setxattr(struct inode *inode, const char *name,
        char *newval = NULL;
        struct ceph_inode_xattr *xattr = NULL;
        int required_blob_size;
+       bool check_realm = false;
        bool lock_snap_rwsem = false;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
        vxattr = ceph_match_vxattr(inode, name);
-       if (vxattr && vxattr->readonly)
-               return -EOPNOTSUPP;
+       if (vxattr) {
+               if (vxattr->readonly)
+                       return -EOPNOTSUPP;
+               if (value && !strncmp(vxattr->name, "ceph.quota", 10))
+                       check_realm = true;
+       }
 
        /* pass any unhandled ceph.* xattrs through to the MDS */
        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
@@ -1109,6 +1122,15 @@ int __ceph_setxattr(struct inode *inode, const char *name,
                err = -EBUSY;
        } else {
                err = ceph_sync_setxattr(inode, name, value, size, flags);
+               if (err >= 0 && check_realm) {
+                       /* check if snaprealm was created for quota inode */
+                       spin_lock(&ci->i_ceph_lock);
+                       if ((ci->i_max_files || ci->i_max_bytes) &&
+                           !(ci->i_snap_realm &&
+                             ci->i_snap_realm->ino == ci->i_vino.ino))
+                               err = -EOPNOTSUPP;
+                       spin_unlock(&ci->i_ceph_lock);
+               }
        }
 out:
        ceph_free_cap_flush(prealloc_cf);
index 741749a986142c5b1c2d47dfd6793ee05989ab09..5f132d59dfc2668cd0d737eae36b03f30226dc7a 100644 (file)
@@ -197,7 +197,7 @@ config CIFS_SMB311
 
 config CIFS_SMB_DIRECT
        bool "SMB Direct support (Experimental)"
-       depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y
+       depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
        help
          Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
          SMB Direct allows transferring SMB packets over RDMA. If unsure,
index fe5567655662cefc4edb7b52d071012e3b355f02..0e74690d11bc8e33c0472565860043175dc25995 100644 (file)
@@ -54,7 +54,7 @@ do {                                                          \
                pr_debug_ ## ratefunc("%s: "                    \
                                fmt, __FILE__, ##__VA_ARGS__);  \
        } else if ((type) & VFS) {                              \
-               pr_err_ ## ratefunc("CuIFS VFS: "               \
+               pr_err_ ## ratefunc("CIFS VFS: "                \
                                 fmt, ##__VA_ARGS__);           \
        } else if ((type) & NOISY && (NOISY != 0)) {            \
                pr_debug_ ## ratefunc(fmt, ##__VA_ARGS__);      \
index f715609b13f34a412de9f0486750a05cf73340b6..5a5a0158cc8f5085cda312d25c10b21e49bd8d1d 100644 (file)
@@ -1047,6 +1047,18 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
        return rc;
 }
 
+/*
+ * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync()
+ * is a dummy operation.
+ */
+static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n",
+                file, datasync);
+
+       return 0;
+}
+
 static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
                                struct file *dst_file, loff_t destoff,
                                size_t len, unsigned int flags)
@@ -1181,6 +1193,7 @@ const struct file_operations cifs_dir_ops = {
        .copy_file_range = cifs_copy_file_range,
        .clone_file_range = cifs_clone_file_range,
        .llseek = generic_file_llseek,
+       .fsync = cifs_dir_fsync,
 };
 
 static void
index 6d3e40d7029c47c6abd7264e8a8aa0cf526f4ee5..1529a088383d048554cfd129de7b8898aa12f6e1 100644 (file)
@@ -455,6 +455,9 @@ cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required)
                server->sign = true;
        }
 
+       if (cifs_rdma_enabled(server) && server->sign)
+               cifs_dbg(VFS, "Signing is enabled, and RDMA read/write will be disabled");
+
        return 0;
 }
 
index e8830f076a7f16a056f7eec3cf025e0e30bff9e8..7a10a5d0731f078247d5636d3e00aae169b48345 100644 (file)
@@ -1977,14 +1977,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                goto cifs_parse_mount_err;
        }
 
-#ifdef CONFIG_CIFS_SMB_DIRECT
-       if (vol->rdma && vol->sign) {
-               cifs_dbg(VFS, "Currently SMB direct doesn't support signing."
-                       " This is being fixed\n");
-               goto cifs_parse_mount_err;
-       }
-#endif
-
 #ifndef CONFIG_KEYS
        /* Muliuser mounts require CONFIG_KEYS support */
        if (vol->multiuser) {
@@ -2959,6 +2951,22 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                }
        }
 
+       if (volume_info->seal) {
+               if (ses->server->vals->protocol_id == 0) {
+                       cifs_dbg(VFS,
+                                "SMB3 or later required for encryption\n");
+                       rc = -EOPNOTSUPP;
+                       goto out_fail;
+               } else if (tcon->ses->server->capabilities &
+                                       SMB2_GLOBAL_CAP_ENCRYPTION)
+                       tcon->seal = true;
+               else {
+                       cifs_dbg(VFS, "Encryption is not supported on share\n");
+                       rc = -EOPNOTSUPP;
+                       goto out_fail;
+               }
+       }
+
        /*
         * BB Do we need to wrap session_mutex around this TCon call and Unix
         * SetFS as we do on SessSetup and reconnect?
@@ -3007,22 +3015,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                tcon->use_resilient = true;
        }
 
-       if (volume_info->seal) {
-               if (ses->server->vals->protocol_id == 0) {
-                       cifs_dbg(VFS,
-                                "SMB3 or later required for encryption\n");
-                       rc = -EOPNOTSUPP;
-                       goto out_fail;
-               } else if (tcon->ses->server->capabilities &
-                                       SMB2_GLOBAL_CAP_ENCRYPTION)
-                       tcon->seal = true;
-               else {
-                       cifs_dbg(VFS, "Encryption is not supported on share\n");
-                       rc = -EOPNOTSUPP;
-                       goto out_fail;
-               }
-       }
-
        /*
         * We can have only one retry value for a connection to a share so for
         * resources mounted more than once to the same server share the last
index 81ba6e0d88d8f3ec1d1c8b2e81e695819537c921..925844343038aaa04b479fcdd2063c9ddfb811a0 100644 (file)
@@ -684,6 +684,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                goto mknod_out;
        }
 
+       if (!S_ISCHR(mode) && !S_ISBLK(mode))
+               goto mknod_out;
+
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
                goto mknod_out;
 
@@ -692,10 +695,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
 
        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (buf == NULL) {
-               kfree(full_path);
                rc = -ENOMEM;
-               free_xid(xid);
-               return rc;
+               goto mknod_out;
        }
 
        if (backup_cred(cifs_sb))
@@ -742,7 +743,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                pdev->minor = cpu_to_le64(MINOR(device_number));
                rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
                                                        &bytes_written, iov, 1);
-       } /* else if (S_ISFIFO) */
+       }
        tcon->ses->server->ops->close(xid, tcon, &fid);
        d_drop(direntry);
 
index 4bcd4e838b475071f6e054ab9c89891dbef3d6fe..23fd430fe74a6f111b5832cce72a1bc212c4ac96 100644 (file)
@@ -3462,7 +3462,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
  * If the page is mmap'ed into a process' page tables, then we need to make
  * sure that it doesn't change while being written back.
  */
-static int
+static vm_fault_t
 cifs_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
index b4ae932ea13448aa31509142c72ec71a360eb967..9c6d95ffca97de94d0763d7fde9b952f4a26fb3d 100644 (file)
@@ -252,9 +252,14 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
        wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
        wsize = min_t(unsigned int, wsize, server->max_write);
 #ifdef CONFIG_CIFS_SMB_DIRECT
-       if (server->rdma)
-               wsize = min_t(unsigned int,
+       if (server->rdma) {
+               if (server->sign)
+                       wsize = min_t(unsigned int,
+                               wsize, server->smbd_conn->max_fragmented_send_size);
+               else
+                       wsize = min_t(unsigned int,
                                wsize, server->smbd_conn->max_readwrite_size);
+       }
 #endif
        if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
                wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
@@ -272,9 +277,14 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
        rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
        rsize = min_t(unsigned int, rsize, server->max_read);
 #ifdef CONFIG_CIFS_SMB_DIRECT
-       if (server->rdma)
-               rsize = min_t(unsigned int,
+       if (server->rdma) {
+               if (server->sign)
+                       rsize = min_t(unsigned int,
+                               rsize, server->smbd_conn->max_fragmented_recv_size);
+               else
+                       rsize = min_t(unsigned int,
                                rsize, server->smbd_conn->max_readwrite_size);
+       }
 #endif
 
        if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
@@ -579,9 +589,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
 
        SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 
+       /*
+        * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's
+        * not an error. Otherwise, the specified ea_name was not found.
+        */
        if (!rc)
                rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data,
                                          SMB2_MAX_EA_BUF, ea_name);
+       else if (!ea_name && rc == -ENODATA)
+               rc = 0;
 
        kfree(smb2_data);
        return rc;
@@ -1452,7 +1468,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
        struct cifs_open_parms oparms;
        struct cifs_fid fid;
        struct kvec err_iov = {NULL, 0};
-       struct smb2_err_rsp *err_buf = NULL;
+       struct smb2_err_rsp *err_buf;
        struct smb2_symlink_err_rsp *symlink;
        unsigned int sub_len;
        unsigned int sub_offset;
@@ -1476,7 +1492,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 
        rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov);
 
-       if (!rc || !err_buf) {
+       if (!rc || !err_iov.iov_base) {
                kfree(utf16_path);
                return -ENOENT;
        }
index 0f044c4a2dc9b5b625e9cae21d34344692b76d23..0f48741a0130d35286fec1e031b5eea45e367699 100644 (file)
@@ -383,10 +383,10 @@ static void
 build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
 {
        pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
-       pneg_ctxt->DataLength = cpu_to_le16(6);
-       pneg_ctxt->CipherCount = cpu_to_le16(2);
-       pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
-       pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+       pneg_ctxt->DataLength = cpu_to_le16(4); /* Cipher Count + le16 cipher */
+       pneg_ctxt->CipherCount = cpu_to_le16(1);
+/* pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;*/ /* not supported yet */
+       pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_CCM;
 }
 
 static void
@@ -444,6 +444,7 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
                return -EINVAL;
        }
        server->cipher_type = ctxt->Ciphers[0];
+       server->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
        return 0;
 }
 
@@ -729,19 +730,14 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 {
-       int rc = 0;
-       struct validate_negotiate_info_req vneg_inbuf;
+       int rc;
+       struct validate_negotiate_info_req *pneg_inbuf;
        struct validate_negotiate_info_rsp *pneg_rsp = NULL;
        u32 rsplen;
        u32 inbuflen; /* max of 4 dialects */
 
        cifs_dbg(FYI, "validate negotiate\n");
 
-#ifdef CONFIG_CIFS_SMB_DIRECT
-       if (tcon->ses->server->rdma)
-               return 0;
-#endif
-
        /* In SMB3.11 preauth integrity supersedes validate negotiate */
        if (tcon->ses->server->dialect == SMB311_PROT_ID)
                return 0;
@@ -764,63 +760,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
        if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
                cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
 
-       vneg_inbuf.Capabilities =
+       pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS);
+       if (!pneg_inbuf)
+               return -ENOMEM;
+
+       pneg_inbuf->Capabilities =
                        cpu_to_le32(tcon->ses->server->vals->req_capabilities);
-       memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
+       memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid,
                                        SMB2_CLIENT_GUID_SIZE);
 
        if (tcon->ses->sign)
-               vneg_inbuf.SecurityMode =
+               pneg_inbuf->SecurityMode =
                        cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
        else if (global_secflags & CIFSSEC_MAY_SIGN)
-               vneg_inbuf.SecurityMode =
+               pneg_inbuf->SecurityMode =
                        cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
        else
-               vneg_inbuf.SecurityMode = 0;
+               pneg_inbuf->SecurityMode = 0;
 
 
        if (strcmp(tcon->ses->server->vals->version_string,
                SMB3ANY_VERSION_STRING) == 0) {
-               vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
-               vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
-               vneg_inbuf.DialectCount = cpu_to_le16(2);
+               pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+               pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+               pneg_inbuf->DialectCount = cpu_to_le16(2);
                /* structure is big enough for 3 dialects, sending only 2 */
-               inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+               inbuflen = sizeof(*pneg_inbuf) -
+                               sizeof(pneg_inbuf->Dialects[0]);
        } else if (strcmp(tcon->ses->server->vals->version_string,
                SMBDEFAULT_VERSION_STRING) == 0) {
-               vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
-               vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
-               vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
-               vneg_inbuf.DialectCount = cpu_to_le16(3);
+               pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+               pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+               pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+               pneg_inbuf->DialectCount = cpu_to_le16(3);
                /* structure is big enough for 3 dialects */
-               inbuflen = sizeof(struct validate_negotiate_info_req);
+               inbuflen = sizeof(*pneg_inbuf);
        } else {
                /* otherwise specific dialect was requested */
-               vneg_inbuf.Dialects[0] =
+               pneg_inbuf->Dialects[0] =
                        cpu_to_le16(tcon->ses->server->vals->protocol_id);
-               vneg_inbuf.DialectCount = cpu_to_le16(1);
+               pneg_inbuf->DialectCount = cpu_to_le16(1);
                /* structure is big enough for 3 dialects, sending only 1 */
-               inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+               inbuflen = sizeof(*pneg_inbuf) -
+                               sizeof(pneg_inbuf->Dialects[0]) * 2;
        }
 
        rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
                FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
-               (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
-               (char **)&pneg_rsp, &rsplen);
+               (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen);
 
        if (rc != 0) {
                cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
-               return -EIO;
+               rc = -EIO;
+               goto out_free_inbuf;
        }
 
-       if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+       rc = -EIO;
+       if (rsplen != sizeof(*pneg_rsp)) {
                cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
                         rsplen);
 
                /* relax check since Mac returns max bufsize allowed on ioctl */
-               if ((rsplen > CIFSMaxBufSize)
-                    || (rsplen < sizeof(struct validate_negotiate_info_rsp)))
-                       goto err_rsp_free;
+               if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp))
+                       goto out_free_rsp;
        }
 
        /* check validate negotiate info response matches what we got earlier */
@@ -837,15 +839,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
                goto vneg_out;
 
        /* validate negotiate successful */
+       rc = 0;
        cifs_dbg(FYI, "validate negotiate info successful\n");
-       kfree(pneg_rsp);
-       return 0;
+       goto out_free_rsp;
 
 vneg_out:
        cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
-err_rsp_free:
+out_free_rsp:
        kfree(pneg_rsp);
-       return -EIO;
+out_free_inbuf:
+       kfree(pneg_inbuf);
+       return rc;
 }
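
The conversion from an on-stack vneg_inbuf to kmalloc() is paired with the usual goto-unwind so that both buffers are freed on every exit path. A user-space toy of the same unwind shape (stand-in buffers and a simulated failure, not the SMB2 structures):

#include <errno.h>
#include <stdlib.h>

/* Toy unwind with two heap buffers, mirroring out_free_rsp/out_free_inbuf. */
static int do_validate(int simulate_ioctl_error)
{
        char *inbuf, *rsp;
        int rc;

        inbuf = malloc(128);            /* stands in for pneg_inbuf */
        if (!inbuf)
                return -ENOMEM;

        rsp = malloc(128);              /* stands in for the ioctl reply */
        if (!rsp) {
                rc = -EIO;
                goto out_free_inbuf;
        }

        rc = simulate_ioctl_error ? -EIO : 0;

        free(rsp);                      /* out_free_rsp in the real code */
out_free_inbuf:
        free(inbuf);
        return rc;
}

int main(void)
{
        return do_validate(0) ? 1 : 0;
}
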
 
 enum securityEnum
@@ -2590,7 +2594,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
         * If we want to do a RDMA write, fill in and append
         * smbd_buffer_descriptor_v1 to the end of read request
         */
-       if (server->rdma && rdata &&
+       if (server->rdma && rdata && !server->sign &&
                rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) {
 
                struct smbd_buffer_descriptor_v1 *v1;
@@ -2968,7 +2972,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
         * If we want to do a server RDMA read, fill in and append
         * smbd_buffer_descriptor_v1 to the end of write request
         */
-       if (server->rdma && wdata->bytes >=
+       if (server->rdma && !server->sign && wdata->bytes >=
                server->smbd_conn->rdma_readwrite_threshold) {
 
                struct smbd_buffer_descriptor_v1 *v1;
index 6093e5142b2bc3792cf40afb7f7abbc02b60bbb4..d28f358022c507cc0dc55dbebb382ed29dec708d 100644 (file)
@@ -297,7 +297,7 @@ struct smb2_encryption_neg_context {
        __le16  DataLength;
        __le32  Reserved;
        __le16  CipherCount; /* AES-128-GCM and AES-128-CCM */
-       __le16  Ciphers[2]; /* Ciphers[0] since only one used now */
+       __le16  Ciphers[1]; /* Ciphers[0] since only one used now */
 } __packed;
 
 struct smb2_negotiate_rsp {
index 5008af546dd16d6e276953b24b354cd7bb54796d..c62f7c95683c513213bc786152f78b60a2f8fbd6 100644 (file)
@@ -1028,7 +1028,7 @@ static int smbd_post_send(struct smbd_connection *info,
        for (i = 0; i < request->num_sge; i++) {
                log_rdma_send(INFO,
                        "rdma_request sge[%d] addr=%llu length=%u\n",
-                       i, request->sge[0].addr, request->sge[0].length);
+                       i, request->sge[i].addr, request->sge[i].length);
                ib_dma_sync_single_for_device(
                        info->id->device,
                        request->sge[i].addr,
@@ -2086,7 +2086,7 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
        int start, i, j;
        int max_iov_size =
                info->max_send_size - sizeof(struct smbd_data_transfer);
-       struct kvec iov[SMBDIRECT_MAX_SGE];
+       struct kvec *iov;
        int rc;
 
        info->smbd_send_pending++;
@@ -2096,32 +2096,20 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
        }
 
        /*
-        * This usually means a configuration error
-        * We use RDMA read/write for packet size > rdma_readwrite_threshold
-        * as long as it's properly configured we should never get into this
-        * situation
-        */
-       if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
-               log_write(ERR, "maximum send segment %x exceeding %x\n",
-                        rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
-               rc = -EINVAL;
-               goto done;
-       }
-
-       /*
-        * Remove the RFC1002 length defined in MS-SMB2 section 2.1
-        * It is used only for TCP transport
+        * Skip the RFC1002 length defined in MS-SMB2 section 2.1.
+        * It is carried in iov[0] and used only by the TCP transport.
         * In future we may want to add a transport layer under protocol
         * layer so this will only be issued to TCP transport
         */
-       iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
-       iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
-       buflen += iov[0].iov_len;
+
+       if (rqst->rq_iov[0].iov_len != 4) {
+               log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len);
+               return -EINVAL;
+       }
+       iov = &rqst->rq_iov[1];
 
        /* total up iov array first */
-       for (i = 1; i < rqst->rq_nvec; i++) {
-               iov[i].iov_base = rqst->rq_iov[i].iov_base;
-               iov[i].iov_len = rqst->rq_iov[i].iov_len;
+       for (i = 0; i < rqst->rq_nvec-1; i++) {
                buflen += iov[i].iov_len;
        }
 
@@ -2139,6 +2127,10 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
                goto done;
        }
 
+       cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen);
+       for (i = 0; i < rqst->rq_nvec-1; i++)
+               dump_smb(iov[i].iov_base, iov[i].iov_len);
+
        remaining_data_length = buflen;
 
        log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
@@ -2194,12 +2186,14 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
                                                goto done;
                                }
                                i++;
+                               if (i == rqst->rq_nvec-1)
+                                       break;
                        }
                        start = i;
                        buflen = 0;
                } else {
                        i++;
-                       if (i == rqst->rq_nvec) {
+                       if (i == rqst->rq_nvec-1) {
                                /* send out all remaining vecs */
                                remaining_data_length -= buflen;
                                log_write(INFO,
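
Instead of copying rq_iov into a local array minus the 4-byte RFC1002 prefix, the code now just points past iov[0] and iterates over rq_nvec - 1 entries. A small POSIX sketch of that view shift (fabricated packet contents):

#include <stdio.h>
#include <sys/uio.h>

int main(void)
{
        char len4[4] = { 0, 0, 0, 42 }; /* RFC1002 length prefix */
        char hdr[]  = "smb2-header";
        char body[] = "payload";
        struct iovec rq_iov[] = {
                { len4, sizeof(len4) },
                { hdr,  sizeof(hdr) - 1 },
                { body, sizeof(body) - 1 },
        };
        int rq_nvec = 3;

        /* RDMA path: point past iov[0] and total only the SMB2 packet */
        struct iovec *iov = &rq_iov[1];
        size_t buflen = 0;
        int i;

        for (i = 0; i < rq_nvec - 1; i++)
                buflen += iov[i].iov_len;
        printf("sending %zu bytes over RDMA\n", buflen);
        return 0;
}
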
index 8f6f25918229d9aeb680173a87cf9d684d2eb5c8..927226a2122f4dde57955fe805013d14fd6237df 100644 (file)
@@ -753,7 +753,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
                goto out;
 
 #ifdef CONFIG_CIFS_SMB311
-       if (ses->status == CifsNew)
+       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
                smb311_update_preauth_hash(ses, rqst->rq_iov+1,
                                           rqst->rq_nvec-1);
 #endif
@@ -798,7 +798,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
                *resp_buf_type = CIFS_SMALL_BUFFER;
 
 #ifdef CONFIG_CIFS_SMB311
-       if (ses->status == CifsNew) {
+       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
                struct kvec iov = {
                        .iov_base = buf + 4,
                        .iov_len = get_rfc1002_length(buf)
@@ -834,8 +834,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
        if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
                new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1),
                                  GFP_KERNEL);
-               if (!new_iov)
+               if (!new_iov) {
+                       /* otherwise cifs_send_recv below sets resp_buf_type */
+                       *resp_buf_type = CIFS_NO_BUFFER;
                        return -ENOMEM;
+               }
        } else
                new_iov = s_iov;
 
index a33d8fb1bf2a7fc3ac13459e526fb9ec88373db3..508b905d744d752bfa671f4a8a09ea066ad8c25a 100644 (file)
@@ -321,6 +321,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        ext4_grpblk_t offset;
        ext4_grpblk_t next_zero_bit;
+       ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb);
        ext4_fsblk_t blk;
        ext4_fsblk_t group_first_block;
 
@@ -338,7 +339,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
        /* check whether block bitmap block number is set */
        blk = ext4_block_bitmap(sb, desc);
        offset = blk - group_first_block;
-       if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+       if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
            !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
                /* bad block bitmap */
                return blk;
@@ -346,7 +347,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
        /* check whether the inode bitmap block number is set */
        blk = ext4_inode_bitmap(sb, desc);
        offset = blk - group_first_block;
-       if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+       if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
            !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
                /* bad block bitmap */
                return blk;
@@ -354,8 +355,8 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
        /* check whether the inode table block number is set */
        blk = ext4_inode_table(sb, desc);
        offset = blk - group_first_block;
-       if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
-           EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= sb->s_blocksize)
+       if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
+           EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit)
                return blk;
        next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
                        EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
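
The old bound compared a bit offset in the block bitmap against s_blocksize, which is a byte count: with 4 KiB blocks a group's bitmap covers 8 * 4096 = 32768 clusters, so any valid offset of 4096 or more was falsely reported as a bad bitmap. A toy comparison of the two bounds (assuming the common 4 KiB layout with bigalloc off, so clusters equal blocks):

#include <stdio.h>

int main(void)
{
        unsigned int blocksize = 4096;                   /* bytes per block */
        unsigned int clusters_per_group = 8 * blocksize; /* bits per bitmap block */
        unsigned int offset = 20000;                     /* a valid bitmap bit */

        printf("old bound flags it bad: %d\n", offset >= blocksize);
        printf("new bound flags it bad: %d\n", offset >= clusters_per_group);
        return 0;
}
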
index 0a7315961bac6ebbca0c2bfe83cc3aba8fc5c807..c969275ce3ee7469167a6921e8fa22e4c91ed3a2 100644 (file)
@@ -5329,8 +5329,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
        stop = le32_to_cpu(extent->ee_block);
 
        /*
-        * In case of left shift, Don't start shifting extents until we make
-        * sure the hole is big enough to accommodate the shift.
+        * For left shifts, make sure the hole on the left is big enough to
+        * accommodate the shift.  For right shifts, make sure the last extent
+        * won't be shifted beyond EXT_MAX_BLOCKS.
        */
        if (SHIFT == SHIFT_LEFT) {
                path = ext4_find_extent(inode, start - 1, &path,
@@ -5350,9 +5351,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 
                if ((start == ex_start && shift > ex_start) ||
                    (shift > start - ex_end)) {
-                       ext4_ext_drop_refs(path);
-                       kfree(path);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto out;
+               }
+       } else {
+               if (shift > EXT_MAX_BLOCKS -
+                   (stop + ext4_ext_get_actual_len(extent))) {
+                       ret = -EINVAL;
+                       goto out;
                }
        }
 
index 185f7e61f4cfe00531e71e772a519af005f6bd69..eb104e8476f040e61253a6676c93e882e5a22314 100644 (file)
@@ -5886,5 +5886,6 @@ static void __exit ext4_exit_fs(void)
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
 MODULE_DESCRIPTION("Fourth Extended Filesystem");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
 module_init(ext4_init_fs)
 module_exit(ext4_exit_fs)
index 4b12ba70a895d8a359692f63459860f2e1dcd24c..471d863958bc26b7466f4ad5d6fcf8bb8c9d0d0e 100644 (file)
@@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits)
         */
        if (inode && inode_to_wb_is_valid(inode)) {
                struct bdi_writeback *wb;
-               bool locked, congested;
+               struct wb_lock_cookie lock_cookie = {};
+               bool congested;
 
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
                congested = wb_congested(wb, cong_bits);
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &lock_cookie);
                return congested;
        }
 
@@ -1960,7 +1961,7 @@ void wb_workfn(struct work_struct *work)
        }
 
        if (!list_empty(&wb->work_list))
-               mod_delayed_work(bdi_wq, &wb->dwork, 0);
+               wb_wakeup(wb);
        else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
                wb_wakeup_delayed(wb);
 
index 513c357c734b0d838f331bebc71b1deeb9f8f57f..a6c0f54c48c30f25315865f8080d412fb428c00a 100644 (file)
@@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        return 0;
 
 out_put_hidden_dir:
+       cancel_delayed_work_sync(&sbi->sync_work);
        iput(sbi->hidden_dir);
 out_put_root:
        dput(sb->s_root);
index ac311037d7a59b03d6403a65c56c7978697463c6..8aa453784402bbed8d33aa76d025071bf6523c75 100644 (file)
@@ -532,6 +532,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
         */
        ret = start_this_handle(journal, handle, GFP_NOFS);
        if (ret < 0) {
+               handle->h_journal = journal;
                jbd2_journal_free_reserved(handle);
                return ret;
        }
index 01c6b3894406e0b9c51ede0086ca7633dd1f5044..7869622af22a2cd2ea0dfd164b1a5b3fba31cd25 100644 (file)
@@ -4250,10 +4250,11 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
 static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                         struct dentry *new_dentry, bool preserve)
 {
-       int error;
+       int error, had_lock;
        struct inode *inode = d_inode(old_dentry);
        struct buffer_head *old_bh = NULL;
        struct inode *new_orphan_inode = NULL;
+       struct ocfs2_lock_holder oh;
 
        if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
                return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                goto out;
        }
 
+       had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+                                           &oh);
+       if (had_lock < 0) {
+               error = had_lock;
+               mlog_errno(error);
+               goto out;
+       }
+
        /* If the security isn't preserved, we need to re-initialize them. */
        if (!preserve) {
                error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                if (error)
                        mlog_errno(error);
        }
-out:
        if (!error) {
                error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
                                                       new_dentry);
                if (error)
                        mlog_errno(error);
        }
+       ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
 
+out:
        if (new_orphan_inode) {
                /*
                 * We need to open_unlock the inode no matter whether we
index 1ade1206bb896f50ad326d281c42ec56e66d8372..0eaeb41453f566f600e6f97d9d1c69faacd89d1e 100644 (file)
@@ -43,6 +43,21 @@ config PROC_VMCORE
         help
         Exports the dump image of crashed kernel in ELF format.
 
+config PROC_VMCORE_DEVICE_DUMP
+       bool "Device Hardware/Firmware Log Collection"
+       depends on PROC_VMCORE
+       default n
+       help
+         After a kernel panic, device drivers can collect a
+         device-specific snapshot of their hardware or firmware before
+         the underlying devices are initialized in the crash recovery
+         kernel. Note that the device driver must be present in the
+         crash recovery kernel's initramfs to collect its underlying
+         device snapshot.
+
+         If you say Y here, the collected device dumps will be added
+         as ELF notes to /proc/vmcore.
+
 config PROC_SYSCTL
        bool "Sysctl support (/proc/sys)" if EXPERT
        depends on PROC_FS
index eafa39a3a88cb479eaca9d164e05cc4dbca1db70..1a76d751cf3c24c2f4271b1a855ff5d7bca9ee85 100644 (file)
@@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
         * Inherently racy -- command line shares address space
         * with code and data.
         */
-       rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+       rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
        if (rv <= 0)
                goto out_free_page;
 
@@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
                        int nr_read;
 
                        _count = min3(count, len, PAGE_SIZE);
-                       nr_read = access_remote_vm(mm, p, page, _count, 0);
+                       nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
                        if (nr_read < 0)
                                rv = nr_read;
                        if (nr_read <= 0)
@@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
                                bool final;
 
                                _count = min3(count, len, PAGE_SIZE);
-                               nr_read = access_remote_vm(mm, p, page, _count, 0);
+                               nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
                                if (nr_read < 0)
                                        rv = nr_read;
                                if (nr_read <= 0)
@@ -946,7 +946,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
                max_len = min_t(size_t, PAGE_SIZE, count);
                this_len = min(max_len, this_len);
 
-               retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
+               retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
 
                if (retval <= 0) {
                        ret = retval;
@@ -1693,6 +1693,12 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
        kuid_t uid;
        kgid_t gid;
 
+       if (unlikely(task->flags & PF_KTHREAD)) {
+               *ruid = GLOBAL_ROOT_UID;
+               *rgid = GLOBAL_ROOT_GID;
+               return;
+       }
+
        /* Default to the tasks effective ownership */
        rcu_read_lock();
        cred = __task_cred(task);
index d1e82761de813abb95af0faac99194dba5821538..e64ecb9f272090bf6b23772a0d36e56b63b8106a 100644 (file)
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;
+       struct page *p;
+
+       if (!pfn_valid(pfn))
+               return 1;
+
+       p = pfn_to_page(pfn);
+       if (!memmap_valid_within(pfn, p, page_zone(p)))
+               return 1;
 
        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
-       ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+       ent->addr = (unsigned long)page_to_virt(p);
        ent->size = nr_pages << PAGE_SHIFT;
 
-       /* Sanity check: Can happen in 32bit arch...maybe */
-       if (ent->addr < (unsigned long) __va(0))
+       if (!virt_addr_valid(ent->addr))
                goto free_out;
 
        /* cut not-mapped area. ....from ppc-32 code. */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;
 
-       /* cut when vmalloc() area is higher than direct-map area */
-       if (VMALLOC_START > (unsigned long)__va(0)) {
-               if (ent->addr > VMALLOC_START)
-                       goto free_out;
+       /*
+        * We've already checked virt_addr_valid(), so we know this is a
+        * valid pointer and can compare against it to determine whether
+        * the area needs trimming.
+        */
+       if (VMALLOC_START > ent->addr) {
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }
index a000d7547479e8849acbea6dea406dc5b2bb7c46..b572cc865b928635c413f0d14d68b1ed46d5c7bf 100644 (file)
@@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
                LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
                LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
                nr_running(), nr_threads,
-               idr_get_cursor(&task_active_pid_ns(current)->idr));
+               idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
        return 0;
 }
 
index 65ae54659833888142d376835d9b4a8a70c7218e..c486ad4b43f00857a254cfcedeebfa2884162b52 100644 (file)
@@ -1310,9 +1310,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
                else if (is_swap_pmd(pmd)) {
                        swp_entry_t entry = pmd_to_swp_entry(pmd);
+                       unsigned long offset = swp_offset(entry);
 
+                       offset += (addr & ~PMD_MASK) >> PAGE_SHIFT;
                        frame = swp_type(entry) |
-                               (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+                               (offset << MAX_SWAPFILES_SHIFT);
                        flags |= PM_SWAP;
                        if (pmd_swp_soft_dirty(pmd))
                                flags |= PM_SOFT_DIRTY;
@@ -1332,6 +1334,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                                break;
                        if (pm->show_pfn && (flags & PM_PRESENT))
                                frame++;
+                       else if (flags & PM_SWAP)
+                               frame += (1 << MAX_SWAPFILES_SHIFT);
                }
                spin_unlock(ptl);
                return err;
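
For a swapped-out huge page, each base page inside the PMD range must report its own swap offset, which is what the two hunks above arrange: the walk seeds the offset from the migration entry plus the page's index within the PMD, then bumps the encoded frame once per page. A hedged sketch of the encoding with illustrative constants (the _DEMO names are stand-ins, not kernel API):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT_DEMO          12
#define PMD_SHIFT_DEMO           21
#define PMD_MASK_DEMO            (~((1UL << PMD_SHIFT_DEMO) - 1))
#define MAX_SWAPFILES_SHIFT_DEMO 5

/* Encode a pagemap frame for one base page of a swapped-out THP:
 * low bits carry the swap type, the rest the per-page swap offset. */
static uint64_t swap_frame(unsigned int type, uint64_t base_offset,
                           unsigned long addr)
{
        uint64_t offset = base_offset +
                ((addr & ~PMD_MASK_DEMO) >> PAGE_SHIFT_DEMO);

        return type | (offset << MAX_SWAPFILES_SHIFT_DEMO);
}

int main(void)
{
        /* consecutive pages of one THP yield consecutive swap offsets */
        printf("%#llx\n", (unsigned long long)swap_frame(1, 100, 0x200000));
        printf("%#llx\n", (unsigned long long)swap_frame(1, 100, 0x201000));
        return 0;
}
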
index a45f0af22a60ec20d0bd354a8b0684f2d3296a51..cfb6674331fded9083883df0d02a7e7f45c40d55 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/crash_dump.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
@@ -38,12 +39,23 @@ static size_t elfcorebuf_sz_orig;
 
 static char *elfnotes_buf;
 static size_t elfnotes_sz;
+/* Size of all notes minus the device dump notes */
+static size_t elfnotes_orig_sz;
 
 /* Total size of vmcore file. */
 static u64 vmcore_size;
 
 static struct proc_dir_entry *proc_vmcore;
 
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+/* Device Dump list and mutex to synchronize access to the list */
+static LIST_HEAD(vmcoredd_list);
+static DEFINE_MUTEX(vmcoredd_mutex);
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+/* Device Dump Size */
+static size_t vmcoredd_orig_sz;
+
 /*
  * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
  * The called function has to take care of module refcounting.
@@ -178,6 +190,77 @@ static int copy_to(void *target, void *src, size_t size, int userbuf)
        return 0;
 }
 
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf)
+{
+       struct vmcoredd_node *dump;
+       u64 offset = 0;
+       int ret = 0;
+       size_t tsz;
+       char *buf;
+
+       mutex_lock(&vmcoredd_mutex);
+       list_for_each_entry(dump, &vmcoredd_list, list) {
+               if (start < offset + dump->size) {
+                       tsz = min(offset + (u64)dump->size - start, (u64)size);
+                       buf = dump->buf + start - offset;
+                       if (copy_to(dst, buf, tsz, userbuf)) {
+                               ret = -EFAULT;
+                               goto out_unlock;
+                       }
+
+                       size -= tsz;
+                       start += tsz;
+                       dst += tsz;
+
+                       /* Leave now if the buffer is already full */
+                       if (!size)
+                               goto out_unlock;
+               }
+               offset += dump->size;
+       }
+
+out_unlock:
+       mutex_unlock(&vmcoredd_mutex);
+       return ret;
+}
+
+static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
+                              u64 start, size_t size)
+{
+       struct vmcoredd_node *dump;
+       u64 offset = 0;
+       int ret = 0;
+       size_t tsz;
+       char *buf;
+
+       mutex_lock(&vmcoredd_mutex);
+       list_for_each_entry(dump, &vmcoredd_list, list) {
+               if (start < offset + dump->size) {
+                       tsz = min(offset + (u64)dump->size - start, (u64)size);
+                       buf = dump->buf + start - offset;
+                       if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) {
+                               ret = -EFAULT;
+                               goto out_unlock;
+                       }
+
+                       size -= tsz;
+                       start += tsz;
+                       dst += tsz;
+
+                       /* Leave now if the buffer is already full */
+                       if (!size)
+                               goto out_unlock;
+               }
+               offset += dump->size;
+       }
+
+out_unlock:
+       mutex_unlock(&vmcoredd_mutex);
+       return ret;
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
 /* Read from the ELF header and then the crash dump. On error, a negative
  * value is returned; otherwise, the number of bytes read is returned.
  */
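
Both helpers above walk the dump list as if the per-device buffers were one concatenated stream, copying only the [start, start + size) window. A hedged, userspace-runnable sketch of that traversal (struct chunk and copy_window are illustrative, not kernel API):

#include <stdio.h>
#include <string.h>

struct chunk { const char *buf; size_t size; };

/* Copy `size` bytes starting at logical offset `start` out of an
 * array of chunks viewed as one concatenated stream. */
static int copy_window(char *dst, size_t start, size_t size,
                       const struct chunk *chunks, size_t nchunks)
{
        size_t offset = 0;

        for (size_t i = 0; i < nchunks; i++) {
                if (start < offset + chunks[i].size) {
                        size_t tsz = offset + chunks[i].size - start;

                        if (tsz > size)
                                tsz = size;
                        memcpy(dst, chunks[i].buf + (start - offset), tsz);
                        size -= tsz;
                        start += tsz;
                        dst += tsz;
                        if (!size)
                                return 0;       /* window filled */
                }
                offset += chunks[i].size;
        }
        return size ? -1 : 0;   /* window ran past the last chunk */
}

int main(void)
{
        struct chunk c[] = { { "hello", 5 }, { "world", 5 } };
        char out[6] = { 0 };

        copy_window(out, 3, 5, c, 2);   /* window spans both chunks */
        puts(out);                      /* prints "lowor" */
        return 0;
}
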
@@ -215,10 +298,41 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
        if (*fpos < elfcorebuf_sz + elfnotes_sz) {
                void *kaddr;
 
+               /* We add device dumps before other elf notes because the
+                * other elf notes may not fill the elf notes buffer
+                * completely and we will end up with zero-filled data
+                * between the elf notes and the device dumps. Tools will
+                * then try to decode this zero-filled data as valid notes
+                * and we don't want that. Hence, adding device dumps before
+                * the other elf notes ensures that zero-filled data can be
+                * avoided.
+                */
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+               /* Read device dumps */
+               if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) {
+                       tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
+                                 (size_t)*fpos, buflen);
+                       start = *fpos - elfcorebuf_sz;
+                       if (vmcoredd_copy_dumps(buffer, start, tsz, userbuf))
+                               return -EFAULT;
+
+                       buflen -= tsz;
+                       *fpos += tsz;
+                       buffer += tsz;
+                       acc += tsz;
+
+                       /* Leave now if the buffer is already full */
+                       if (!buflen)
+                               return acc;
+               }
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+               /* Read remaining elf notes */
                tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
-               kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
+               kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz;
                if (copy_to(buffer, kaddr, tsz, userbuf))
                        return -EFAULT;
+
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
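
With device dumps spliced in, a file offset now falls into one of four regions, and the read path above peels them off in order. A hedged sketch of the dispatch with illustrative sizes; note that, as in the patch, the notes size already includes the device dump bytes:

#include <stdio.h>
#include <stddef.h>

enum region { R_EHDR, R_DEVDUMP, R_NOTES, R_MEMORY };

/* Classify a file offset against the layout: ELF headers, then
 * device dumps, then the remaining ELF notes, then memory regions. */
static enum region classify(size_t fpos, size_t ehdr_sz, size_t dd_sz,
                            size_t notes_sz /* includes dd_sz */)
{
        if (fpos < ehdr_sz)
                return R_EHDR;
        if (fpos < ehdr_sz + dd_sz)
                return R_DEVDUMP;
        if (fpos < ehdr_sz + notes_sz)
                return R_NOTES;
        return R_MEMORY;
}

int main(void)
{
        /* 4 KiB headers, 8 KiB device dumps, 16 KiB notes in total */
        printf("%d %d %d %d\n",
               classify(0, 4096, 8192, 16384),
               classify(5000, 4096, 8192, 16384),
               classify(14000, 4096, 8192, 16384),
               classify(30000, 4096, 8192, 16384));    /* 0 1 2 3 */
        return 0;
}
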
@@ -302,10 +416,8 @@ static const struct vm_operations_struct vmcore_mmap_ops = {
 };
 
 /**
- * alloc_elfnotes_buf - allocate buffer for ELF note segment in
- *                      vmalloc memory
- *
- * @notes_sz: size of buffer
+ * vmcore_alloc_buf - allocate buffer in vmalloc memory
+ * @size: size of buffer
  *
  * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
  * the buffer to user-space by means of remap_vmalloc_range().
@@ -313,12 +425,12 @@ static const struct vm_operations_struct vmcore_mmap_ops = {
  * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
  * disabled and there's no need to allow users to mmap the buffer.
  */
-static inline char *alloc_elfnotes_buf(size_t notes_sz)
+static inline char *vmcore_alloc_buf(size_t size)
 {
 #ifdef CONFIG_MMU
-       return vmalloc_user(notes_sz);
+       return vmalloc_user(size);
 #else
-       return vzalloc(notes_sz);
+       return vzalloc(size);
 #endif
 }
 
@@ -446,11 +558,46 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
        if (start < elfcorebuf_sz + elfnotes_sz) {
                void *kaddr;
 
+               /* We add device dumps before other elf notes because the
+                * other elf notes may not fill the elf notes buffer
+                * completely and we will end up with zero-filled data
+                * between the elf notes and the device dumps. Tools will
+                * then try to decode this zero-filled data as valid notes
+                * and we don't want that. Hence, adding device dumps before
+                * the other elf notes ensures that zero-filled data can be
+                * avoided. This also ensures that the device dumps and
+                * other elf notes can be properly mmapped at a page-aligned
+                * address.
+                */
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+               /* Read device dumps */
+               if (start < elfcorebuf_sz + vmcoredd_orig_sz) {
+                       u64 start_off;
+
+                       tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
+                                 (size_t)start, size);
+                       start_off = start - elfcorebuf_sz;
+                       if (vmcoredd_mmap_dumps(vma, vma->vm_start + len,
+                                               start_off, tsz))
+                               goto fail;
+
+                       size -= tsz;
+                       start += tsz;
+                       len += tsz;
+
+                       /* Leave now if the buffer is already full */
+                       if (!size)
+                               return 0;
+               }
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+               /* Read remaining elf notes */
                tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
-               kaddr = elfnotes_buf + start - elfcorebuf_sz;
+               kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
                if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
                                                kaddr, tsz))
                        goto fail;
+
                size -= tsz;
                start += tsz;
                len += tsz;
@@ -502,8 +649,8 @@ static struct vmcore* __init get_new_element(void)
        return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
 }
 
-static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
-                                 struct list_head *vc_list)
+static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
+                          struct list_head *vc_list)
 {
        u64 size;
        struct vmcore *m;
@@ -665,7 +812,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
                return rc;
 
        *notes_sz = roundup(phdr_sz, PAGE_SIZE);
-       *notes_buf = alloc_elfnotes_buf(*notes_sz);
+       *notes_buf = vmcore_alloc_buf(*notes_sz);
        if (!*notes_buf)
                return -ENOMEM;
 
@@ -698,6 +845,11 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
        /* Modify e_phnum to reflect merged headers. */
        ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
 
+       /* Store the size of all notes.  We need this to update the note
+        * header when device dumps are added.
+        */
+       elfnotes_orig_sz = phdr.p_memsz;
+
        return 0;
 }
 
@@ -851,7 +1003,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
                return rc;
 
        *notes_sz = roundup(phdr_sz, PAGE_SIZE);
-       *notes_buf = alloc_elfnotes_buf(*notes_sz);
+       *notes_buf = vmcore_alloc_buf(*notes_sz);
        if (!*notes_buf)
                return -ENOMEM;
 
@@ -884,6 +1036,11 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
        /* Modify e_phnum to reflect merged headers. */
        ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
 
+       /* Store the size of all notes.  We need this to update the note
+        * header when device dumps are added.
+        */
+       elfnotes_orig_sz = phdr.p_memsz;
+
        return 0;
 }
 
@@ -976,8 +1133,8 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
 }
 
 /* Sets offset fields of vmcore elements. */
-static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
-                                          struct list_head *vc_list)
+static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
+                                   struct list_head *vc_list)
 {
        loff_t vmcore_off;
        struct vmcore *m;
@@ -1145,6 +1302,202 @@ static int __init parse_crash_elf_headers(void)
        return 0;
 }
 
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+/**
+ * vmcoredd_write_header - Write vmcore device dump header at the
+ * beginning of the dump's buffer.
+ * @buf: Output buffer where the note is written
+ * @data: Dump info
+ * @size: Size of the dump
+ *
+ * Fills the beginning of the dump's buffer with the vmcore device dump header.
+ */
+static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data,
+                                 u32 size)
+{
+       struct vmcoredd_header *vdd_hdr = (struct vmcoredd_header *)buf;
+
+       vdd_hdr->n_namesz = sizeof(vdd_hdr->name);
+       vdd_hdr->n_descsz = size + sizeof(vdd_hdr->dump_name);
+       vdd_hdr->n_type = NT_VMCOREDD;
+
+       strncpy((char *)vdd_hdr->name, VMCOREDD_NOTE_NAME,
+               sizeof(vdd_hdr->name));
+       memcpy(vdd_hdr->dump_name, data->dump_name, sizeof(vdd_hdr->dump_name));
+}
+
+/**
+ * vmcoredd_update_program_headers - Update all Elf program headers
+ * @elfptr: Pointer to elf header
+ * @elfnotesz: Size of elf notes aligned to page size
+ * @vmcoreddsz: Size of device dumps to be added to elf note header
+ *
+ * Determine the type of Elf header (Elf64 or Elf32) and update the elf note size.
+ * Also update the offsets of all the program headers after the elf note header.
+ */
+static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz,
+                                           size_t vmcoreddsz)
+{
+       unsigned char *e_ident = (unsigned char *)elfptr;
+       u64 start, end, size;
+       loff_t vmcore_off;
+       u32 i;
+
+       vmcore_off = elfcorebuf_sz + elfnotesz;
+
+       if (e_ident[EI_CLASS] == ELFCLASS64) {
+               Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfptr;
+               Elf64_Phdr *phdr = (Elf64_Phdr *)(elfptr + sizeof(Elf64_Ehdr));
+
+               /* Update all program headers */
+               for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+                       if (phdr->p_type == PT_NOTE) {
+                               /* Update note size */
+                               phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
+                               phdr->p_filesz = phdr->p_memsz;
+                               continue;
+                       }
+
+                       start = rounddown(phdr->p_offset, PAGE_SIZE);
+                       end = roundup(phdr->p_offset + phdr->p_memsz,
+                                     PAGE_SIZE);
+                       size = end - start;
+                       phdr->p_offset = vmcore_off + (phdr->p_offset - start);
+                       vmcore_off += size;
+               }
+       } else {
+               Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfptr;
+               Elf32_Phdr *phdr = (Elf32_Phdr *)(elfptr + sizeof(Elf32_Ehdr));
+
+               /* Update all program headers */
+               for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+                       if (phdr->p_type == PT_NOTE) {
+                               /* Update note size */
+                               phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
+                               phdr->p_filesz = phdr->p_memsz;
+                               continue;
+                       }
+
+                       start = rounddown(phdr->p_offset, PAGE_SIZE);
+                       end = roundup(phdr->p_offset + phdr->p_memsz,
+                                     PAGE_SIZE);
+                       size = end - start;
+                       phdr->p_offset = vmcore_off + (phdr->p_offset - start);
+                       vmcore_off += size;
+               }
+       }
+}
+
+/**
+ * vmcoredd_update_size - Update the total size of the device dumps and update
+ * the Elf header
+ * @dump_size: Size of the current device dump to be added to total size
+ *
+ * Update the total size of all the device dumps and update the Elf program
+ * headers. Calculate the new offsets for the vmcore list and update the
+ * total vmcore size.
+ */
+static void vmcoredd_update_size(size_t dump_size)
+{
+       vmcoredd_orig_sz += dump_size;
+       elfnotes_sz = roundup(elfnotes_orig_sz, PAGE_SIZE) + vmcoredd_orig_sz;
+       vmcoredd_update_program_headers(elfcorebuf, elfnotes_sz,
+                                       vmcoredd_orig_sz);
+
+       /* Update vmcore list offsets */
+       set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
+
+       vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
+                                     &vmcore_list);
+       proc_vmcore->size = vmcore_size;
+}
+
+/**
+ * vmcore_add_device_dump - Add a buffer containing a device dump to vmcore
+ * @data: Dump info
+ *
+ * Allocate a buffer and invoke the calling driver's dump collect routine.
+ * Write Elf note at the beginning of the buffer to indicate vmcore device
+ * dump and add the dump to the global list.
+ */
+int vmcore_add_device_dump(struct vmcoredd_data *data)
+{
+       struct vmcoredd_node *dump;
+       void *buf = NULL;
+       size_t data_size;
+       int ret;
+
+       if (!data || !strlen(data->dump_name) ||
+           !data->vmcoredd_callback || !data->size)
+               return -EINVAL;
+
+       dump = vzalloc(sizeof(*dump));
+       if (!dump) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+
+       /* Keep the buffer size page-aligned so that it can be mmapped */
+       data_size = roundup(sizeof(struct vmcoredd_header) + data->size,
+                           PAGE_SIZE);
+
+       /* Allocate buffer for drivers to write their dumps */
+       buf = vmcore_alloc_buf(data_size);
+       if (!buf) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+
+       vmcoredd_write_header(buf, data, data_size -
+                             sizeof(struct vmcoredd_header));
+
+       /* Invoke the driver's dump collection routine */
+       ret = data->vmcoredd_callback(data, buf +
+                                     sizeof(struct vmcoredd_header));
+       if (ret)
+               goto out_err;
+
+       dump->buf = buf;
+       dump->size = data_size;
+
+       /* Add the dump to the device dump list */
+       mutex_lock(&vmcoredd_mutex);
+       list_add_tail(&dump->list, &vmcoredd_list);
+       mutex_unlock(&vmcoredd_mutex);
+
+       vmcoredd_update_size(data_size);
+       return 0;
+
+out_err:
+       if (buf)
+               vfree(buf);
+
+       if (dump)
+               vfree(dump);
+
+       return ret;
+}
+EXPORT_SYMBOL(vmcore_add_device_dump);
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+/* Free all dumps in vmcore device dump list */
+static void vmcore_free_device_dumps(void)
+{
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+       mutex_lock(&vmcoredd_mutex);
+       while (!list_empty(&vmcoredd_list)) {
+               struct vmcoredd_node *dump;
+
+               dump = list_first_entry(&vmcoredd_list, struct vmcoredd_node,
+                                       list);
+               list_del(&dump->list);
+               vfree(dump->buf);
+               vfree(dump);
+       }
+       mutex_unlock(&vmcoredd_mutex);
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+}
+
 /* Init function for vmcore module. */
 static int __init vmcore_init(void)
 {
@@ -1192,4 +1545,7 @@ void vmcore_cleanup(void)
                kfree(m);
        }
        free_elfcorebuf();
+
+       /* clear vmcore device dump list */
+       vmcore_free_device_dumps();
 }
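
From the driver side, the API added above is consumed through struct vmcoredd_data, declared in crash_dump.h further down in this diff. A hedged sketch of a registration; the driver name, dump size, and collect callback are illustrative and not taken from any in-tree driver:

#include <linux/crash_dump.h>
#include <linux/string.h>

#define MYDRV_DUMP_SIZE (64 * 1024)    /* illustrative size */

/* Called back from vmcore_add_device_dump() with a buffer of
 * data->size bytes to fill with device state. */
static int mydrv_collect_dump(struct vmcoredd_data *data, void *buf)
{
        memset(buf, 0, data->size);    /* stand-in for real collection */
        return 0;
}

static int mydrv_register_dump(void)
{
        struct vmcoredd_data data = {};

        strscpy(data.dump_name, "mydrv", sizeof(data.dump_name));
        data.size = MYDRV_DUMP_SIZE;
        data.vmcoredd_callback = mydrv_collect_dump;

        /* returns -EOPNOTSUPP when PROC_VMCORE_DEVICE_DUMP is off */
        return vmcore_add_device_dump(&data);
}
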
index ce4a34a2751d6ebe3ee0806d88ff58adf1023061..35a124400d60a11a468ec298335835a2666f087c 100644 (file)
@@ -511,7 +511,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
                if (args->flags & ATTR_CREATE)
                        return retval;
                retval = xfs_attr_shortform_remove(args);
-               ASSERT(retval == 0);
+               if (retval)
+                       return retval;
+               /*
+                * Since we have removed the old attr, clear ATTR_REPLACE so
+                * that the leaf format add routine won't trip over the attr
+                * not being around.
+                */
+               args->flags &= ~ATTR_REPLACE;
        }
 
        if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
index 6a7c2f03ea11a123bcd04fbc70be96cbc897382f..040eeda8426ff7d996b87f144282303b26cc39b0 100644 (file)
@@ -725,12 +725,16 @@ xfs_bmap_extents_to_btree(
        *logflagsp = 0;
        if ((error = xfs_alloc_vextent(&args))) {
                xfs_iroot_realloc(ip, -1, whichfork);
+               ASSERT(ifp->if_broot == NULL);
+               XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
                xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                return error;
        }
 
        if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
                xfs_iroot_realloc(ip, -1, whichfork);
+               ASSERT(ifp->if_broot == NULL);
+               XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
                xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                return -ENOSPC;
        }
index ef68b1de006a7ecedd7eabe41551165b0d3beffb..1201107eabc632c91894f52590609045a104eabf 100644 (file)
@@ -466,6 +466,8 @@ xfs_dinode_verify(
                                return __this_address;
                        if (di_size > XFS_DFORK_DSIZE(dip, mp))
                                return __this_address;
+                       if (dip->di_nextents)
+                               return __this_address;
                        /* fall through */
                case XFS_DINODE_FMT_EXTENTS:
                case XFS_DINODE_FMT_BTREE:
@@ -484,12 +486,31 @@ xfs_dinode_verify(
        if (XFS_DFORK_Q(dip)) {
                switch (dip->di_aformat) {
                case XFS_DINODE_FMT_LOCAL:
+                       if (dip->di_anextents)
+                               return __this_address;
+               /* fall through */
                case XFS_DINODE_FMT_EXTENTS:
                case XFS_DINODE_FMT_BTREE:
                        break;
                default:
                        return __this_address;
                }
+       } else {
+               /*
+                * If there is no fork offset, this may be a freshly-made inode
+                * in a new disk cluster, in which case di_aformat is zeroed.
+                * Otherwise, such an inode must be in EXTENTS format; this goes
+                * for freed inodes as well.
+                */
+               switch (dip->di_aformat) {
+               case 0:
+               case XFS_DINODE_FMT_EXTENTS:
+                       break;
+               default:
+                       return __this_address;
+               }
+               if (dip->di_anextents)
+                       return __this_address;
        }
 
        /* only version 3 or greater inodes are extensively verified here */
index 299aee4b7b0b35b14df223cc7d92efecdd026d1f..e70fb8cceceaa5d2333573e49460beba75629815 100644 (file)
@@ -778,22 +778,26 @@ xfs_file_fallocate(
                if (error)
                        goto out_unlock;
        } else if (mode & FALLOC_FL_INSERT_RANGE) {
-               unsigned int blksize_mask = i_blocksize(inode) - 1;
+               unsigned int    blksize_mask = i_blocksize(inode) - 1;
+               loff_t          isize = i_size_read(inode);
 
-               new_size = i_size_read(inode) + len;
                if (offset & blksize_mask || len & blksize_mask) {
                        error = -EINVAL;
                        goto out_unlock;
                }
 
-               /* check the new inode size does not wrap through zero */
-               if (new_size > inode->i_sb->s_maxbytes) {
+               /*
+                * New inode size must not exceed ->s_maxbytes, accounting for
+                * possible signed overflow.
+                */
+               if (inode->i_sb->s_maxbytes - isize < len) {
                        error = -EFBIG;
                        goto out_unlock;
                }
+               new_size = isize + len;
 
                /* Offset should be less than i_size */
-               if (offset >= i_size_read(inode)) {
+               if (offset >= isize) {
                        error = -EINVAL;
                        goto out_unlock;
                }
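
The rewritten check sidesteps the overflow lurking in the naive form: computing isize + len first can wrap before the comparison, while the subtraction against s_maxbytes cannot. A hedged, userspace-runnable sketch with illustrative values:

#include <stdio.h>
#include <stdint.h>

/* Equivalent to isize + len > maxbytes, but the intermediate
 * subtraction can never overflow for in-range operands. */
static int grows_past_limit(int64_t isize, int64_t len, int64_t maxbytes)
{
        return maxbytes - isize < len;
}

int main(void)
{
        int64_t max = INT64_MAX;

        printf("%d\n", grows_past_limit(max - 10, 20, max));  /* 1: too big */
        printf("%d\n", grows_past_limit(100, 20, max));       /* 0: fine    */
        return 0;
}
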
@@ -876,8 +880,18 @@ xfs_file_dedupe_range(
        struct file     *dst_file,
        u64             dst_loff)
 {
+       struct inode    *srci = file_inode(src_file);
+       u64             max_dedupe;
        int             error;
 
+       /*
+        * Since we have to read all these pages in to compare them, cut
+        * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
+        * That means we won't do more than MAX_RW_COUNT IO per request.
+        */
+       max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
+       if (len > max_dedupe)
+               len = max_dedupe;
        error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
                                     len, true);
        if (error)
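
The clamp above is a halve-then-align: take half the I/O budget and round it down to the filesystem block size with a mask, which requires the block size to be a power of two. A hedged sketch with the 4 KiB-page value of MAX_RW_COUNT spelled out as an illustrative constant:

#include <stdio.h>

#define MAX_RW_COUNT_DEMO 2147479552UL  /* INT_MAX & ~(4096 - 1) */

int main(void)
{
        unsigned long blocksize = 4096;
        unsigned long max_dedupe =
                (MAX_RW_COUNT_DEMO >> 1) & ~(blocksize - 1);

        printf("%lu\n", max_dedupe);    /* 1073737728, block aligned */
        return 0;
}
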
index 278841c75b97b0f55431e75c3a172a041f43f94b..af240573e48295687e4403b0bcc11875d5da2e82 100644 (file)
 #endif
 
 #ifdef CONFIG_SERIAL_EARLYCON
-#define EARLYCON_TABLE() STRUCT_ALIGN();                       \
+#define EARLYCON_TABLE() . = ALIGN(8);                         \
                         VMLINUX_SYMBOL(__earlycon_table) = .;  \
                         KEEP(*(__earlycon_table))              \
                         VMLINUX_SYMBOL(__earlycon_table_end) = .;
index 562fa7df263704503ea14c8be2127597cb8d8589..98e63d870139db0c9956f4446d3e298eb2964e7a 100644 (file)
@@ -19,7 +19,7 @@
 #define DRM_HDCP_RI_LEN                                2
 #define DRM_HDCP_V_PRIME_PART_LEN              4
 #define DRM_HDCP_V_PRIME_NUM_PARTS             5
-#define DRM_HDCP_NUM_DOWNSTREAM(x)             (x & 0x3f)
+#define DRM_HDCP_NUM_DOWNSTREAM(x)             (x & 0x7f)
 #define DRM_HDCP_MAX_CASCADE_EXCEEDED(x)       (x & BIT(3))
 #define DRM_HDCP_MAX_DEVICE_EXCEEDED(x)                (x & BIT(7))
 
index 86e3ec662ef4d58391add4d1781b8a070cc82d50..90ec780bfc68622f264e7e7c7e1e030c3b83cfba 100644 (file)
@@ -76,7 +76,7 @@
 #define I2C6           63
 #define USART1         64
 #define RTCAPB         65
-#define TZC            66
+#define TZC1           66
 #define TZPC           67
 #define IWDG1          68
 #define BSEC           69
 #define CRC1           110
 #define USBH           111
 #define ETHSTP         112
+#define TZC2           113
 
 /* Kernel clocks */
 #define SDMMC1_K       118
 #define CK_MCO2                212
 
 /* TRACE & DEBUG clocks */
-#define DBG            213
 #define CK_DBG         214
 #define CK_TRACE       215
 
index e518e4e3dfb50a83520a3b8100c3c27899c15bdf..4b1548129fa2b3e2b87565ec02d9dd5adb80ccb2 100644 (file)
@@ -37,10 +37,15 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
         * Our PSCI implementation stays the same across versions from
         * v0.2 onward, only adding the few mandatory functions (such
         * as FEATURES with 1.0) that are required by newer
-        * revisions. It is thus safe to return the latest.
+        * revisions. It is thus safe to return the latest, unless
+        * userspace has instructed us otherwise.
         */
-       if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+       if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) {
+               if (vcpu->kvm->arch.psci_version)
+                       return vcpu->kvm->arch.psci_version;
+
                return KVM_ARM_PSCI_LATEST;
+       }
 
        return KVM_ARM_PSCI_0_1;
 }
@@ -48,4 +53,11 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
 
 int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
 
+struct kvm_one_reg;
+
+int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
 #endif /* __KVM_ARM_PSCI_H__ */
index 24f03941ada8e2d07803765c5770d5beb9e1a7a6..e7efe12a81bdfae67d4f5709e34b14831bd594ac 100644 (file)
@@ -131,6 +131,7 @@ struct vgic_irq {
                u32 mpidr;                      /* GICv3 target VCPU */
        };
        u8 source;                      /* GICv2 SGIs only */
+       u8 active_source;               /* GICv2 SGIs only */
        u8 priority;
        enum vgic_irq_config config;    /* Level or edge */
 
index b0a7f315bfbed18dd50ef19a32d870f6a42c05b5..212b3822d1804c09d04ee0b31958db7c0394e9dd 100644 (file)
@@ -485,7 +485,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
 struct virtchnl_rss_lut {
        u16 vsi_id;
        u16 lut_entries;
-       u8 lut[1];        /* RSS lookup table*/
+       u8 lut[1];        /* RSS lookup table */
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
@@ -819,7 +819,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
                return VIRTCHNL_ERR_PARAM;
        }
        /* few more checks */
-       if ((valid_len != msglen) || (err_msg_format))
+       if (err_msg_format || valid_len != msglen)
                return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH;
 
        return 0;
index bfe86b54f6c149a6c8718f13417ecf7a796b7193..0bd432a4d7bd00ce376292720edd104d617c80c2 100644 (file)
@@ -223,6 +223,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
        set_wb_congested(bdi->wb.congested, sync);
 }
 
+struct wb_lock_cookie {
+       bool locked;
+       unsigned long flags;
+};
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 /**
index f6be4b0b6c18d2b76308f1eac441a4e6c223f8ac..72ca0f3d39f3039b0cda57c2a350c941075bdd62 100644 (file)
@@ -347,7 +347,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 /**
  * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
  * @inode: target inode
- * @lockedp: temp bool output param, to be passed to the end function
+ * @cookie: output param, to be passed to the end function
  *
  * The caller wants to access the wb associated with @inode but isn't
  * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
@@ -355,12 +355,12 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
  * association doesn't change until the transaction is finished with
  * unlocked_inode_to_wb_end().
  *
- * The caller must call unlocked_inode_to_wb_end() with *@lockdep
- * afterwards and can't sleep during transaction.  IRQ may or may not be
- * disabled on return.
+ * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
+ * can't sleep during the transaction.  IRQs may or may not be disabled on
+ * return.
  */
 static inline struct bdi_writeback *
-unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 {
        rcu_read_lock();
 
@@ -368,10 +368,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
         * Paired with store_release in inode_switch_wb_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
-       *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
+       cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
-       if (unlikely(*lockedp))
-               xa_lock_irq(&inode->i_mapping->i_pages);
+       if (unlikely(cookie->locked))
+               xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);
 
        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
@@ -383,12 +383,13 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 /**
  * unlocked_inode_to_wb_end - end inode wb access transaction
  * @inode: target inode
- * @locked: *@lockedp from unlocked_inode_to_wb_begin()
+ * @cookie: @cookie from unlocked_inode_to_wb_begin()
  */
-static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+static inline void unlocked_inode_to_wb_end(struct inode *inode,
+                                           struct wb_lock_cookie *cookie)
 {
-       if (unlikely(locked))
-               xa_unlock_irq(&inode->i_mapping->i_pages);
+       if (unlikely(cookie->locked))
+               xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags);
 
        rcu_read_unlock();
 }
@@ -435,12 +436,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 }
 
 static inline struct bdi_writeback *
-unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 {
        return inode_to_wb(inode);
 }
 
-static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+static inline void unlocked_inode_to_wb_end(struct inode *inode,
+                                           struct wb_lock_cookie *cookie)
 {
 }
 
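A hedged usage sketch of the cookie API above, assuming a caller that only needs the wb association pinned across a short non-sleeping section; the function name is made up, and the inc_wb_stat()/WB_WRITEBACK statistic bump is purely illustrative:

static void mydrv_account_writeback(struct inode *inode)
{
        struct wb_lock_cookie cookie = {};
        struct bdi_writeback *wb;

        wb = unlocked_inode_to_wb_begin(inode, &cookie);
        /* the association is stable here; IRQs may be disabled and the
         * i_pages lock may be held until the matching end call */
        inc_wb_stat(wb, WB_WRITEBACK);
        unlocked_inode_to_wb_end(inode, &cookie);
}
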
index e3986f4b34615fa7c50c6d412976594ebc3a7fdf..ebc34a5686dc7cd0bfc31b7d1220724282f51b0f 100644 (file)
@@ -9,6 +9,9 @@
 struct blk_mq_tags;
 struct blk_flush_queue;
 
+/**
+ * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
+ */
 struct blk_mq_hw_ctx {
        struct {
                spinlock_t              lock;
index 9af3e0f430bcbedd114f40c8be05f91dfe03a2cf..5c4eee04319174a291a6e76378e18c105da87fda 100644 (file)
@@ -605,6 +605,11 @@ struct request_queue {
         * initialized by the low level device driver (e.g. scsi/sd.c).
         * Stacking drivers (device mappers) may or may not initialize
         * these fields.
+        *
+        * Reads of this information must be protected with blk_queue_enter() /
+        * blk_queue_exit(). Modifying this information is only allowed while
+        * no requests are being processed. See also blk_mq_freeze_queue() and
+        * blk_mq_unfreeze_queue().
         */
        unsigned int            nr_zones;
        unsigned long           *seq_zones_bitmap;
@@ -737,6 +742,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_quiesced(q)  test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 #define blk_queue_preempt_only(q)                              \
        test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+#define blk_queue_fua(q)       test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 
 extern int blk_set_preempt_only(struct request_queue *q);
 extern void blk_clear_preempt_only(struct request_queue *q);
index ee5275e7d4df1d30cb55021bb5c60b8830289539..ed0122b45b633a46573cff9ad9c1ff0a7dbe7006 100644 (file)
@@ -33,6 +33,7 @@ struct bpf_map_ops {
        void (*map_release)(struct bpf_map *map, struct file *map_file);
        void (*map_free)(struct bpf_map *map);
        int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+       void (*map_release_uref)(struct bpf_map *map);
 
        /* funcs callable from userspace and from eBPF programs */
        void *(*map_lookup_elem)(struct bpf_map *map, void *key);
@@ -109,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
        return container_of(map, struct bpf_offloaded_map, map);
 }
 
+static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
+{
+       return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
 static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 {
        return map->ops->map_seq_show_elem && map->ops->map_check_btf;
@@ -234,6 +240,8 @@ struct bpf_verifier_ops {
                                struct bpf_insn_access_aux *info);
        int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
                            const struct bpf_prog *prog);
+       int (*gen_ld_abs)(const struct bpf_insn *orig,
+                         struct bpf_insn *insn_buf);
        u32 (*convert_ctx_access)(enum bpf_access_type type,
                                  const struct bpf_insn *src,
                                  struct bpf_insn *dst,
@@ -353,8 +361,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
                                struct bpf_prog *old_prog);
 int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
-                            __u32 __user *prog_ids, u32 request_cnt,
-                            __u32 __user *prog_cnt);
+                            u32 *prog_ids, u32 request_cnt,
+                            u32 *prog_cnt);
 int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
                        struct bpf_prog *exclude_prog,
                        struct bpf_prog *include_prog,
@@ -365,6 +373,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
                struct bpf_prog **_prog, *__prog;       \
                struct bpf_prog_array *_array;          \
                u32 _ret = 1;                           \
+               preempt_disable();                      \
                rcu_read_lock();                        \
                _array = rcu_dereference(array);        \
                if (unlikely(check_non_null && !_array))\
@@ -376,6 +385,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
                }                                       \
 _out:                                                  \
                rcu_read_unlock();                      \
+               preempt_enable_no_resched();            \
                _ret;                                   \
         })
 
@@ -448,7 +458,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
                                 void *key, void *value, u64 map_flags);
 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
-void bpf_fd_array_map_clear(struct bpf_map *map);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
                                void *key, void *value, u64 map_flags);
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
@@ -618,7 +627,7 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
-static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
 {
        return aux->offload_requested;
 }
@@ -659,6 +668,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 
 #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
+struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
 #else
 static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
@@ -666,6 +676,12 @@ static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
        return NULL;
 }
 
+static inline struct sock  *__sock_hash_lookup_elem(struct bpf_map *map,
+                                                   void *key)
+{
+       return NULL;
+}
+
 static inline int sock_map_prog(struct bpf_map *map,
                                struct bpf_prog *prog,
                                u32 type)
@@ -674,6 +690,31 @@ static inline int sock_map_prog(struct bpf_map *map,
 }
 #endif
 
+#if defined(CONFIG_XDP_SOCKETS)
+struct xdp_sock;
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+                      struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+#else
+struct xdp_sock;
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+                                                    u32 key)
+{
+       return NULL;
+}
+
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+                                    struct xdp_sock *xs)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+#endif
+
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -687,10 +728,10 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
-extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
-extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
+extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
index e6fe98ae3794330298f834a89a7448b5fa9d730b..ddf896abcfb6ea71bd7a3e0ab298d0f3332d5b97 100644 (file)
@@ -2,7 +2,6 @@
 #ifndef __LINUX_BPF_TRACE_H__
 #define __LINUX_BPF_TRACE_H__
 
-#include <trace/events/bpf.h>
 #include <trace/events/xdp.h>
 
 #endif /* __LINUX_BPF_TRACE_H__ */
index 2b28fcf6f6ae8c151f8ae03fb505ae8c3cdfc702..b67f8793de0d6083b8908f3794560363e67659fd 100644 (file)
@@ -47,6 +47,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
 #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
+#if defined(CONFIG_XDP_SOCKETS)
+BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
+#endif
 #endif
index 7e61c395fddffea20ed6dfc8a92ad3e2f848a20e..c286813deaebe48a8f2f92ca10a7e0655eed35b3 100644 (file)
@@ -173,6 +173,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 
 #define BPF_MAX_SUBPROGS 256
 
+struct bpf_subprog_info {
+       u32 start; /* insn idx of function entry point */
+       u16 stack_depth; /* max. stack depth used by this function */
+};
+
 /* single container for all structs
  * one verifier_env per bpf_check() call
  */
@@ -191,14 +196,12 @@ struct bpf_verifier_env {
        bool seen_direct_write;
        struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
        struct bpf_verifier_log log;
-       u32 subprog_starts[BPF_MAX_SUBPROGS];
-       /* computes the stack depth of each bpf function */
-       u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
+       struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
        u32 subprog_cnt;
 };
 
-void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
-                      va_list args);
+__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+                                     const char *fmt, va_list args);
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
                                           const char *fmt, ...);
 
index d3339dd48b1ab4224d512a4f3c47b85b48da51d6..b324e01ccf2da6a3a06142d7e1ae960ccb1b1e3e 100644 (file)
@@ -25,6 +25,7 @@
 #define PHY_ID_BCM54612E               0x03625e60
 #define PHY_ID_BCM54616S               0x03625d10
 #define PHY_ID_BCM57780                        0x03625d90
+#define PHY_ID_BCM89610                        0x03625cd0
 
 #define PHY_ID_BCM7250                 0xae025280
 #define PHY_ID_BCM7260                 0xae025190
index a966dc6d61ee1cb4a40e0b07b33c0d42d3a3e0b1..e076c4697049da1e62f39160ed2f1fcdf73c743c 100644 (file)
@@ -44,5 +44,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
                                        u32 *ret_size);
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
                       struct seq_file *m);
+int btf_get_fd_by_id(u32 id);
+u32 btf_id(const struct btf *btf);
 
 #endif
index 528ccc943cee0a5da4bc95e5d200a8591debe836..96bb3228598927c0cb089b5f4fa88da445cd1b26 100644 (file)
@@ -77,7 +77,10 @@ struct ceph_osd_data {
                        u32                     bio_length;
                };
 #endif /* CONFIG_BLOCK */
-               struct ceph_bvec_iter   bvec_pos;
+               struct {
+                       struct ceph_bvec_iter   bvec_pos;
+                       u32                     num_bvecs;
+               };
        };
 };
 
@@ -412,6 +415,10 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
                                    struct ceph_bio_iter *bio_pos,
                                    u32 bio_length);
 #endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
+                                     unsigned int which,
+                                     struct bio_vec *bvecs, u32 num_bvecs,
+                                     u32 bytes);
 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
                                         unsigned int which,
                                         struct ceph_bvec_iter *bvec_pos);
@@ -426,7 +433,8 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
                                        bool own_pages);
 void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
                                       unsigned int which,
-                                      struct bio_vec *bvecs, u32 bytes);
+                                      struct bio_vec *bvecs, u32 num_bvecs,
+                                      u32 bytes);
 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
                                        unsigned int which,
                                        struct page **pages, u64 length,
index 210a890008f9e12987e578b949e6702864d076f6..1d25e149c1c5b8b439e2a301ae6c32ccd3164d2b 100644 (file)
@@ -765,6 +765,9 @@ int __clk_mux_determine_rate(struct clk_hw *hw,
 int __clk_determine_rate(struct clk_hw *core, struct clk_rate_request *req);
 int __clk_mux_determine_rate_closest(struct clk_hw *hw,
                                     struct clk_rate_request *req);
+int clk_mux_determine_rate_flags(struct clk_hw *hw,
+                                struct clk_rate_request *req,
+                                unsigned long flags);
 void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent);
 void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
                           unsigned long max_rate);
index ceb96ecab96e255be42cecc015be8c0e01b4f710..7d98e263e048d988ab77a202b8eb8e81e9194931 100644 (file)
@@ -25,6 +25,9 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
+#undef __no_sanitize_address
+#define __no_sanitize_address __attribute__((no_sanitize("address")))
+
 /* Clang doesn't have a way to turn it off per-function, yet. */
 #ifdef __noretpoline
 #undef __noretpoline
index edfeaba954295a76c1c842253004404365436208..a1a959ba24ffada75e13de61aa570490a988c8d4 100644 (file)
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _LINUX_CORESIGHT_PMU_H
index f7ac2aa932699012217abc07a21798e68b6de170..3e4ba9d753c88f064e4965e2752e2b8886845a3b 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/kexec.h>
 #include <linux/proc_fs.h>
 #include <linux/elf.h>
+#include <uapi/linux/vmcore.h>
 
 #include <asm/pgtable.h> /* for pgprot_t */
 
@@ -93,4 +94,21 @@ static inline bool is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
 
 extern unsigned long saved_max_pfn;
+
+/* Device Dump information to be filled by drivers */
+struct vmcoredd_data {
+       char dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Unique name of the dump */
+       unsigned int size;                       /* Size of the dump */
+       /* Driver's registered callback to be invoked to collect dump */
+       int (*vmcoredd_callback)(struct vmcoredd_data *data, void *buf);
+};
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+int vmcore_add_device_dump(struct vmcoredd_data *data);
+#else
+static inline int vmcore_add_device_dump(struct vmcoredd_data *data)
+{
+       return -EOPNOTSUPP;
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
 #endif /* LINUX_CRASHDUMP_H */
index 0059b99e1f25db0ef52d09bf50e1239e2fbf14e9..477956990f5e3dc703df3154ef06ed3a5e3264d0 100644 (file)
@@ -256,7 +256,9 @@ enum probe_type {
  *             automatically.
  * @pm:                Power management operations of the device which matched
  *             this driver.
- * @coredump:  Called through sysfs to initiate a device coredump.
+ * @coredump:  Called when the sysfs entry is written to. The device driver
+ *             is expected to call the dev_coredump API resulting in a
+ *             uevent.
  * @p:         Driver core's private data, no one other than the driver
  *             core can touch this.
  *
@@ -288,7 +290,7 @@ struct device_driver {
        const struct attribute_group **groups;
 
        const struct dev_pm_ops *pm;
-       int (*coredump) (struct device *dev);
+       void (*coredump) (struct device *dev);
 
        struct driver_private *p;
 };
index f1b7d68ac4600a3df4ec3280ef2c25086dfc6670..3016d8c456bcfd4a8f2dfe139aab476e9387dc63 100644 (file)
@@ -395,8 +395,8 @@ typedef struct {
        u32 attributes;
        u32 get_bar_attributes;
        u32 set_bar_attributes;
-       uint64_t romsize;
-       void *romimage;
+       u64 romsize;
+       u32 romimage;
 } efi_pci_io_protocol_32;
 
 typedef struct {
@@ -415,8 +415,8 @@ typedef struct {
        u64 attributes;
        u64 get_bar_attributes;
        u64 set_bar_attributes;
-       uint64_t romsize;
-       void *romimage;
+       u64 romsize;
+       u64 romimage;
 } efi_pci_io_protocol_64;
 
 typedef struct {
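
The switch from `void *` to fixed-width fields matters because these structs mirror firmware-defined layouts: a kernel-side pointer changes size and alignment with the build, while the protocol does not. A hedged, userspace-runnable sketch, with struct and field names pared down for illustration:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct pci_io_32_wrong { uint32_t attributes; void *romimage; };
struct pci_io_32_fixed { uint32_t attributes; uint32_t romimage; };

int main(void)
{
        /* on an LP64 kernel the pointer version pads and widens */
        printf("wrong: offset %zu, size %zu\n",
               offsetof(struct pci_io_32_wrong, romimage),
               sizeof(struct pci_io_32_wrong));
        printf("fixed: offset %zu, size %zu\n",
               offsetof(struct pci_io_32_fixed, romimage),
               sizeof(struct pci_io_32_fixed));
        return 0;
}
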
index ebe41811ed34164a9a7000fee9a3a45f55d2480c..f8a2245b70ac354231c7911e9684f756ed47de0f 100644 (file)
@@ -310,6 +310,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  *     fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS
  *     instead of the latter), any change to them will be overwritten
  *     by kernel. Returns a negative error code or zero.
+ * @get_fecparam: Get the network device Forward Error Correction parameters.
+ * @set_fecparam: Set the network device Forward Error Correction parameters.
+ * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
+ *     This is only useful if the device maintains PHY statistics and
+ *     cannot use the standard PHY library helpers.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -405,5 +410,7 @@ struct ethtool_ops {
                                      struct ethtool_fecparam *);
        int     (*set_fecparam)(struct net_device *,
                                      struct ethtool_fecparam *);
+       void    (*get_ethtool_phy_stats)(struct net_device *,
+                                        struct ethtool_stats *, u64 *);
 };
 #endif /* _LINUX_ETHTOOL_H */
index 4da8b23081748bc3644e8090a19d800710d008dd..9dbcb9d55921b6914724e90f8c99e21149112527 100644 (file)
@@ -47,7 +47,9 @@ struct xdp_buff;
 /* Additional register mappings for converted user programs. */
 #define BPF_REG_A      BPF_REG_0
 #define BPF_REG_X      BPF_REG_7
-#define BPF_REG_TMP    BPF_REG_8
+#define BPF_REG_TMP    BPF_REG_2       /* scratch reg */
+#define BPF_REG_D      BPF_REG_8       /* data, callee-saved */
+#define BPF_REG_H      BPF_REG_9       /* hlen, callee-saved */
 
 /* Kernel hidden auxiliary/helper register for hardening step.
  * Only used by eBPF JITs. It's nothing more than a temporary
@@ -468,7 +470,8 @@ struct bpf_prog {
                                dst_needed:1,   /* Do we need dst entry? */
                                blinded:1,      /* Was blinded */
                                is_func:1,      /* program is a bpf function */
-                               kprobe_override:1; /* Do we override a kprobe? */
+                               kprobe_override:1, /* Do we override a kprobe? */
+                               has_callchain_buf:1; /* callchain buffer allocated? */
        enum bpf_prog_type      type;           /* Type of BPF program */
        enum bpf_attach_type    expected_attach_type; /* For some prog types */
        u32                     len;            /* Number of filter blocks */
@@ -512,9 +515,8 @@ struct sk_msg_buff {
        int sg_end;
        struct scatterlist sg_data[MAX_SKB_FRAGS];
        bool sg_copy[MAX_SKB_FRAGS];
-       __u32 key;
        __u32 flags;
-       struct bpf_map *map;
+       struct sock *sk_redir;
        struct sk_buff *skb;
        struct list_head list;
 };
@@ -759,7 +761,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
  * This does not appear to be a real limitation for existing software.
  */
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-                           struct bpf_prog *prog);
+                           struct xdp_buff *xdp, struct bpf_prog *prog);
 int xdp_do_redirect(struct net_device *dev,
                    struct xdp_buff *xdp,
                    struct bpf_prog *prog);
index e0c95c9f1e292275ce7556321887576bbd643399..e64c0294f50bf82dd50d447e37a5c77f86eabc26 100644 (file)
@@ -217,12 +217,10 @@ struct fsnotify_mark_connector {
        union { /* Object pointer [lock] */
                struct inode *inode;
                struct vfsmount *mnt;
-       };
-       union {
-               struct hlist_head list;
                /* Used listing heads to free after srcu period expires */
                struct fsnotify_mark_connector *destroy_next;
        };
+       struct hlist_head list;
 };
 
 /*
index c826b0b5232aff63877bb441fb62e43128b4e976..6cb8a57896682af6dd29ac67b283d147f79e71a5 100644 (file)
@@ -368,7 +368,9 @@ static inline void free_part_stats(struct hd_struct *part)
        part_stat_add(cpu, gendiskp, field, -subnd)
 
 void part_in_flight(struct request_queue *q, struct hd_struct *part,
-                       unsigned int inflight[2]);
+                   unsigned int inflight[2]);
+void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+                      unsigned int inflight[2]);
 void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
                        int rw);
 void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
index a2656c3ebe81cafaff88dc2329897b4a22d89edd..3892e9c8b2deb7993c11c1ac171375c7ba6e9e09 100644 (file)
@@ -161,9 +161,11 @@ struct hrtimer_clock_base {
 enum  hrtimer_base_type {
        HRTIMER_BASE_MONOTONIC,
        HRTIMER_BASE_REALTIME,
+       HRTIMER_BASE_BOOTTIME,
        HRTIMER_BASE_TAI,
        HRTIMER_BASE_MONOTONIC_SOFT,
        HRTIMER_BASE_REALTIME_SOFT,
+       HRTIMER_BASE_BOOTTIME_SOFT,
        HRTIMER_BASE_TAI_SOFT,
        HRTIMER_MAX_CLOCK_BASES,
 };
index 02639ebea2f068f2e4b026610501a5a1768c58dc..585d271824250e2595f8d1c66fd1c7ac18e6fefc 100644 (file)
@@ -93,11 +93,39 @@ static inline bool br_multicast_router(const struct net_device *dev)
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
 bool br_vlan_enabled(const struct net_device *dev);
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid);
+int br_vlan_get_info(const struct net_device *dev, u16 vid,
+                    struct bridge_vlan_info *p_vinfo);
 #else
 static inline bool br_vlan_enabled(const struct net_device *dev)
 {
        return false;
 }
+
+static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+{
+       return -1;
+}
+
+static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
+                                  struct bridge_vlan_info *p_vinfo)
+{
+       return -1;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_BRIDGE)
+struct net_device *br_fdb_find_port(const struct net_device *br_dev,
+                                   const unsigned char *addr,
+                                   __u16 vid);
+#else
+static inline struct net_device *
+br_fdb_find_port(const struct net_device *br_dev,
+                const unsigned char *addr,
+                __u16 vid)
+{
+       return NULL;
+}
 #endif
 
 #endif
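
These accessors let drivers query bridge VLAN and FDB state without a hard dependency on the bridge module; the stubs keep callers building when the relevant options are off. A hedged sketch of a caller (my_port_sync_pvid is an illustrative name):

static int my_port_sync_pvid(struct net_device *br_dev)
{
	u16 pvid;
	int err;

	err = br_vlan_get_pvid(br_dev, &pvid);
	if (err)
		return err;	/* no bridge support or no PVID set */

	pr_debug("bridge %s: PVID %u\n", br_dev->name, pvid);
	return 0;
}
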
index 4cb7aeeafce03df846188fb936b6bd2ee6b0417f..2e55e4cdbd8aaf2fb6f1e9220ba2a8f3d6285de5 100644 (file)
@@ -21,7 +21,7 @@ struct macvlan_dev {
        struct hlist_node       hlist;
        struct macvlan_port     *port;
        struct net_device       *lowerdev;
-       void                    *fwd_priv;
+       void                    *accel_priv;
        struct vlan_pcpu_stats __percpu *pcpu_stats;
 
        DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
@@ -61,10 +61,6 @@ extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
                                  struct nlattr *tb[], struct nlattr *data[],
                                  struct netlink_ext_ack *extack);
 
-extern void macvlan_count_rx(const struct macvlan_dev *vlan,
-                            unsigned int len, bool success,
-                            bool multicast);
-
 extern void macvlan_dellink(struct net_device *dev, struct list_head *head);
 
 extern int macvlan_link_register(struct rtnl_link_ops *ops);
@@ -86,4 +82,27 @@ macvlan_dev_real_dev(const struct net_device *dev)
 }
 #endif
 
+static inline void *macvlan_accel_priv(struct net_device *dev)
+{
+       struct macvlan_dev *macvlan = netdev_priv(dev);
+
+       return macvlan->accel_priv;
+}
+
+static inline bool macvlan_supports_dest_filter(struct net_device *dev)
+{
+       struct macvlan_dev *macvlan = netdev_priv(dev);
+
+       return macvlan->mode == MACVLAN_MODE_PRIVATE ||
+              macvlan->mode == MACVLAN_MODE_VEPA ||
+              macvlan->mode == MACVLAN_MODE_BRIDGE;
+}
+
+static inline int macvlan_release_l2fw_offload(struct net_device *dev)
+{
+       struct macvlan_dev *macvlan = netdev_priv(dev);
+
+       macvlan->accel_priv = NULL;
+       return dev_uc_add(macvlan->lowerdev, dev->dev_addr);
+}
 #endif /* _LINUX_IF_MACVLAN_H */
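
The new helpers expose L2-forwarding-offload state to lower drivers: macvlan_accel_priv() fetches the accelerator cookie, macvlan_supports_dest_filter() gates offload on modes that filter by destination MAC, and macvlan_release_l2fw_offload() tears the offload down. A hedged sketch of a lower driver's check (my_can_offload_macvlan is illustrative):

static bool my_can_offload_macvlan(struct net_device *upper)
{
	if (!netif_is_macvlan(upper))
		return false;

	/* only destination-filtering modes are offloadable */
	return macvlan_supports_dest_filter(upper);
}
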
index 80db19d3a5054d5c064727644af49fdf31310111..8de55e4b5ee93bf824ba2a33a6cd4664c590acd8 100644 (file)
@@ -28,6 +28,12 @@ struct vmcore {
        loff_t offset;
 };
 
+struct vmcoredd_node {
+       struct list_head list;  /* List of dumps */
+       void *buf;              /* Buffer containing device's dump */
+       unsigned int size;      /* Size of the buffer */
+};
+
 #ifdef CONFIG_PROC_KCORE
 extern void kclist_add(struct kcore_list *, void *, size_t, int type);
 #else
index c1961761311dbfd5968d6ed64ea91ca3c7d25b0e..2803264c512f8f6bf80dffc462c4a7ab079ce5f3 100644 (file)
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
+void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
index 6930c63126c78a9ef665b5b5653a60a8773b4d4c..6d6e79c59e68fa7fd5387f48814341082d6b8526 100644 (file)
@@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
-#ifdef CONFIG_S390
-#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
-#elif defined(CONFIG_ARM64)
-#define KVM_MAX_IRQ_ROUTES 4096
-#else
-#define KVM_MAX_IRQ_ROUTES 1024
-#endif
+#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
 
 bool kvm_arch_can_set_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
index 81d0799b609168a4a9ea00da4f3efdf82bb32f58..122e7e9d3091b5b55f4ded91ae7a9dccb620d193 100644 (file)
@@ -225,6 +225,7 @@ enum {
        MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP          = 1ULL <<  36,
        MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
        MLX4_DEV_CAP_FLAG2_USER_MAC_EN          = 1ULL << 38,
+       MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1ULL << 39,
 };
 
 enum {
index 767d193c269aef28465476b91e85862a4e64715c..d703774982cadb352359769e1a69c6f55dc94acf 100644 (file)
@@ -1284,25 +1284,9 @@ enum {
 };
 
 static inline const struct cpumask *
-mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector)
+mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
 {
-       const struct cpumask *mask;
-       struct irq_desc *desc;
-       unsigned int irq;
-       int eqn;
-       int err;
-
-       err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq);
-       if (err)
-               return NULL;
-
-       desc = irq_to_desc(irq);
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
-       mask = irq_data_get_effective_affinity_mask(&desc->irq_data);
-#else
-       mask = desc->irq_common_data.affinity;
-#endif
-       return mask;
+       return dev->priv.irq_info[vector].mask;
 }
 
 #endif /* MLX5_DRIVER_H */
index 47aecc4fa8c2b9e4f00e6fccd2c08c759f486695..9f4d32e41c06916c50d01c71cd16ec4e0b0cfcc2 100644 (file)
@@ -90,8 +90,12 @@ struct mlx5_flow_destination {
        union {
                u32                     tir_num;
                struct mlx5_flow_table  *ft;
-               u32                     vport_num;
                struct mlx5_fc          *counter;
+               struct {
+                       u16             num;
+                       u16             vhca_id;
+                       bool            vhca_id_valid;
+               } vport;
        };
 };
 
index 1aad455538f40836635d352a06c20955d6ec5ef7..b4ea8a9914c4f66f93db6e3a1953f9787b4018e4 100644 (file)
@@ -356,22 +356,6 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
        u8         reserved_at_6[0x1a];
 };
 
-struct mlx5_ifc_ipv4_layout_bits {
-       u8         reserved_at_0[0x60];
-
-       u8         ipv4[0x20];
-};
-
-struct mlx5_ifc_ipv6_layout_bits {
-       u8         ipv6[16][0x8];
-};
-
-union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
-       struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
-       struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
-       u8         reserved_at_0[0x80];
-};
-
 struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
        u8         smac_47_16[0x20];
 
@@ -412,7 +396,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
        u8         reserved_at_0[0x8];
        u8         source_sqn[0x18];
 
-       u8         reserved_at_20[0x10];
+       u8         source_eswitch_owner_vhca_id[0x10];
        u8         source_port[0x10];
 
        u8         outer_second_prio[0x3];
@@ -557,7 +541,8 @@ struct mlx5_ifc_e_switch_cap_bits {
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_insert_if_not_exist[0x1];
        u8         vport_cvlan_insert_overwrite[0x1];
-       u8         reserved_at_5[0x19];
+       u8         reserved_at_5[0x18];
+       u8         merged_eswitch[0x1];
        u8         nic_vport_node_guid_modify[0x1];
        u8         nic_vport_port_guid_modify[0x1];
 
@@ -1147,8 +1132,9 @@ enum mlx5_flow_destination_type {
 struct mlx5_ifc_dest_format_struct_bits {
        u8         destination_type[0x8];
        u8         destination_id[0x18];
-
-       u8         reserved_at_20[0x20];
+       u8         destination_eswitch_owner_vhca_id_valid[0x1];
+       u8         reserved_at_21[0xf];
+       u8         destination_eswitch_owner_vhca_id[0x10];
 };
 
 struct mlx5_ifc_flow_counter_list_bits {
@@ -6993,7 +6979,9 @@ struct mlx5_ifc_create_flow_group_in_bits {
        u8         reserved_at_a0[0x8];
        u8         table_id[0x18];
 
-       u8         reserved_at_c0[0x20];
+       u8         source_eswitch_owner_vhca_id_valid[0x1];
+
+       u8         reserved_at_c1[0x1f];
 
        u8         start_flow_index[0x20];
 
index ec052491ba3d7a38cb84ca0109d8820eb9b07bdb..193091537cb6da1968b016b21d9f62b68da4b605 100644 (file)
 #ifndef MLX5_IFC_FPGA_H
 #define MLX5_IFC_FPGA_H
 
+struct mlx5_ifc_ipv4_layout_bits {
+       u8         reserved_at_0[0x60];
+
+       u8         ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+       u8         ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+       struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+       struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+       u8         reserved_at_0[0x80];
+};
+
 enum {
        MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
 };
 
 enum {
        MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC    = 0x2,
+       MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS      = 0x3,
 };
 
 struct mlx5_ifc_fpga_shell_caps_bits {
@@ -370,6 +387,27 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
        u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_tls_extended_cap_bits {
+       u8         aes_gcm_128[0x1];
+       u8         aes_gcm_256[0x1];
+       u8         reserved_at_2[0x1e];
+       u8         reserved_at_20[0x20];
+       u8         context_capacity_total[0x20];
+       u8         context_capacity_rx[0x20];
+       u8         context_capacity_tx[0x20];
+       u8         reserved_at_a0[0x10];
+       u8         tls_counter_size[0x10];
+       u8         tls_counters_addr_low[0x20];
+       u8         tls_counters_addr_high[0x20];
+       u8         rx[0x1];
+       u8         tx[0x1];
+       u8         tls_v12[0x1];
+       u8         tls_v13[0x1];
+       u8         lro[0x1];
+       u8         ipv6[0x1];
+       u8         reserved_at_106[0x1a];
+};
+
 struct mlx5_ifc_ipsec_extended_cap_bits {
        u8         encapsulation[0x20];
 
@@ -519,4 +557,43 @@ struct mlx5_ifc_fpga_ipsec_sa {
        __be16 reserved2;
 } __packed;
 
+enum fpga_tls_cmds {
+       CMD_SETUP_STREAM                = 0x1001,
+       CMD_TEARDOWN_STREAM             = 0x1002,
+};
+
+#define MLX5_TLS_1_2 (0)
+
+#define MLX5_TLS_ALG_AES_GCM_128 (0)
+#define MLX5_TLS_ALG_AES_GCM_256 (1)
+
+struct mlx5_ifc_tls_cmd_bits {
+       u8         command_type[0x20];
+       u8         ipv6[0x1];
+       u8         direction_sx[0x1];
+       u8         tls_version[0x2];
+       u8         reserved[0x1c];
+       u8         swid[0x20];
+       u8         src_port[0x10];
+       u8         dst_port[0x10];
+       union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+       union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+       u8         tls_rcd_sn[0x40];
+       u8         tcp_sn[0x20];
+       u8         tls_implicit_iv[0x20];
+       u8         tls_xor_iv[0x40];
+       u8         encryption_key[0x100];
+       u8         alg[4];
+       u8         reserved2[0x1c];
+       u8         reserved3[0x4a0];
+};
+
+struct mlx5_ifc_tls_resp_bits {
+       u8         syndrome[0x20];
+       u8         stream_id[0x20];
+       u8         reserved[0x40];
+};
+
+#define MLX5_TLS_COMMAND_SIZE (0x100)
+
 #endif /* MLX5_IFC_FPGA_H */
index 1ac1f06a4be6b22faf3883c760515a042a6d347e..c6fa9a255dbf63a9ec938c5c6be3f926917bd502 100644 (file)
@@ -2466,6 +2466,13 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
        return VM_FAULT_NOPAGE;
 }
 
+static inline vm_fault_t vmf_error(int err)
+{
+       if (err == -ENOMEM)
+               return VM_FAULT_OOM;
+       return VM_FAULT_SIGBUS;
+}
+
 struct page *follow_page_mask(struct vm_area_struct *vma,
                              unsigned long address, unsigned int foll_flags,
                              unsigned int *page_mask);
@@ -2493,6 +2500,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_MLOCK     0x1000  /* lock present pages */
 #define FOLL_REMOTE    0x2000  /* we are working on non-current tsk/mm */
 #define FOLL_COW       0x4000  /* internal GUP flag */
+#define FOLL_ANON      0x8000  /* don't do file mappings */
 
 static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
 {
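
vmf_error() gives ->fault handlers one place to translate a kernel errno into a VM fault code instead of open-coding the -ENOMEM special case. A hedged sketch; my_fault and my_get_page are illustrative names, not part of this commit:

static int my_get_page(unsigned long addr, struct page **page); /* hypothetical */

static vm_fault_t my_fault(struct vm_fault *vmf)
{
	struct page *page;
	int err;

	err = my_get_page(vmf->address, &page);
	if (err)
		return vmf_error(err); /* -ENOMEM -> VM_FAULT_OOM, else VM_FAULT_SIGBUS */

	vmf->page = page;
	return 0;
}
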
index cdd66a5fbd5e00905ff5e6c2623e6efaf12bf225..0a7abe8a407ff223bfdbd18890a0b20bac9f4aee 100644 (file)
@@ -35,6 +35,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_4335_4339      0x4335
 #define SDIO_DEVICE_ID_BROADCOM_4339           0x4339
 #define SDIO_DEVICE_ID_BROADCOM_43362          0xa962
+#define SDIO_DEVICE_ID_BROADCOM_43364          0xa9a4
 #define SDIO_DEVICE_ID_BROADCOM_43430          0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_4345           0x4345
 #define SDIO_DEVICE_ID_BROADCOM_43455          0xa9bf
index b63fa457febd8d5d379ffe891e981ea2c8e912c8..3529683f691ef90c10f8ba89211341ceb60a1fe1 100644 (file)
@@ -85,6 +85,7 @@ struct flchip {
        unsigned int write_suspended:1;
        unsigned int erase_suspended:1;
        unsigned long in_progress_block_addr;
+       unsigned long in_progress_block_mask;
 
        struct mutex mutex;
        wait_queue_head_t wq; /* Wait on here when we're waiting for the chip
index b5b43f94f311626ee364157515c4342aba976e6f..01b990e4b228a90ef26bc302d8b8476293a81869 100644 (file)
@@ -312,7 +312,7 @@ void map_destroy(struct mtd_info *mtd);
 ({                                                                     \
        int i, ret = 1;                                                 \
        for (i = 0; i < map_words(map); i++) {                          \
-               if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) {       \
+               if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) {       \
                        ret = 0;                                        \
                        break;                                          \
                }                                                       \
index 5dad59b312440a9c6cf44160276f162a82130738..17c919436f48726450069ca1e589ccd3fc48e712 100644 (file)
@@ -867,12 +867,18 @@ struct nand_op_instr {
  * tBERS (during an erase), all of which are u64 values that cannot be
  * divided by the usual kernel macros and must be handled with the special
  * DIV_ROUND_UP_ULL() macro.
+ *
+ * The cast to the dividend's type is needed to guarantee that the result is
+ * not promoted to unsigned long long when the dividend is an unsigned long
+ * (or smaller): given a ternary operator with two different return types,
+ * the compiler picks the larger type so that no precision is lost.
  */
-#define __DIVIDE(dividend, divisor) ({                                 \
-       sizeof(dividend) == sizeof(u32) ?                               \
-               DIV_ROUND_UP(dividend, divisor) :                       \
-               DIV_ROUND_UP_ULL(dividend, divisor);                    \
-               })
+#define __DIVIDE(dividend, divisor) ({                                         \
+       (__typeof__(dividend))(sizeof(dividend) <= sizeof(unsigned long) ?      \
+                              DIV_ROUND_UP(dividend, divisor) :                \
+                              DIV_ROUND_UP_ULL(dividend, divisor));            \
+       })
 #define PSEC_TO_NSEC(x) __DIVIDE(x, 1000)
 #define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000)
 
index 29ed8fd6379a79f43d887be4fd9a7bfc9a46c897..db99240d00bdfea565ea6ed838bbfe5eeba4aa33 100644 (file)
@@ -103,11 +103,12 @@ enum {
 #define NET_DIM_PARAMS_NUM_PROFILES 5
 /* Adaptive moderation profiles */
 #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
 #define NET_DIM_DEF_PROFILE_CQE 1
 #define NET_DIM_DEF_PROFILE_EQE 1
 
 /* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
-#define NET_DIM_EQE_PROFILES { \
+#define NET_DIM_RX_EQE_PROFILES { \
        {1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
        {8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
        {64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
@@ -115,7 +116,7 @@ enum {
        {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
 }
 
-#define NET_DIM_CQE_PROFILES { \
+#define NET_DIM_RX_CQE_PROFILES { \
        {2,  256},             \
        {8,  128},             \
        {16, 64},              \
@@ -123,32 +124,68 @@ enum {
        {64, 64}               \
 }
 
+#define NET_DIM_TX_EQE_PROFILES { \
+       {1,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+       {8,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+       {32,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+       {64,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+       {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}   \
+}
+
+#define NET_DIM_TX_CQE_PROFILES { \
+       {5,  128},  \
+       {8,  64},  \
+       {16, 32},  \
+       {32, 32},  \
+       {64, 32}   \
+}
+
 static const struct net_dim_cq_moder
-profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
-       NET_DIM_EQE_PROFILES,
-       NET_DIM_CQE_PROFILES,
+rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+       NET_DIM_RX_EQE_PROFILES,
+       NET_DIM_RX_CQE_PROFILES,
 };
 
-static inline struct net_dim_cq_moder net_dim_get_profile(u8 cq_period_mode,
-                                                         int ix)
+static const struct net_dim_cq_moder
+tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+       NET_DIM_TX_EQE_PROFILES,
+       NET_DIM_TX_CQE_PROFILES,
+};
+
+static inline struct net_dim_cq_moder
+net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
 {
-       struct net_dim_cq_moder cq_moder;
+       struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
 
-       cq_moder = profile[cq_period_mode][ix];
        cq_moder.cq_period_mode = cq_period_mode;
        return cq_moder;
 }
 
-static inline struct net_dim_cq_moder net_dim_get_def_profile(u8 rx_cq_period_mode)
+static inline struct net_dim_cq_moder
+net_dim_get_def_rx_moderation(u8 cq_period_mode)
+{
+       u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+                       NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
+
+       return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
+}
+
+static inline struct net_dim_cq_moder
+net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
 {
-       int default_profile_ix;
+       struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
+
+       cq_moder.cq_period_mode = cq_period_mode;
+       return cq_moder;
+}
 
-       if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE)
-               default_profile_ix = NET_DIM_DEF_PROFILE_CQE;
-       else /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */
-               default_profile_ix = NET_DIM_DEF_PROFILE_EQE;
+static inline struct net_dim_cq_moder
+net_dim_get_def_tx_moderation(u8 cq_period_mode)
+{
+       u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+                       NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
 
-       return net_dim_get_profile(rx_cq_period_mode, default_profile_ix);
+       return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
 }
 
 static inline bool net_dim_on_top(struct net_dim *dim)
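
With the RX/TX split, a driver now asks explicitly for the table it wants. A hedged sketch of fetching the default TX moderation (my_set_default_tx_coal is an illustrative name):

static void my_set_default_tx_coal(u8 cq_period_mode)
{
	struct net_dim_cq_moder moder =
		net_dim_get_def_tx_moderation(cq_period_mode);

	pr_debug("tx coalescing: %u usec / %u pkts\n",
		 moder.usec, moder.pkts);
}
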
index 35b79f47a13de3a6664fe6c8e79e75c4738f2fd7..c87c3a3453c19fe8ae61c10daecc55c29399aae0 100644 (file)
@@ -55,8 +55,9 @@ enum {
        NETIF_F_GSO_SCTP_BIT,           /* ... SCTP fragmentation */
        NETIF_F_GSO_ESP_BIT,            /* ... ESP with TSO */
        NETIF_F_GSO_UDP_BIT,            /* ... UFO, deprecated except tuntap */
+       NETIF_F_GSO_UDP_L4_BIT,         /* ... UDP payload GSO (not UFO) */
        /**/NETIF_F_GSO_LAST =          /* last bit, see GSO_MASK */
-               NETIF_F_GSO_UDP_BIT,
+               NETIF_F_GSO_UDP_L4_BIT,
 
        NETIF_F_FCOE_CRC_BIT,           /* FCoE CRC32 */
        NETIF_F_SCTP_CRC_BIT,           /* SCTP checksum offload */
@@ -77,6 +78,7 @@ enum {
        NETIF_F_HW_ESP_BIT,             /* Hardware ESP transformation offload */
        NETIF_F_HW_ESP_TX_CSUM_BIT,     /* ESP with TX checksum offload */
        NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
+       NETIF_F_HW_TLS_TX_BIT,          /* Hardware TLS TX offload */
 
        NETIF_F_GRO_HW_BIT,             /* Hardware Generic receive offload */
        NETIF_F_HW_TLS_RECORD_BIT,      /* Offload TLS record */
@@ -147,6 +149,8 @@ enum {
 #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM)
 #define        NETIF_F_RX_UDP_TUNNEL_PORT  __NETIF_F(RX_UDP_TUNNEL_PORT)
 #define NETIF_F_HW_TLS_RECORD  __NETIF_F(HW_TLS_RECORD)
+#define NETIF_F_GSO_UDP_L4     __NETIF_F(GSO_UDP_L4)
+#define NETIF_F_HW_TLS_TX      __NETIF_F(HW_TLS_TX)
 
 #define for_each_netdev_feature(mask_addr, bit)        \
        for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
@@ -216,6 +220,7 @@ enum {
                                 NETIF_F_GSO_GRE_CSUM |                 \
                                 NETIF_F_GSO_IPXIP4 |                   \
                                 NETIF_F_GSO_IPXIP6 |                   \
+                                NETIF_F_GSO_UDP_L4 |                   \
                                 NETIF_F_GSO_UDP_TUNNEL |               \
                                 NETIF_F_GSO_UDP_TUNNEL_CSUM)
 
index 14e0777ffcfb0b2a501fe5ae5a1a70f5f8666d24..03ed492c4e14ade6cfee53b8cf9a65fd1ec62d16 100644 (file)
@@ -865,6 +865,26 @@ struct xfrmdev_ops {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+enum tls_offload_ctx_dir {
+       TLS_OFFLOAD_CTX_DIR_RX,
+       TLS_OFFLOAD_CTX_DIR_TX,
+};
+
+struct tls_crypto_info;
+struct tls_context;
+
+struct tlsdev_ops {
+       int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
+                          enum tls_offload_ctx_dir direction,
+                          struct tls_crypto_info *crypto_info,
+                          u32 start_offload_tcp_sn);
+       void (*tls_dev_del)(struct net_device *netdev,
+                           struct tls_context *ctx,
+                           enum tls_offload_ctx_dir direction);
+};
+#endif
+
 struct dev_ifalias {
        struct rcu_head rcuhead;
        char ifalias[];
@@ -1750,6 +1770,10 @@ struct net_device {
        const struct xfrmdev_ops *xfrmdev_ops;
 #endif
 
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+       const struct tlsdev_ops *tlsdev_ops;
+#endif
+
        const struct header_ops *header_ops;
 
        unsigned int            flags;
@@ -2486,6 +2510,7 @@ void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
 void unregister_netdevice_many(struct list_head *head);
@@ -3213,19 +3238,6 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 }
 #endif
 
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
-                 unsigned int num_tx_queues);
-
-/*
- * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
- * as a distribution range limit for the returned value.
- */
-static inline u16 skb_tx_hash(const struct net_device *dev,
-                             struct sk_buff *skb)
-{
-       return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
-}
-
 /**
  *     netif_is_multiqueue - test if device has multiple transmit queues
  *     @dev: network device
@@ -4186,6 +4198,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
        BUILD_BUG_ON(SKB_GSO_SCTP    != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
+       BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
 
        return (features & feature) == feature;
 }
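
A NIC driver opts into TLS TX offload by providing tlsdev_ops and advertising NETIF_F_HW_TLS_TX. A hedged sketch of the wiring; my_tls_add, my_tls_del and my_init_tls are illustrative names:

static int my_tls_add(struct net_device *netdev, struct sock *sk,
		      enum tls_offload_ctx_dir direction,
		      struct tls_crypto_info *crypto_info,
		      u32 start_offload_tcp_sn);		/* hypothetical */
static void my_tls_del(struct net_device *netdev, struct tls_context *ctx,
		       enum tls_offload_ctx_dir direction);	/* hypothetical */

static const struct tlsdev_ops my_tlsdev_ops = {
	.tls_dev_add = my_tls_add,
	.tls_dev_del = my_tls_del,
};

static void my_init_tls(struct net_device *netdev)
{
	netdev->tlsdev_ops = &my_tlsdev_ops;
	netdev->hw_features |= NETIF_F_HW_TLS_TX;
	netdev->features |= NETIF_F_HW_TLS_TX;
}
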
diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
new file mode 100644 (file)
index 0000000..a2b3960
--- /dev/null
@@ -0,0 +1,27 @@
+#include <uapi/linux/netfilter/nf_osf.h>
+
+/* Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum nf_osf_window_size_options {
+       OSF_WSS_PLAIN   = 0,
+       OSF_WSS_MSS,
+       OSF_WSS_MTU,
+       OSF_WSS_MODULO,
+       OSF_WSS_MAX,
+};
+
+enum osf_fmatch_states {
+       /* Packet does not match the fingerprint */
+       FMATCH_WRONG = 0,
+       /* Packet matches the fingerprint */
+       FMATCH_OK,
+       /* Options do not match the fingerprint, but header does */
+       FMATCH_OPT_WRONG,
+};
+
+bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+                 int hooknum, struct net_device *in, struct net_device *out,
+                 const struct nf_osf_info *info, struct net *net,
+                 const struct list_head *nf_osf_fingers);
index 0773b5a032f1fc1cedaaef44c89f37c9344399f1..c6935be7c6ca3e9a33f2835a8dc10035bee43f1d 100644 (file)
 #include <linux/if_ether.h>
 #include <uapi/linux/netfilter_bridge/ebtables.h>
 
-/* return values for match() functions */
-#define EBT_MATCH 0
-#define EBT_NOMATCH 1
-
 struct ebt_match {
        struct list_head list;
        const char name[EBT_FUNCTION_MAXNAMELEN];
index 5bad038ac012e6e3047fd6a63a625c6814966d51..6adac113e96d29b5059ed65ac0522237c9b94388 100644 (file)
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
        return 0;
 }
 
+void __oom_reap_task_mm(struct mm_struct *mm);
+
 extern unsigned long oom_badness(struct task_struct *p,
                struct mem_cgroup *memcg, const nodemask_t *nodemask,
                unsigned long totalpages);
index b1f37a89e368683233499bc5942b43e353b637a3..79b99d653e030d113e4401fc26c7b47e81dcff8c 100644 (file)
@@ -133,7 +133,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
        lock_release(&sem->rw_sem.dep_map, 1, ip);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        if (!read)
-               sem->rw_sem.owner = NULL;
+               sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
 #endif
 }
 
@@ -141,6 +141,10 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
                                        bool read, unsigned long ip)
 {
        lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+       if (!read)
+               sem->rw_sem.owner = current;
+#endif
 }
 
 #endif
index f0b5870a6d40b66437e8f3f9bf5d894c677fbc58..073235e704427c3e1238f4cfed1a97574f8193b3 100644 (file)
@@ -1068,6 +1068,52 @@ int __init mdio_bus_init(void);
 void mdio_bus_exit(void);
 #endif
 
+/* Inline function for use within net/core/ethtool.c (built-in) */
+static inline int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data)
+{
+       if (!phydev->drv)
+               return -EIO;
+
+       mutex_lock(&phydev->lock);
+       phydev->drv->get_strings(phydev, data);
+       mutex_unlock(&phydev->lock);
+
+       return 0;
+}
+
+static inline int phy_ethtool_get_sset_count(struct phy_device *phydev)
+{
+       int ret;
+
+       if (!phydev->drv)
+               return -EIO;
+
+       if (phydev->drv->get_sset_count &&
+           phydev->drv->get_strings &&
+           phydev->drv->get_stats) {
+               mutex_lock(&phydev->lock);
+               ret = phydev->drv->get_sset_count(phydev);
+               mutex_unlock(&phydev->lock);
+
+               return ret;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static inline int phy_ethtool_get_stats(struct phy_device *phydev,
+                                       struct ethtool_stats *stats, u64 *data)
+{
+       if (!phydev->drv)
+               return -EIO;
+
+       mutex_lock(&phydev->lock);
+       phydev->drv->get_stats(phydev, stats, data);
+       mutex_unlock(&phydev->lock);
+
+       return 0;
+}
+
 extern struct bus_type mdio_bus_type;
 
 struct mdio_board_info {
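
These helpers let a MAC driver forward ETH_SS_PHY_STATS queries to the PHY with the PHY lock held. A hedged sketch of the ethtool_ops glue (my_get_sset_count is an illustrative name):

static int my_get_sset_count(struct net_device *dev, int sset)
{
	if (sset == ETH_SS_PHY_STATS && dev->phydev)
		return phy_ethtool_get_sset_count(dev->phydev);
	return -EOPNOTSUPP;
}
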
index c9d14eeee7f5ae8156c10967a66db5b53b191879..9713aebdd348b1d9bb73ecbe2b82efba3f929695 100644 (file)
@@ -36,6 +36,7 @@ enum phy_mode {
        PHY_MODE_USB_DEVICE_SS,
        PHY_MODE_USB_OTG,
        PHY_MODE_SGMII,
+       PHY_MODE_2500SGMII,
        PHY_MODE_10GKR,
        PHY_MODE_UFS_HS_A,
        PHY_MODE_UFS_HS_B,
index 69d279c0da968f96f766386d70768aa8fbf5ef6e..8eaef2f2b691e7f98a050ace421428c2afb69e8d 100644 (file)
 #define __B53_H
 
 #include <linux/kernel.h>
+#include <net/dsa.h>
 
 struct b53_platform_data {
+       /* Must be first such that dsa_register_switch() can access it */
+       struct dsa_chip_data cd;
+
        u32 chip_id;
        u16 enabled_ports;
 
diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h
new file mode 100644 (file)
index 0000000..f63af29
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DSA_MV88E6XXX_H
+#define __DSA_MV88E6XXX_H
+
+#include <net/dsa.h>
+
+struct dsa_mv88e6xxx_pdata {
+       /* Must be first, such that dsa_register_switch() can access this
+        * without gory pointer manipulations
+        */
+       struct dsa_chip_data cd;
+       const char *compatible;
+       unsigned int enabled_ports;
+       struct net_device *netdev;
+       u32 eeprom_len;
+};
+
+#endif
index e53f9c7c2809f938b042015e18e47b7d0a1ec2f6..907976fd56f7dedb7795b0a48adef5d184977c7e 100644 (file)
@@ -339,7 +339,6 @@ struct qed_dev_info {
        u8              num_hwfns;
 
        u8              hw_mac[ETH_ALEN];
-       bool            is_mf_default;
 
        /* FW version */
        u16             fw_major;
@@ -359,7 +358,7 @@ struct qed_dev_info {
 #define QED_MFW_VERSION_3_OFFSET       24
 
        u32             flash_size;
-       u8              mf_mode;
+       bool            b_inter_pf_switch;
        bool            tx_switching;
        bool            rdma_supported;
        u16             mtu;
index 266c1fb4538756a03b88ae39c0c54368991e704d..5eb022953acadd7f8c6c2297901bf0e5c566ce3f 100644 (file)
@@ -202,6 +202,7 @@ struct qed_ll2_tx_pkt_info {
        bool enable_ip_cksum;
        bool enable_l4_cksum;
        bool calc_ip_len;
+       bool remove_stag;
 };
 
 #define QED_LL2_UNUSED_HANDLE   (0xff)
@@ -220,6 +221,11 @@ struct qed_ll2_params {
        u8 ll2_mac_address[ETH_ALEN];
 };
 
+enum qed_ll2_xmit_flags {
+       /* FIP discovery packet */
+       QED_LL2_XMIT_FLAGS_FIP_DISCOVERY
+};
+
 struct qed_ll2_ops {
 /**
  * @brief start - initializes ll2
@@ -245,10 +251,12 @@ struct qed_ll2_ops {
  *
  * @param cdev
  * @param skb
+ * @param xmit_flags - Transmit options defined by the enum qed_ll2_xmit_flags.
  *
  * @return 0 on success, otherwise error value.
  */
-       int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb);
+       int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb,
+                         unsigned long xmit_flags);
 
 /**
  * @brief register_cb_ops - protocol driver register the callback for Rx/Tx
index 6bfd2b581f75a464e13d3d749ebe5d639a32551f..af8a61be2d8d500206687249f1e428c23dceb5fe 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>
 
 /*
  * Please note - only struct rb_augment_callbacks and the prototypes for
index ece43e882b56df3c119b97d26e69f388cc348499..7d012faa509a44c775b8835d409fb1ac3933d3c8 100644 (file)
@@ -35,6 +35,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
+#include <linux/rcupdate.h>
 
 struct latch_tree_node {
        struct rb_node node[2];
index d09a9c7af109fbb27324d9f4f7f83c60843c9110..dfdaede9139e555611de84ff3de3459e959e97da 100644 (file)
@@ -569,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 void rproc_add_subdev(struct rproc *rproc,
                      struct rproc_subdev *subdev,
                      int (*probe)(struct rproc_subdev *subdev),
-                     void (*remove)(struct rproc_subdev *subdev, bool graceful));
+                     void (*remove)(struct rproc_subdev *subdev, bool crashed));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
index 1f8ad121eb4346ed06195661dc2327d17be4de49..4e1f535c2034e8d292f3d5b5bd569423878bab58 100644 (file)
@@ -836,9 +836,8 @@ static inline void *__rhashtable_insert_fast(
  *
  * It is safe to call this function from atomic context.
  *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
  */
 static inline int rhashtable_insert_fast(
        struct rhashtable *ht, struct rhash_head *obj,
@@ -866,9 +865,8 @@ static inline int rhashtable_insert_fast(
  *
  * It is safe to call this function from atomic context.
  *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
  */
 static inline int rhltable_insert_key(
        struct rhltable *hlt, const void *key, struct rhlist_head *list,
@@ -890,9 +888,8 @@ static inline int rhltable_insert_key(
  *
  * It is safe to call this function from atomic context.
  *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
  */
 static inline int rhltable_insert(
        struct rhltable *hlt, struct rhlist_head *list,
@@ -922,9 +919,8 @@ static inline int rhltable_insert(
  *
  * It is safe to call this function from atomic context.
  *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
  */
 static inline int rhashtable_lookup_insert_fast(
        struct rhashtable *ht, struct rhash_head *obj,
@@ -981,9 +977,8 @@ static inline void *rhashtable_lookup_get_insert_fast(
  *
  * Lookups may occur in parallel with hashtable mutations and resizing.
  *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
  *
  * Returns zero on success.
  */
@@ -1134,8 +1129,8 @@ static inline int __rhashtable_remove_fast(
  * walk the bucket chain upon removal. The removal operation is thus
  * considerably slow if the hash table is not correctly sized.
  *
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
  *
  * Returns zero on success, -ENOENT if the entry could not be found.
  */
@@ -1156,8 +1151,8 @@ static inline int rhashtable_remove_fast(
  * walk the bucket chain upon removal. The removal operation is thus
  * considerably slow if the hash table is not correctly sized.
  *
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
  *
  * Returns zero on success, -ENOENT if the entry could not be found.
  */
@@ -1273,8 +1268,9 @@ static inline int rhashtable_walk_init(struct rhashtable *ht,
  * For a completely stable walk you should construct your own data
  * structure outside the hash table.
  *
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
  *
  * You must call rhashtable_walk_exit after this function returns.
  */
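
The reworded comments describe the behaviour the API has today: resizing is automatic around 70%/30% residency, with no grow/shrink callbacks to configure. A hedged sketch of an insertion that relies on it (struct my_obj and my_params are illustrative):

struct my_obj {
	u32 key;
	struct rhash_head node;
};

static const struct rhashtable_params my_params = {
	.key_len     = sizeof(u32),
	.key_offset  = offsetof(struct my_obj, key),
	.head_offset = offsetof(struct my_obj, node),
};

static int my_add(struct rhashtable *ht, struct my_obj *obj)
{
	/* may trigger the deferred resize described above */
	return rhashtable_insert_fast(ht, &obj->node, my_params);
}
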
index 56707d5ff6adddce20b7ae417d91abfd9c393744..ab93b6eae6968e4eef4b29b2f1f134c4b21fe5d1 100644 (file)
@@ -44,6 +44,12 @@ struct rw_semaphore {
 #endif
 };
 
+/*
+ * Setting bit 0 of the owner field with other non-zero bits will indicate
+ * that the rwsem is writer-owned with an unknown owner.
+ */
+#define RWSEM_OWNER_UNKNOWN    ((struct task_struct *)-1L)
+
 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
index b3d697f3b5731b4c45986fb10e8afb2d00ecf6e4..c2413703f45dcf005c7601b1e8c8c24c51d2a844 100644 (file)
@@ -112,17 +112,36 @@ struct task_group;
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
+/*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+ */
+#define is_special_task_state(state)                           \
+       ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+
 #define __set_current_state(state_value)                       \
        do {                                                    \
+               WARN_ON_ONCE(is_special_task_state(state_value));\
                current->task_state_change = _THIS_IP_;         \
                current->state = (state_value);                 \
        } while (0)
+
 #define set_current_state(state_value)                         \
        do {                                                    \
+               WARN_ON_ONCE(is_special_task_state(state_value));\
                current->task_state_change = _THIS_IP_;         \
                smp_store_mb(current->state, (state_value));    \
        } while (0)
 
+#define set_special_state(state_value)                                 \
+       do {                                                            \
+               unsigned long flags; /* may shadow */                   \
+               WARN_ON_ONCE(!is_special_task_state(state_value));      \
+               raw_spin_lock_irqsave(&current->pi_lock, flags);        \
+               current->task_state_change = _THIS_IP_;                 \
+               current->state = (state_value);                         \
+               raw_spin_unlock_irqrestore(&current->pi_lock, flags);   \
+       } while (0)
 #else
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +163,8 @@ struct task_group;
  *
  * The above is typically ordered against the wakeup, which does:
  *
- *     need_sleep = false;
- *     wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *   need_sleep = false;
+ *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * Where wake_up_state() (and all other wakeup primitives) imply enough
  * barriers to order the store of the variable against wakeup.
@@ -154,12 +173,33 @@ struct task_group;
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
- * This is obviously fine, since they both store the exact same value.
+ * However, with slightly different timing the wakeup TASK_RUNNING store can
+ * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
+ * a problem either because that will result in one extra go around the loop
+ * and our @cond test will save the day.
  *
  * Also see the comments of try_to_wake_up().
  */
-#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
-#define set_current_state(state_value)  smp_store_mb(current->state, (state_value))
+#define __set_current_state(state_value)                               \
+       current->state = (state_value)
+
+#define set_current_state(state_value)                                 \
+       smp_store_mb(current->state, (state_value))
+
+/*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
+ * will not collide with our state change.
+ */
+#define set_special_state(state_value)                                 \
+       do {                                                            \
+               unsigned long flags; /* may shadow */                   \
+               raw_spin_lock_irqsave(&current->pi_lock, flags);        \
+               current->state = (state_value);                         \
+               raw_spin_unlock_irqrestore(&current->pi_lock, flags);   \
+       } while (0)
+
 #endif
 
 /* Task command name length: */
index a7ce74c74e494ae05ba687bf9ad0497a690faed2..113d1ad1ced76e0c138c9dd05d1420ff28c71616 100644 (file)
@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
 {
        spin_lock_irq(&current->sighand->siglock);
        if (current->jobctl & JOBCTL_STOP_DEQUEUED)
-               __set_current_state(TASK_STOPPED);
+               set_special_state(TASK_STOPPED);
        spin_unlock_irq(&current->sighand->siglock);
 
        schedule();
index 1d356105f25a1680b05a6119fa1d9efba578b6b6..b4c9fda9d8335bd3b20611d9684645dce3f8532c 100644 (file)
@@ -351,10 +351,10 @@ struct earlycon_id {
        char    name[16];
        char    compatible[128];
        int     (*setup)(struct earlycon_device *, const char *options);
-} __aligned(32);
+};
 
-extern const struct earlycon_id __earlycon_table[];
-extern const struct earlycon_id __earlycon_table_end[];
+extern const struct earlycon_id *__earlycon_table[];
+extern const struct earlycon_id *__earlycon_table_end[];
 
 #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE)
 #define EARLYCON_USED_OR_UNUSED        __used
@@ -362,12 +362,19 @@ extern const struct earlycon_id __earlycon_table_end[];
 #define EARLYCON_USED_OR_UNUSED        __maybe_unused
 #endif
 
-#define OF_EARLYCON_DECLARE(_name, compat, fn)                         \
-       static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \
-            EARLYCON_USED_OR_UNUSED __section(__earlycon_table)        \
+#define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id)             \
+       static const struct earlycon_id unique_id                       \
+            EARLYCON_USED_OR_UNUSED __initconst                        \
                = { .name = __stringify(_name),                         \
                    .compatible = compat,                               \
-                   .setup = fn  }
+                   .setup = fn  };                                     \
+       static const struct earlycon_id EARLYCON_USED_OR_UNUSED         \
+               __section(__earlycon_table)                             \
+               * const __PASTE(__p, unique_id) = &unique_id
+
+#define OF_EARLYCON_DECLARE(_name, compat, fn)                         \
+       _OF_EARLYCON_DECLARE(_name, compat, fn,                         \
+                            __UNIQUE_ID(__earlycon_##_name))
 
 #define EARLYCON_DECLARE(_name, fn)    OF_EARLYCON_DECLARE(_name, "", fn)
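
Driver-facing usage is unchanged by the pointer-table rework; only where the entry lives differs (the earlycon_id now sits in __initconst, with a pointer in __earlycon_table). A hedged sketch of a declaration; the names are illustrative:

static int __init my_early_setup(struct earlycon_device *dev,
				 const char *options)
{
	/* would set dev->con->write here */
	return 0;
}
OF_EARLYCON_DECLARE(myuart, "acme,myuart", my_early_setup);
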
 
index a6b6e8bb3d7b8a7b37577d6376d10ea102298a99..62d9b0a6329f1ef486546506073316d004317b1c 100644 (file)
@@ -97,6 +97,11 @@ static inline bool skb_array_empty_any(struct skb_array *a)
        return ptr_ring_empty_any(&a->ring);
 }
 
+static inline struct sk_buff *__skb_array_consume(struct skb_array *a)
+{
+       return __ptr_ring_consume(&a->ring);
+}
+
 static inline struct sk_buff *skb_array_consume(struct skb_array *a)
 {
        return ptr_ring_consume(&a->ring);
index d274059529eb5216d041dfdcad4a564a623c8ea0..693564a9a9799df1d54db923904627dffc547879 100644 (file)
@@ -573,6 +573,8 @@ enum {
        SKB_GSO_ESP = 1 << 15,
 
        SKB_GSO_UDP = 1 << 16,
+
+       SKB_GSO_UDP_L4 = 1 << 17,
 };
 
 #if BITS_PER_LONG > 32
@@ -1032,6 +1034,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
+void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
 struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
                                   gfp_t gfp_mask, bool fclone);
@@ -1168,7 +1171,7 @@ void __skb_get_hash(struct sk_buff *skb);
 u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-                  const struct flow_keys *keys, int hlen);
+                  const struct flow_keys_basic *keys, int hlen);
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
                            void *data, int hlen_proto);
 
@@ -1205,13 +1208,14 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
                                  NULL, 0, 0, 0, flags);
 }
 
-static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
-                                                 void *data, __be16 proto,
-                                                 int nhoff, int hlen,
-                                                 unsigned int flags)
+static inline bool
+skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+                                struct flow_keys_basic *flow, void *data,
+                                __be16 proto, int nhoff, int hlen,
+                                unsigned int flags)
 {
        memset(flow, 0, sizeof(*flow));
-       return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
+       return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
                                  data, proto, nhoff, hlen, flags);
 }
 
@@ -2347,11 +2351,12 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
 static inline void skb_probe_transport_header(struct sk_buff *skb,
                                              const int offset_hint)
 {
-       struct flow_keys keys;
+       struct flow_keys_basic keys;
 
        if (skb_transport_header_was_set(skb))
                return;
-       else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+
+       if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
                skb_set_transport_header(skb, keys.control.thoff);
        else
                skb_set_transport_header(skb, offset_hint);
index 5c916e6dff3627a8c830a1bcbf9624dd3312cc33..15fe980a27ead226cdf5cbd00df3c54f2dd944a6 100644 (file)
@@ -25,15 +25,6 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-static inline
-void sock_init_cookie(struct sock *sk)
-{
-       u64 res;
-
-       res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
-       atomic64_set(&sk->sk_cookie, res);
-}
-
 u64 sock_gen_cookie(struct sock *sk);
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
index ea50f4a6581622be6d6246b0d604fdb911a6e441..7ed4713d533727ff53d199e4982139d93e16ec7b 100644 (file)
@@ -207,8 +207,9 @@ struct ucred {
                                 * PF_SMC protocol family that
                                 * reuses AF_INET address family
                                 */
+#define AF_XDP         44      /* XDP sockets                  */
 
-#define AF_MAX         44      /* For now.. */
+#define AF_MAX         45      /* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC      AF_UNSPEC
@@ -257,6 +258,7 @@ struct ucred {
 #define PF_KCM         AF_KCM
 #define PF_QIPCRTR     AF_QIPCRTR
 #define PF_SMC         AF_SMC
+#define PF_XDP         AF_XDP
 #define PF_MAX         AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
@@ -338,6 +340,7 @@ struct ucred {
 #define SOL_NFC                280
 #define SOL_KCM                281
 #define SOL_TLS                282
+#define SOL_XDP                283
 
 /* IPX options */
 #define IPX_TYPE       1
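
From userspace the new family is used like any other socket family. A hedged sketch, assuming headers that define AF_XDP (error handling elided):

#include <sys/socket.h>

int open_xsk(void)
{
	return socket(AF_XDP, SOCK_RAW, 0);	/* -1 with errno on failure */
}
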
index e8f0f852968f1326edb7b261708d5a5cee49f030..c0c5c5b73dc0b0949043881e247a90aff5cf8a03 100644 (file)
@@ -50,9 +50,9 @@ partial_name_hash(unsigned long c, unsigned long prevhash)
  * losing bits).  This also has the property (wanted by the dcache)
  * that the msbits make a good hash table index.
  */
-static inline unsigned long end_name_hash(unsigned long hash)
+static inline unsigned int end_name_hash(unsigned long hash)
 {
-       return __hash_32((unsigned int)hash);
+       return hash_long(hash, 32);
 }
 
 /*
index 20585d5c4e1c3b64015df4a0339d19f65c03747b..72705eaf4b84060a45bf04d5170f389a18010eac 100644 (file)
@@ -218,6 +218,7 @@ struct tcp_sock {
                   reord:1;      /* reordering detected */
        } rack;
        u16     advmss;         /* Advertised MSS                       */
+       u8      compressed_ack;
        u32     chrono_start;   /* Start time in jiffies of a TCP chrono */
        u32     chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
        u8      chrono_type:2,  /* current chronograph type */
@@ -228,7 +229,7 @@ struct tcp_sock {
                unused:2;
        u8      nonagle     : 4,/* Disable Nagle algorithm?             */
                thin_lto    : 1,/* Use linear timeouts for thin streams */
-               unused1     : 1,
+               recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
                repair      : 1,
                frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
        u8      repair_queue;
@@ -297,6 +298,7 @@ struct tcp_sock {
        u32     sacked_out;     /* SACK'd packets                       */
 
        struct hrtimer  pacing_timer;
+       struct hrtimer  compressed_ack_timer;
 
        /* from STCP, retrans queue hinting */
        struct sk_buff* lost_skb_hint;
index 34f053a150a969bf03805cd56ee7f5487e041642..cf2862bd134a400b99136aaf2ec1357bd79ba3d2 100644 (file)
@@ -43,11 +43,7 @@ enum {
 #define THREAD_ALIGN   THREAD_SIZE
 #endif
 
-#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
-# define THREADINFO_GFP                (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
-#else
-# define THREADINFO_GFP                (GFP_KERNEL_ACCOUNT)
-#endif
+#define THREADINFO_GFP         (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
 
 /*
  * flag set/clear/test wrappers
index 45bc6b37649294788e1061f8dd626c5609e167b9..53604b087f2c0b24993ea357b42103a8e07a4971 100644 (file)
@@ -60,6 +60,81 @@ struct ti_emif_pm_functions {
        u32 abort_sr;
 } __packed __aligned(8);
 
+static inline void ti_emif_asm_offsets(void)
+{
+       DEFINE(EMIF_SDCFG_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_sdcfg_val));
+       DEFINE(EMIF_TIMING1_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_timing1_val));
+       DEFINE(EMIF_TIMING2_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_timing2_val));
+       DEFINE(EMIF_TIMING3_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_timing3_val));
+       DEFINE(EMIF_REF_CTRL_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_ref_ctrl_val));
+       DEFINE(EMIF_ZQCFG_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_zqcfg_val));
+       DEFINE(EMIF_PMCR_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_pmcr_val));
+       DEFINE(EMIF_PMCR_SHDW_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_pmcr_shdw_val));
+       DEFINE(EMIF_RD_WR_LEVEL_RAMP_CTRL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_rd_wr_level_ramp_ctrl));
+       DEFINE(EMIF_RD_WR_EXEC_THRESH_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_rd_wr_exec_thresh));
+       DEFINE(EMIF_COS_CONFIG_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_cos_config));
+       DEFINE(EMIF_PRIORITY_TO_COS_MAPPING_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_priority_to_cos_mapping));
+       DEFINE(EMIF_CONNECT_ID_SERV_1_MAP_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_connect_id_serv_1_map));
+       DEFINE(EMIF_CONNECT_ID_SERV_2_MAP_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_connect_id_serv_2_map));
+       DEFINE(EMIF_OCP_CONFIG_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_ocp_config_val));
+       DEFINE(EMIF_LPDDR2_NVM_TIM_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim));
+       DEFINE(EMIF_LPDDR2_NVM_TIM_SHDW_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim_shdw));
+       DEFINE(EMIF_DLL_CALIB_CTRL_VAL_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val));
+       DEFINE(EMIF_DLL_CALIB_CTRL_VAL_SHDW_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val_shdw));
+       DEFINE(EMIF_DDR_PHY_CTLR_1_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_ddr_phy_ctlr_1));
+       DEFINE(EMIF_EXT_PHY_CTRL_VALS_OFFSET,
+              offsetof(struct emif_regs_amx3, emif_ext_phy_ctrl_vals));
+       DEFINE(EMIF_REGS_AMX3_SIZE, sizeof(struct emif_regs_amx3));
+
+       BLANK();
+
+       DEFINE(EMIF_PM_BASE_ADDR_VIRT_OFFSET,
+              offsetof(struct ti_emif_pm_data, ti_emif_base_addr_virt));
+       DEFINE(EMIF_PM_BASE_ADDR_PHYS_OFFSET,
+              offsetof(struct ti_emif_pm_data, ti_emif_base_addr_phys));
+       DEFINE(EMIF_PM_CONFIG_OFFSET,
+              offsetof(struct ti_emif_pm_data, ti_emif_sram_config));
+       DEFINE(EMIF_PM_REGS_VIRT_OFFSET,
+              offsetof(struct ti_emif_pm_data, regs_virt));
+       DEFINE(EMIF_PM_REGS_PHYS_OFFSET,
+              offsetof(struct ti_emif_pm_data, regs_phys));
+       DEFINE(EMIF_PM_DATA_SIZE, sizeof(struct ti_emif_pm_data));
+
+       BLANK();
+
+       DEFINE(EMIF_PM_SAVE_CONTEXT_OFFSET,
+              offsetof(struct ti_emif_pm_functions, save_context));
+       DEFINE(EMIF_PM_RESTORE_CONTEXT_OFFSET,
+              offsetof(struct ti_emif_pm_functions, restore_context));
+       DEFINE(EMIF_PM_ENTER_SR_OFFSET,
+              offsetof(struct ti_emif_pm_functions, enter_sr));
+       DEFINE(EMIF_PM_EXIT_SR_OFFSET,
+              offsetof(struct ti_emif_pm_functions, exit_sr));
+       DEFINE(EMIF_PM_ABORT_SR_OFFSET,
+              offsetof(struct ti_emif_pm_functions, abort_sr));
+       DEFINE(EMIF_PM_FUNCTIONS_SIZE, sizeof(struct ti_emif_pm_functions));
+}
+
 struct gen_pool;
 
 int ti_emif_copy_pm_function_table(struct gen_pool *sram_pool, void *dst);
index 4b3dca173e8959a307785a1a1459654eadf62838..7acb953298a73118028ae04052fab9249529380f 100644 (file)
@@ -52,7 +52,6 @@ struct tk_read_base {
  * @offs_real:         Offset clock monotonic -> clock realtime
  * @offs_boot:         Offset clock monotonic -> clock boottime
  * @offs_tai:          Offset clock monotonic -> clock tai
- * @time_suspended:    Accumulated suspend time
  * @tai_offset:                The current UTC to TAI offset in seconds
  * @clock_was_set_seq: The sequence number of clock was set events
  * @cs_was_changed_seq:        The sequence number of clocksource change events
@@ -95,7 +94,6 @@ struct timekeeper {
        ktime_t                 offs_real;
        ktime_t                 offs_boot;
        ktime_t                 offs_tai;
-       ktime_t                 time_suspended;
        s32                     tai_offset;
        unsigned int            clock_was_set_seq;
        u8                      cs_was_changed_seq;
index 9737fbec7019bd02b9f6bf0ef9d34e1dc67fdaaa..588a0e4b1ab9336674b2bbc4f1fd0465b4d4a845 100644 (file)
@@ -33,25 +33,20 @@ extern void ktime_get_ts64(struct timespec64 *ts);
 extern time64_t ktime_get_seconds(void);
 extern time64_t __ktime_get_real_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
-extern void ktime_get_active_ts64(struct timespec64 *ts);
 
 extern int __getnstimeofday64(struct timespec64 *tv);
 extern void getnstimeofday64(struct timespec64 *tv);
 extern void getboottime64(struct timespec64 *ts);
 
-#define ktime_get_real_ts64(ts)                getnstimeofday64(ts)
-
-/* Clock BOOTTIME compatibility wrappers */
-static inline void get_monotonic_boottime64(struct timespec64 *ts)
-{
-       ktime_get_ts64(ts);
-}
+#define ktime_get_real_ts64(ts)        getnstimeofday64(ts)
 
 /*
  * ktime_t based interfaces
  */
+
 enum tk_offsets {
        TK_OFFS_REAL,
+       TK_OFFS_BOOT,
        TK_OFFS_TAI,
        TK_OFFS_MAX,
 };
@@ -62,10 +57,6 @@ extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
 extern ktime_t ktime_get_raw(void);
 extern u32 ktime_get_resolution_ns(void);
 
-/* Clock BOOTTIME compatibility wrappers */
-static inline ktime_t ktime_get_boottime(void) { return ktime_get(); }
-static inline u64 ktime_get_boot_ns(void) { return ktime_get(); }
-
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
  */
@@ -74,6 +65,17 @@ static inline ktime_t ktime_get_real(void)
        return ktime_get_with_offset(TK_OFFS_REAL);
 }
 
+/**
+ * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ *
+ * This is similar to CLOCK_MONOTONIC/ktime_get, but also includes the
+ * time spent in suspend.
+ */
+static inline ktime_t ktime_get_boottime(void)
+{
+       return ktime_get_with_offset(TK_OFFS_BOOT);
+}
+
 /**
  * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
  */
@@ -100,6 +102,11 @@ static inline u64 ktime_get_real_ns(void)
        return ktime_to_ns(ktime_get_real());
 }
 
+static inline u64 ktime_get_boot_ns(void)
+{
+       return ktime_to_ns(ktime_get_boottime());
+}
+
 static inline u64 ktime_get_tai_ns(void)
 {
        return ktime_to_ns(ktime_get_clocktai());
@@ -112,11 +119,17 @@ static inline u64 ktime_get_raw_ns(void)
 
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
+extern u64 ktime_get_boot_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
 
 /*
  * timespec64 interfaces utilizing the ktime based ones
  */
+static inline void get_monotonic_boottime64(struct timespec64 *ts)
+{
+       *ts = ktime_to_timespec64(ktime_get_boottime());
+}
+
 static inline void timekeeping_clocktai64(struct timespec64 *ts)
 {
        *ts = ktime_to_timespec64(ktime_get_clocktai());
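
The net effect of this header's rework is that CLOCK_BOOTTIME is now just CLOCK_MONOTONIC plus an accumulated suspend offset (TK_OFFS_BOOT), so it keeps advancing across suspend while ktime_get() does not. A minimal kernel-style sketch of observing the difference (ktime_ms_delta() and pr_info() as in mainline; the sample functions are hypothetical):

#include <linux/ktime.h>

static ktime_t mono_start, boot_start;

static void sample_start(void)
{
	mono_start = ktime_get();           /* stops while suspended */
	boot_start = ktime_get_boottime();  /* keeps counting */
}

static void sample_end(void)
{
	s64 mono_ms = ktime_ms_delta(ktime_get(), mono_start);
	s64 boot_ms = ktime_ms_delta(ktime_get_boottime(), boot_start);

	/* boot_ms - mono_ms approximates the time spent in suspend */
	pr_info("monotonic %lld ms, boottime %lld ms\n", mono_ms, boot_ms);
}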
index af4114d5dc1751235fe99fbd70b79ae5cca73ccd..3616b4becb594012cdd6280b10d297ecebb7d04f 100644 (file)
@@ -9,9 +9,6 @@
 extern void do_gettimeofday(struct timeval *tv);
 unsigned long get_seconds(void);
 
-/* does not take xtime_lock */
-struct timespec __current_kernel_time(void);
-
 static inline struct timespec current_kernel_time(void)
 {
        struct timespec64 now = current_kernel_time64();
index 2448f9cc48a3120222d29f8110069c626b428543..7b066fd38248bb48466fa53c0e6462d2af76dc33 100644 (file)
@@ -8,8 +8,6 @@
 #include <linux/debugobjects.h>
 #include <linux/stringify.h>
 
-struct tvec_base;
-
 struct timer_list {
        /*
         * All fields that change during normal runtime grouped to the
index 0d2d3da461397017753231edf208438d6f18b482..c7dc2b5902c057ee0475f786f576fd921b3a8014 100644 (file)
@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
 /* Arithmetic and logical ops */
 /* Shift a tnum left (by a fixed shift) */
 struct tnum tnum_lshift(struct tnum a, u8 shift);
-/* Shift a tnum right (by a fixed shift) */
+/* Shift (rsh) a tnum right (by a fixed shift) */
 struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Shift (arsh) a tnum right (by a fixed min_shift) */
+struct tnum tnum_arshift(struct tnum a, u8 min_shift);
 /* Add two tnums, return @a + @b */
 struct tnum tnum_add(struct tnum a, struct tnum b);
 /* Subtract two tnums, return @a - @b */
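
The rsh/arsh split matters because the verifier models each register as a tristate number { value; mask }, where set mask bits are unknown: a logical shift always feeds in zero bits, while an arithmetic shift must replicate a possibly-unknown sign bit into the vacated positions. A standalone C model of the arshift case (struct layout mirrors the kernel's tnum; demo values are arbitrary):

#include <stdint.h>
#include <stdio.h>

struct tnum { uint64_t value; uint64_t mask; };

static struct tnum tnum_arshift(struct tnum a, uint8_t min_shift)
{
	/* shift value and mask as signed so the sign bit propagates */
	return (struct tnum){ (uint64_t)((int64_t)a.value >> min_shift),
			      (uint64_t)((int64_t)a.mask >> min_shift) };
}

int main(void)
{
	/* sign bit known-set, low byte unknown */
	struct tnum t = { 0x8000000000000000ULL, 0xffULL };
	struct tnum r = tnum_arshift(t, 4);

	printf("value=%#llx mask=%#llx\n",
	       (unsigned long long)r.value, (unsigned long long)r.mask);
	return 0;
}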
index 47f8af22f2168f0376d77068ecbc2df930ae8f79..1dd587ba6d882bb882b44680ec1acebb314c4364 100644 (file)
@@ -701,7 +701,7 @@ extern int tty_unregister_ldisc(int disc);
 extern int tty_set_ldisc(struct tty_struct *tty, int disc);
 extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty);
 extern void tty_ldisc_release(struct tty_struct *tty);
-extern void tty_ldisc_init(struct tty_struct *tty);
+extern int __must_check tty_ldisc_init(struct tty_struct *tty);
 extern void tty_ldisc_deinit(struct tty_struct *tty);
 extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p,
                                 char *f, int count);
index 07ee0f84a46caa9e2b1c446f96009f63b3b99f50..a27604f99ed044d3492395c0328f41538234d7dc 100644 (file)
@@ -112,20 +112,6 @@ u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
 #endif
 }
 
-static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-       raw_write_seqcount_begin(&syncp->seq);
-#endif
-}
-
-static inline void u64_stats_update_end_raw(struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-       raw_write_seqcount_end(&syncp->seq);
-#endif
-}
-
 static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
index eaea63bc79bb2418ebec58e1afa88129ecdc8ca7..ca840345571bf6cf9253647c11112252fa7b6241 100644 (file)
@@ -55,6 +55,7 @@ struct udp_sock {
         * when the socket is uncorked.
         */
        __u16            len;           /* total length of pending frames */
+       __u16            gso_size;
        /*
         * Fields specific to UDP-Lite.
         */
@@ -87,6 +88,8 @@ struct udp_sock {
        int             forward_deficit;
 };
 
+#define UDP_MAX_SEGMENTS       (1 << 6UL)
+
 static inline struct udp_sock *udp_sk(const struct sock *sk)
 {
        return (struct udp_sock *)sk;
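
gso_size records the segment payload size for a corked UDP send, and UDP_MAX_SEGMENTS (64) caps how many datagrams one call may produce. A hedged userspace sketch, assuming the UDP_SEGMENT socket option added alongside this field in the same series (option value taken from the uapi header; verify against your kernel):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103			/* from uapi linux/udp.h */
#endif

/* send one 4000-byte buffer as four 1000-byte datagrams */
static ssize_t send_gso(int fd, const struct sockaddr_in *dst)
{
	char buf[4000];
	int gso_size = 1000;

	memset(buf, 'x', sizeof(buf));
	if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT,
		       &gso_size, sizeof(gso_size)) < 0)
		return -1;
	return sendto(fd, buf, sizeof(buf), 0,
		      (const struct sockaddr *)dst, sizeof(*dst));
}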
index 4b6b9283fa7bf69c1e5459e259a548c90de3d742..8675e145ea8b3f9caa5538f3fb544f2e205ed9bf 100644 (file)
@@ -52,7 +52,7 @@
 #define USB_GADGET_DELAYED_STATUS       0x7fff /* Impossibly large value */
 
 /* big enough to hold our biggest descriptor */
-#define USB_COMP_EP0_BUFSIZ    1024
+#define USB_COMP_EP0_BUFSIZ    4096
 
 /* OS feature descriptor length <= 4kB */
 #define USB_COMP_EP0_OS_DESC_BUFSIZ    4096
index c71def6b310f3edb0b11ccc485be7d770b919831..a240ed2a0372c20281e03a45fe49dea6a2fd60a3 100644 (file)
@@ -24,24 +24,6 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...);
 #define vbg_debug pr_debug
 #endif
 
-/**
- * Allocate memory for generic request and initialize the request header.
- *
- * Return: the allocated memory
- * @len:               Size of memory block required for the request.
- * @req_type:          The generic request type.
- */
-void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type);
-
-/**
- * Perform a generic request.
- *
- * Return: VBox status code
- * @gdev:              The Guest extension device.
- * @req:               Pointer to the request structure.
- */
-int vbg_req_perform(struct vbg_dev *gdev, void *req);
-
 int vbg_hgcm_connect(struct vbg_dev *gdev,
                     struct vmmdev_hgcm_service_location *loc,
                     u32 *client_id, int *vbox_status);
@@ -52,11 +34,6 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
                  u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms,
                  u32 parm_count, int *vbox_status);
 
-int vbg_hgcm_call32(
-       struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
-       struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
-       int *vbox_status);
-
 /**
  * Convert a VirtualBox status code to a standard Linux kernel return value.
  * Return: 0 or negative errno value.
index 988c7355bc22753373ab3f543cb922962f4451e7..fa1b5da2804e6041df3b8f47e5ce0c7ddf828a54 100644 (file)
@@ -157,6 +157,9 @@ int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);
 #endif
 
+#define virtio_device_for_each_vq(vdev, vq) \
+       list_for_each_entry(vq, &vdev->vqs, list)
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
index 9318b2166439ab610b789343f385bcaf42e67cdf..2b0072fa5e92d2eb4ef6d779eb95be3f4d7e9ae3 100644 (file)
@@ -305,4 +305,21 @@ do {                                                                       \
        __ret;                                                          \
 })
 
+/**
+ * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
+ *
+ * @bit: the bit of the word being waited on
+ * @word: the word being waited on, a kernel virtual address
+ *
+ * You can use this helper if bitflags are manipulated atomically rather than
+ * non-atomically under a lock.
+ */
+static inline void clear_and_wake_up_bit(int bit, void *word)
+{
+       clear_bit_unlock(bit, word);
+       /* See wake_up_bit() for which memory barrier you need to use. */
+       smp_mb__after_atomic();
+       wake_up_bit(word, bit);
+}
+
 #endif /* _LINUX_WAIT_BIT_H */
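
A sketch of the intended pairing, under the assumption of a hypothetical object guarded by an atomic busy bit: the owner claims the bit with a locking wait, and releases it through the new helper so the unlock barrier and the wake-up cannot drift apart:

#include <linux/sched.h>
#include <linux/wait_bit.h>

#define MY_BUSY_BIT	0		/* hypothetical flag bit */

struct my_obj { unsigned long flags; };

static int my_obj_claim(struct my_obj *obj)
{
	/* sleeps until the bit could be set atomically */
	return wait_on_bit_lock(&obj->flags, MY_BUSY_BIT, TASK_UNINTERRUPTIBLE);
}

static void my_obj_release(struct my_obj *obj)
{
	/* clear_bit_unlock + barrier + wake_up_bit, in order */
	clear_and_wake_up_bit(MY_BUSY_BIT, &obj->flags);
}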
index 5ee007c1cead8e392a784f725a3960eee049f64b..cb213c1360896814057e42b95dd78c77d093265a 100644 (file)
@@ -5,7 +5,7 @@
  * Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com>
  *
  * This code is partially based upon the TVP5150 driver
- * written by Mauro Carvalho Chehab (mchehab@infradead.org),
+ * written by Mauro Carvalho Chehab <mchehab@kernel.org>,
  * the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com>
  * and the TVP7002 driver in the TI LSP 2.10.00.14
  *
index 0bda0adc744f76f7a01702e05ebf010b0935d621..60a664febba0aa029853de4115dc4aee7686361f 100644 (file)
@@ -1,11 +1,11 @@
 /*
  * generic helper functions for handling video4linux capture buffers
  *
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * Highly based on video-buf written originally by:
  * (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
  * (c) 2006 Ted Walther and John Sokol
  *
  * This program is free software; you can redistribute it and/or modify
index d8b27854e3bf1b90f273bb19529f9d6d4a44d20e..01bd142b979d82a438aece21d8f632defc01eb0a 100644 (file)
@@ -6,11 +6,11 @@
  * into PAGE_SIZE chunks).  They also assume the driver does not need
  * to touch the video data.
  *
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * Highly based on video-buf written originally by:
  * (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
  * (c) 2006 Ted Walther and John Sokol
  *
  * This program is free software; you can redistribute it and/or modify
index 486a97efdb56e5024733ac244d1e9fd926ae6736..36c6a4ad3504fae36e1dc53285de3c2080e3668d 100644 (file)
@@ -6,7 +6,7 @@
  * into PAGE_SIZE chunks).  They also assume the driver does not need
  * to touch the video data.
  *
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
index 8312cc25a3af49e16cc51e6a4373e198c1d198e7..ff766ab207e0eaf4c42d7e4688a3fe5d8a3baa1a 100644 (file)
@@ -223,6 +223,20 @@ struct ipv6_stub {
                                 const struct in6_addr *addr);
        int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6);
+
+       struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+       struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
+                                        struct flowi6 *fl6, int flags);
+       struct fib6_info *(*fib6_table_lookup)(struct net *net,
+                                             struct fib6_table *table,
+                                             int oif, struct flowi6 *fl6,
+                                             int flags);
+       struct fib6_info *(*fib6_multipath_select)(const struct net *net,
+                                                  struct fib6_info *f6i,
+                                                  struct flowi6 *fl6, int oif,
+                                                  const struct sk_buff *skb,
+                                                  int strict);
+
        void (*udpv6_encap_enable)(void);
        void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
                              const struct in6_addr *solicited_addr,
index b619a190ff1283a9bd43379c6dc54d4d8617dc4c..893bbbb5d2fa87404a22fe042b4d41f9e8e9fe10 100644 (file)
@@ -1393,6 +1393,8 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
                               const void *param, u32 timeout);
 struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
                                  const void *param, u8 event, u32 timeout);
+int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
+                  const void *param);
 
 int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
                 const void *param);
index f801fc940b298cb3180584b039a9605d3ee9f23e..808f1d1673494d3e09b1c42f229308371f4b2ad9 100644 (file)
@@ -198,6 +198,7 @@ struct bonding {
        struct   slave __rcu *primary_slave;
        struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
        bool     force_primary;
+       u32      nest_level;
        s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
        int     (*recv_probe)(const struct sk_buff *, struct bonding *,
                              struct slave *);
@@ -284,8 +285,15 @@ static inline bool bond_needs_speed_duplex(const struct bonding *bond)
 
 static inline bool bond_is_nondyn_tlb(const struct bonding *bond)
 {
-       return (BOND_MODE(bond) == BOND_MODE_TLB)  &&
-              (bond->params.tlb_dynamic_lb == 0);
+       return (bond_is_lb(bond) && bond->params.tlb_dynamic_lb == 0);
+}
+
+static inline bool bond_mode_can_use_xmit_hash(const struct bonding *bond)
+{
+       return (BOND_MODE(bond) == BOND_MODE_8023AD ||
+               BOND_MODE(bond) == BOND_MODE_XOR ||
+               BOND_MODE(bond) == BOND_MODE_TLB ||
+               BOND_MODE(bond) == BOND_MODE_ALB);
 }
 
 static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond)
index 2e4f71e16e95676c26948ae7a5a564c9a0652bf8..9686a1aa4ec90e382cb96dee62220ad320edd41f 100644 (file)
@@ -35,6 +35,14 @@ struct devlink {
        char priv[0] __aligned(NETDEV_ALIGN);
 };
 
+struct devlink_port_attrs {
+       bool set;
+       enum devlink_port_flavour flavour;
+       u32 port_number; /* same value as "split group" */
+       bool split;
+       u32 split_subport_number;
+};
+
 struct devlink_port {
        struct list_head list;
        struct devlink *devlink;
@@ -43,8 +51,7 @@ struct devlink_port {
        enum devlink_port_type type;
        enum devlink_port_type desired_type;
        void *type_dev;
-       bool split;
-       u32 split_group;
+       struct devlink_port_attrs attrs;
 };
 
 struct devlink_sb_pool_info {
@@ -367,8 +374,12 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port,
 void devlink_port_type_ib_set(struct devlink_port *devlink_port,
                              struct ib_device *ibdev);
 void devlink_port_type_clear(struct devlink_port *devlink_port);
-void devlink_port_split_set(struct devlink_port *devlink_port,
-                           u32 split_group);
+void devlink_port_attrs_set(struct devlink_port *devlink_port,
+                           enum devlink_port_flavour flavour,
+                           u32 port_number, bool split,
+                           u32 split_subport_number);
+int devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
+                                   char *name, size_t len);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
                        u32 size, u16 ingress_pools_count,
                        u16 egress_pools_count, u16 ingress_tc_count,
@@ -466,11 +477,20 @@ static inline void devlink_port_type_clear(struct devlink_port *devlink_port)
 {
 }
 
-static inline void devlink_port_split_set(struct devlink_port *devlink_port,
-                                         u32 split_group)
+static inline void devlink_port_attrs_set(struct devlink_port *devlink_port,
+                                         enum devlink_port_flavour flavour,
+                                         u32 port_number, bool split,
+                                         u32 split_subport_number)
 {
 }
 
+static inline int
+devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
+                               char *name, size_t len)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int devlink_sb_register(struct devlink *devlink,
                                      unsigned int sb_index, u32 size,
                                      u16 ingress_pools_count,
index 60fb4ec8ba616060f17963caa7cb2ac23847917b..fdbd6082945d6448b58c7c4cd52169e2606b3fd2 100644 (file)
 #include <linux/of.h>
 #include <linux/ethtool.h>
 #include <linux/net_tstamp.h>
+#include <linux/phy.h>
 #include <net/devlink.h>
 #include <net/switchdev.h>
 
 struct tc_action;
 struct phy_device;
 struct fixed_phy_status;
+struct phylink_link_state;
 
 enum dsa_tag_protocol {
        DSA_TAG_PROTO_NONE = 0,
@@ -199,6 +201,7 @@ struct dsa_port {
        u8                      stp_state;
        struct net_device       *bridge_dev;
        struct devlink_port     devlink_port;
+       struct phylink          *pl;
        /*
         * Original copy of the master netdev ethtool_ops
         */
@@ -353,13 +356,37 @@ struct dsa_switch_ops {
        void    (*fixed_link_update)(struct dsa_switch *ds, int port,
                                struct fixed_phy_status *st);
 
+       /*
+        * PHYLINK integration
+        */
+       void    (*phylink_validate)(struct dsa_switch *ds, int port,
+                                   unsigned long *supported,
+                                   struct phylink_link_state *state);
+       int     (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
+                                         struct phylink_link_state *state);
+       void    (*phylink_mac_config)(struct dsa_switch *ds, int port,
+                                     unsigned int mode,
+                                     const struct phylink_link_state *state);
+       void    (*phylink_mac_an_restart)(struct dsa_switch *ds, int port);
+       void    (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
+                                        unsigned int mode,
+                                        phy_interface_t interface);
+       void    (*phylink_mac_link_up)(struct dsa_switch *ds, int port,
+                                      unsigned int mode,
+                                      phy_interface_t interface,
+                                      struct phy_device *phydev);
+       void    (*phylink_fixed_state)(struct dsa_switch *ds, int port,
+                                      struct phylink_link_state *state);
        /*
         * ethtool hardware statistics.
         */
-       void    (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
+       void    (*get_strings)(struct dsa_switch *ds, int port,
+                              u32 stringset, uint8_t *data);
        void    (*get_ethtool_stats)(struct dsa_switch *ds,
                                     int port, uint64_t *data);
-       int     (*get_sset_count)(struct dsa_switch *ds, int port);
+       int     (*get_sset_count)(struct dsa_switch *ds, int port, int sset);
+       void    (*get_ethtool_phy_stats)(struct dsa_switch *ds,
+                                        int port, uint64_t *data);
 
        /*
         * ethtool Wake-on-LAN
@@ -588,4 +615,10 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
 #define BRCM_TAG_GET_PORT(v)           ((v) >> 8)
 #define BRCM_TAG_GET_QUEUE(v)          ((v) & 0xff)
 
+
+int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
+int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data);
+int dsa_port_get_phy_sset_count(struct dsa_port *dp);
+void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
+
 #endif
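
With get_sset_count() and get_strings() now taking a stringset argument, a driver reports only the set it owns (typically ETH_SS_STATS) and the DSA core supplies PHY statistics via the new dsa_port_get_phy_* helpers. A hedged sketch of the driver side (driver name and stat count are illustrative):

#include <linux/ethtool.h>
#include <net/dsa.h>

#define MYDRV_NUM_STATS	4		/* illustrative */

static int mydrv_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
	return sset == ETH_SS_STATS ? MYDRV_NUM_STATS : 0;
}

static void mydrv_get_strings(struct dsa_switch *ds, int port,
			      u32 stringset, uint8_t *data)
{
	if (stringset != ETH_SS_STATS)
		return;
	/* copy MYDRV_NUM_STATS names into data, ETH_GSTRING_LEN bytes apart */
}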
index d044aa60cc76ff2490e8b0837d2a357e57e628e8..b39643ef4c95fa0727bea05b15d26625e80940c4 100644 (file)
@@ -219,6 +219,33 @@ static inline __be32 erspan_get_timestamp(void)
        return htonl((u32)h_usecs);
 }
 
+/* ERSPAN BSO (Bad/Short/Oversized), see RFC1757
+ *   00b --> Good frame with no error, or unknown integrity
+ *   01b --> Payload is a Short Frame
+ *   10b --> Payload is an Oversized Frame
+ *   11b --> Payload is a Bad Frame with CRC or Alignment Error
+ */
+enum erspan_bso {
+       BSO_NOERROR = 0x0,
+       BSO_SHORT = 0x1,
+       BSO_OVERSIZED = 0x2,
+       BSO_BAD = 0x3,
+};
+
+static inline u8 erspan_detect_bso(struct sk_buff *skb)
+{
+       /* BSO_BAD is not handled because the frame CRC
+        * or alignment error information is in FCS.
+        */
+       if (skb->len < ETH_ZLEN)
+               return BSO_SHORT;
+
+       if (skb->len > ETH_FRAME_LEN)
+               return BSO_OVERSIZED;
+
+       return BSO_NOERROR;
+}
+
 static inline void erspan_build_header_v2(struct sk_buff *skb,
                                          u32 id, u8 direction, u16 hwid,
                                          bool truncate, bool is_ipv4)
@@ -248,6 +275,7 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
                vlan_tci = ntohs(qp->tci);
        }
 
+       bso = erspan_detect_bso(skb);
        skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
        ershdr = (struct erspan_base_hdr *)skb->data;
        memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
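
erspan_detect_bso() classifies purely by frame length against the standard Ethernet bounds (ETH_ZLEN = 60, ETH_FRAME_LEN = 1514), since CRC and alignment errors are not visible at this layer. A tiny userspace check of those thresholds:

#include <linux/if_ether.h>
#include <stdio.h>

static const char *bso_name(unsigned int len)
{
	if (len < ETH_ZLEN)
		return "short";
	if (len > ETH_FRAME_LEN)
		return "oversized";
	return "no error";
}

int main(void)
{
	unsigned int lens[] = { 42, 60, 1514, 9000 };

	for (int i = 0; i < 4; i++)
		printf("%4u bytes: %s\n", lens[i], bso_name(lens[i]));
	return 0;
}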
index 9a074776f70b6606496bc921dcaaea069e7f8696..adc24df56b907d4598a1b08d4ab5da01eddc0c85 100644 (file)
@@ -226,6 +226,11 @@ struct flow_dissector {
        unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
 };
 
+struct flow_keys_basic {
+       struct flow_dissector_key_control control;
+       struct flow_dissector_key_basic basic;
+};
+
 struct flow_keys {
        struct flow_dissector_key_control control;
 #define FLOW_KEYS_HASH_START_FIELD basic
@@ -244,14 +249,14 @@ __be32 flow_get_u32_src(const struct flow_keys *flow);
 __be32 flow_get_u32_dst(const struct flow_keys *flow);
 
 extern struct flow_dissector flow_keys_dissector;
-extern struct flow_dissector flow_keys_buf_dissector;
+extern struct flow_dissector flow_keys_basic_dissector;
 
 /* struct flow_keys_digest:
  *
  * This structure is used to hold a digest of the full flow keys. This is a
  * larger "hash" of a flow to allow definitively matching specific flows where
  * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
- * that it can by used in CB of skb (see sch_choke for an example).
+ * that it can be used in CB of skb (see sch_choke for an example).
  */
 #define FLOW_KEYS_DIGEST_LEN   16
 struct flow_keys_digest {
index 44b9c00f72232c31d3cf478c901e7b164748bb04..e117617e3c347af5eec412ebd8cbb2271de22b56 100644 (file)
@@ -12,7 +12,8 @@
 void *ife_encode(struct sk_buff *skb, u16 metalen);
 void *ife_decode(struct sk_buff *skb, u16 *metalen);
 
-void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen);
+void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype,
+                         u16 *dlen, u16 *totlen);
 int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen,
                        const void *dval);
 
index b68fea022a82e19976a9dfbaa1cdc44192e1fc08..0a6c9e0f2b5a35b8dd04cb222147664cec67218c 100644 (file)
@@ -23,8 +23,6 @@
 #include <net/inet_sock.h>
 #include <net/request_sock.h>
 
-#define INET_CSK_DEBUG 1
-
 /* Cancel timers, when they are not required. */
 #undef INET_CSK_CLEAR_TIMERS
 
@@ -77,6 +75,7 @@ struct inet_connection_sock_af_ops {
  * @icsk_af_ops                   Operations which are AF_INET{4,6} specific
  * @icsk_ulp_ops          Pluggable ULP control hook
  * @icsk_ulp_data         ULP private data
+ * @icsk_clean_acked      Clean acked data hook
  * @icsk_listen_portaddr_node  hash to the portaddr listener hashtable
  * @icsk_ca_state:        Congestion control state
  * @icsk_retransmits:     Number of unrecovered [RTO] timeouts
@@ -102,6 +101,7 @@ struct inet_connection_sock {
        const struct inet_connection_sock_af_ops *icsk_af_ops;
        const struct tcp_ulp_ops  *icsk_ulp_ops;
        void                      *icsk_ulp_data;
+       void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
        struct hlist_node         icsk_listen_portaddr_node;
        unsigned int              (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
        __u8                      icsk_ca_state:6,
@@ -194,10 +194,6 @@ static inline void inet_csk_delack_init(struct sock *sk)
 void inet_csk_delete_keepalive_timer(struct sock *sk);
 void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
 
-#ifdef INET_CSK_DEBUG
-extern const char inet_csk_timer_bug_msg[];
-#endif
-
 static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -212,12 +208,9 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 #ifdef INET_CSK_CLEAR_TIMERS
                sk_stop_timer(sk, &icsk->icsk_delack_timer);
 #endif
+       } else {
+               pr_debug("inet_csk BUG: unknown timer value\n");
        }
-#ifdef INET_CSK_DEBUG
-       else {
-               pr_debug("%s", inet_csk_timer_bug_msg);
-       }
-#endif
 }
 
 /*
@@ -230,10 +223,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
        struct inet_connection_sock *icsk = inet_csk(sk);
 
        if (when > max_when) {
-#ifdef INET_CSK_DEBUG
                pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
                         sk, what, when, current_text_addr());
-#endif
                when = max_when;
        }
 
@@ -247,12 +238,9 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
                icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
                icsk->icsk_ack.timeout = jiffies + when;
                sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
+       } else {
+               pr_debug("inet_csk BUG: unknown timer value\n");
        }
-#ifdef INET_CSK_DEBUG
-       else {
-               pr_debug("%s", inet_csk_timer_bug_msg);
-       }
-#endif
 }
 
 static inline unsigned long
index 0a671c32d6b96008be5432cb94fa088bac210ae4..83d5b3c2ac421ca29e8ed654dcbf054379e3001b 100644 (file)
@@ -147,6 +147,7 @@ struct inet_cork {
        __u8                    ttl;
        __s16                   tos;
        char                    priority;
+       __u16                   gso_size;
 };
 
 struct inet_cork_full {
index c7be1ca8e562f82ee9b24122cc21f45172458e23..659d8ed5a3bc098dcfc697dcd5c8df83873c1950 100644 (file)
@@ -62,6 +62,7 @@ struct inet_timewait_sock {
 #define tw_dr                  __tw_common.skc_tw_dr
 
        int                     tw_timeout;
+       __u32                   tw_mark;
        volatile unsigned char  tw_substate;
        unsigned char           tw_rcv_wscale;
 
index dc4a2d6e58a516a5ba8fe38ea3fe07522653da93..bada1f1f871e163b1c7d0434e5928242648cbfad 100644 (file)
@@ -76,6 +76,7 @@ struct ipcm_cookie {
        __u8                    ttl;
        __s16                   tos;
        char                    priority;
+       __u16                   gso_size;
 };
 
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
@@ -171,7 +172,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
                                        int len, int odd, struct sk_buff *skb),
                            void *from, int length, int transhdrlen,
                            struct ipcm_cookie *ipc, struct rtable **rtp,
-                           unsigned int flags);
+                           struct inet_cork *cork, unsigned int flags);
 
 static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
 {
index 1af450d4e9233c531d154644d8b7b94cbdb611c4..cc70f6da84627a3a867c3d4a5e9ddd6d052f1298 100644 (file)
@@ -135,7 +135,7 @@ struct fib6_nh {
 
 struct fib6_info {
        struct fib6_table               *fib6_table;
-       struct fib6_info __rcu          *rt6_next;
+       struct fib6_info __rcu          *fib6_next;
        struct fib6_node __rcu          *fib6_node;
 
        /* Multipath routes:
@@ -192,11 +192,11 @@ struct rt6_info {
 
 #define for_each_fib6_node_rt_rcu(fn)                                  \
        for (rt = rcu_dereference((fn)->leaf); rt;                      \
-            rt = rcu_dereference(rt->rt6_next))
+            rt = rcu_dereference(rt->fib6_next))
 
 #define for_each_fib6_walker_rt(w)                                     \
        for (rt = (w)->leaf; rt;                                        \
-            rt = rcu_dereference_protected(rt->rt6_next, 1))
+            rt = rcu_dereference_protected(rt->fib6_next, 1))
 
 static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 {
@@ -376,9 +376,24 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
                                   const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup);
 
-struct fib6_node *fib6_lookup(struct fib6_node *root,
-                             const struct in6_addr *daddr,
-                             const struct in6_addr *saddr);
+/* called with rcu lock held; can return error pointer
+ * caller needs to select path
+ */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+                             int flags);
+
+/* called with rcu lock held; caller needs to select path */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+                                   int oif, struct flowi6 *fl6, int strict);
+
+struct fib6_info *fib6_multipath_select(const struct net *net,
+                                       struct fib6_info *match,
+                                       struct flowi6 *fl6, int oif,
+                                       const struct sk_buff *skb, int strict);
+
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+                                  const struct in6_addr *daddr,
+                                  const struct in6_addr *saddr);
 
 struct fib6_node *fib6_locate(struct fib6_node *root,
                              const struct in6_addr *daddr, int dst_len,
index 8df4ff798b040dbef0e1b2959f7131ae4be3398c..4cf1ef935ed9e678f2551879dda9df0ce08b1758 100644 (file)
@@ -279,6 +279,27 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
               !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
 }
 
+static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+{
+       struct inet6_dev *idev;
+       unsigned int mtu;
+
+       if (dst_metric_locked(dst, RTAX_MTU)) {
+               mtu = dst_metric_raw(dst, RTAX_MTU);
+               if (mtu)
+                       return mtu;
+       }
+
+       mtu = IPV6_MIN_MTU;
+       rcu_read_lock();
+       idev = __in6_dev_get(dst->dev);
+       if (idev)
+               mtu = idev->cnf.mtu6;
+       rcu_read_unlock();
+
+       return mtu;
+}
+
 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
                                   struct net_device *dev, struct sk_buff *skb,
                                   const void *daddr);
index 751646adc7692d605727472775f0d2d6d6fa9f22..90ff430f5e9d04b1899ccadbc888f6f1376921b4 100644 (file)
@@ -477,12 +477,12 @@ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstat
        return (struct ip_tunnel_info *)lwtstate->data;
 }
 
-extern struct static_key ip_tunnel_metadata_cnt;
+DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
 
 /* Returns > 0 if metadata should be collected */
 static inline int ip_tunnel_collect_metadata(void)
 {
-       return static_key_false(&ip_tunnel_metadata_cnt);
+       return static_branch_unlikely(&ip_tunnel_metadata_cnt);
 }
 
 void __init ip_tunnel_core_init(void);
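
The DECLARE_STATIC_KEY_FALSE/static_branch_unlikely pair moves this check onto the jump-label API: the branch compiles to a no-op until a writer enables the key, at which point the code is live-patched. A hedged sketch of the enable/disable side (the real reference counting lives in ip_tunnel_core.c):

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);

static void metadata_users_inc(void)
{
	static_branch_inc(&ip_tunnel_metadata_cnt);	/* patches the branch in */
}

static void metadata_users_dec(void)
{
	static_branch_dec(&ip_tunnel_metadata_cnt);	/* and back out at zero */
}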
index eb0bec043c9618b6f44a5bcb3d2138056672c851..0ac795b41ab80029ba4149f70933994dd2db64e2 100644 (file)
@@ -668,6 +668,7 @@ struct ip_vs_dest {
        volatile unsigned int   flags;          /* dest status flags */
        atomic_t                conn_flags;     /* flags to copy to conn */
        atomic_t                weight;         /* server weight */
+       atomic_t                last_weight;    /* server latest weight */
 
        refcount_t              refcnt;         /* reference counter */
        struct ip_vs_stats      stats;          /* statistics */
index 68b167d988792f1278af3bdb808768bea23fe30c..798558fd16817f540e4e6923f5385fc193197c1e 100644 (file)
@@ -298,6 +298,7 @@ struct ipcm6_cookie {
        __s16 tclass;
        __s8  dontfrag;
        struct ipv6_txoptions *opt;
+       __u16 gso_size;
 };
 
 static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
@@ -950,6 +951,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
+                            struct inet_cork_full *cork,
                             const struct sockcm_cookie *sockc);
 
 static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
@@ -958,8 +960,6 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
                              &inet6_sk(sk)->cork);
 }
 
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
-
 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6);
 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
index 5c40f118c0fad6448796579b01acba5f77e1e4fa..df528a6235487d3678ffdccb28facf977a1e7098 100644 (file)
@@ -97,6 +97,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb)
 
 struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority,
                          struct proto *prot, int kern);
+void llc_sk_stop_all_timers(struct sock *sk, bool sync);
 void llc_sk_free(struct sock *sk);
 
 void llc_sk_reset(struct sock *sk);
index d2279b2d61aa98ca4bee0f338f00dbca13f6b2cf..b2f3a0c018e78c43cebb3a70cf2e357fdd6ea355 100644 (file)
@@ -2080,7 +2080,7 @@ struct ieee80211_txq {
  *     virtual interface might not be given air time for the transmission of
  *     the frame, as it is not synced with the AP/P2P GO yet, and thus the
  *     deauthentication frame might not be transmitted.
- >
+ *
  * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
  *     support QoS NDP for AP probing - that's most likely a driver bug.
  *
index e421f86af043b942b7addfe048dc21bc4d0a6bfc..6c1eecd56a4d072f14074b7e1f5fe59ebb92ede2 100644 (file)
@@ -246,6 +246,7 @@ static inline void *neighbour_priv(const struct neighbour *n)
 #define NEIGH_UPDATE_F_OVERRIDE                        0x00000001
 #define NEIGH_UPDATE_F_WEAK_OVERRIDE           0x00000002
 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER       0x00000004
+#define NEIGH_UPDATE_F_EXT_LEARNED             0x20000000
 #define NEIGH_UPDATE_F_ISROUTER                        0x40000000
 #define NEIGH_UPDATE_F_ADMIN                   0x80000000
 
@@ -526,5 +527,21 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
        } while (read_seqretry(&n->ha_lock, seq));
 }
 
-
+static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+                                           int *notify)
+{
+       u8 ndm_flags = 0;
+
+       if (!(flags & NEIGH_UPDATE_F_ADMIN))
+               return;
+
+       ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+       if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+               if (ndm_flags & NTF_EXT_LEARNED)
+                       neigh->flags |= NTF_EXT_LEARNED;
+               else
+                       neigh->flags &= ~NTF_EXT_LEARNED;
+               *notify = 1;
+       }
+}
 #endif
index ebd869473603af5dc8f6225ff6b3c05008a40e06..cd24be4c4a99bd633f5718c822920f1aa44b7f12 100644 (file)
@@ -6,7 +6,7 @@
 
 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-                      const struct nf_nat_range *range,
+                      const struct nf_nat_range2 *range,
                       const struct net_device *out);
 
 void nf_nat_masquerade_ipv4_register_notifier(void);
index 1ed4f2631ed6db65e1bc6d427495a8892fabeb99..0c3b5ebf0bb8d4832322ed16c1c3a106712d73d3 100644 (file)
@@ -3,7 +3,7 @@
 #define _NF_NAT_MASQUERADE_IPV6_H_
 
 unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                       const struct net_device *out);
 void nf_nat_masquerade_ipv6_register_notifier(void);
 void nf_nat_masquerade_ipv6_unregister_notifier(void);
index 833752dd0c58365631270cce2224459ee8de382a..ba9fa4592f2b238fb642cbefce130d7a100bb0ef 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/netdevice.h>
 #include <linux/rhashtable.h>
 #include <linux/rcupdate.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/dst.h>
 
 struct nf_flowtable;
@@ -13,25 +14,24 @@ struct nf_flowtable;
 struct nf_flowtable_type {
        struct list_head                list;
        int                             family;
-       void                            (*gc)(struct work_struct *work);
+       int                             (*init)(struct nf_flowtable *ft);
        void                            (*free)(struct nf_flowtable *ft);
-       const struct rhashtable_params  *params;
        nf_hookfn                       *hook;
        struct module                   *owner;
 };
 
 struct nf_flowtable {
+       struct list_head                list;
        struct rhashtable               rhashtable;
        const struct nf_flowtable_type  *type;
        struct delayed_work             gc_work;
 };
 
 enum flow_offload_tuple_dir {
-       FLOW_OFFLOAD_DIR_ORIGINAL,
-       FLOW_OFFLOAD_DIR_REPLY,
-       __FLOW_OFFLOAD_DIR_MAX          = FLOW_OFFLOAD_DIR_REPLY,
+       FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
+       FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
+       FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
 };
-#define FLOW_OFFLOAD_DIR_MAX   (__FLOW_OFFLOAD_DIR_MAX + 1)
 
 struct flow_offload_tuple {
        union {
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
 
        int                             oifidx;
 
+       u16                             mtu;
+
        struct dst_entry                *dst_cache;
 };
 
@@ -66,6 +68,7 @@ struct flow_offload_tuple_rhash {
 #define FLOW_OFFLOAD_SNAT      0x1
 #define FLOW_OFFLOAD_DNAT      0x2
 #define FLOW_OFFLOAD_DYING     0x4
+#define FLOW_OFFLOAD_TEARDOWN  0x8
 
 struct flow_offload {
        struct flow_offload_tuple_rhash         tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -98,11 +101,14 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 
 void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
 
+int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
-void nf_flow_offload_work_gc(struct work_struct *work);
-extern const struct rhashtable_params nf_flow_offload_rhash_params;
 
-void flow_offload_dead(struct flow_offload *flow);
+void flow_offload_teardown(struct flow_offload *flow);
+static inline void flow_offload_dead(struct flow_offload *flow)
+{
+       flow->flags |= FLOW_OFFLOAD_DYING;
+}
 
 int nf_flow_snat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
index 207a467e7ca60962bc6572933f4476ba57562ac0..da3d601cadeeb35d2a7b7944af1479b1193f4c91 100644 (file)
@@ -39,7 +39,7 @@ struct nf_conn_nat {
 
 /* Set up the info structure to map into this range. */
 unsigned int nf_nat_setup_info(struct nf_conn *ct,
-                              const struct nf_nat_range *range,
+                              const struct nf_nat_range2 *range,
                               enum nf_nat_manip_type maniptype);
 
 extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
index ce7c2b4e64bb7bd1dd7575412bd0a2c0918f0bab..8bad2560576f04f4e4c26a3b9552f04044ab38e4 100644 (file)
@@ -7,7 +7,7 @@ struct nf_nat_l3proto {
        u8      l3proto;
 
        bool    (*in_range)(const struct nf_conntrack_tuple *t,
-                           const struct nf_nat_range *range);
+                           const struct nf_nat_range2 *range);
 
        u32     (*secure_port)(const struct nf_conntrack_tuple *t, __be16);
 
@@ -33,7 +33,7 @@ struct nf_nat_l3proto {
                                  struct flowi *fl);
 
        int     (*nlattr_to_range)(struct nlattr *tb[],
-                                  struct nf_nat_range *range);
+                                  struct nf_nat_range2 *range);
 };
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
@@ -48,30 +48,26 @@ unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
                            const struct nf_hook_state *state,
                            unsigned int (*do_chain)(void *priv,
                                                     struct sk_buff *skb,
-                                                    const struct nf_hook_state *state,
-                                                    struct nf_conn *ct));
+                                                    const struct nf_hook_state *state));
 
 unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
                             const struct nf_hook_state *state,
                             unsigned int (*do_chain)(void *priv,
                                                      struct sk_buff *skb,
-                                                     const struct nf_hook_state *state,
-                                                     struct nf_conn *ct));
+                                                     const struct nf_hook_state *state));
 
 unsigned int nf_nat_ipv4_local_fn(void *priv,
                                  struct sk_buff *skb,
                                  const struct nf_hook_state *state,
                                  unsigned int (*do_chain)(void *priv,
                                                           struct sk_buff *skb,
-                                                          const struct nf_hook_state *state,
-                                                          struct nf_conn *ct));
+                                                          const struct nf_hook_state *state));
 
 unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
                            const struct nf_hook_state *state,
                            unsigned int (*do_chain)(void *priv,
                                                     struct sk_buff *skb,
-                                                    const struct nf_hook_state *state,
-                                                    struct nf_conn *ct));
+                                                    const struct nf_hook_state *state));
 
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
                                    enum ip_conntrack_info ctinfo,
@@ -81,29 +77,25 @@ unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
                            const struct nf_hook_state *state,
                            unsigned int (*do_chain)(void *priv,
                                                     struct sk_buff *skb,
-                                                    const struct nf_hook_state *state,
-                                                    struct nf_conn *ct));
+                                                    const struct nf_hook_state *state));
 
 unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
                             const struct nf_hook_state *state,
                             unsigned int (*do_chain)(void *priv,
                                                      struct sk_buff *skb,
-                                                     const struct nf_hook_state *state,
-                                                     struct nf_conn *ct));
+                                                     const struct nf_hook_state *state));
 
 unsigned int nf_nat_ipv6_local_fn(void *priv,
                                  struct sk_buff *skb,
                                  const struct nf_hook_state *state,
                                  unsigned int (*do_chain)(void *priv,
                                                           struct sk_buff *skb,
-                                                          const struct nf_hook_state *state,
-                                                          struct nf_conn *ct));
+                                                          const struct nf_hook_state *state));
 
 unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
                            const struct nf_hook_state *state,
                            unsigned int (*do_chain)(void *priv,
                                                     struct sk_buff *skb,
-                                                    const struct nf_hook_state *state,
-                                                    struct nf_conn *ct));
+                                                    const struct nf_hook_state *state));
 
 #endif /* _NF_NAT_L3PROTO_H */
index 67835ff8a2d98d4cb8914ee8457f07072962f94b..b4d6b29bca62af11eb25906d0c1f31cd5a97acca 100644 (file)
@@ -34,12 +34,12 @@ struct nf_nat_l4proto {
         */
        void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
                             struct nf_conntrack_tuple *tuple,
-                            const struct nf_nat_range *range,
+                            const struct nf_nat_range2 *range,
                             enum nf_nat_manip_type maniptype,
                             const struct nf_conn *ct);
 
        int (*nlattr_to_range)(struct nlattr *tb[],
-                              struct nf_nat_range *range);
+                              struct nf_nat_range2 *range);
 };
 
 /* Protocol registration. */
@@ -72,11 +72,11 @@ bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 
 void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
                                 struct nf_conntrack_tuple *tuple,
-                                const struct nf_nat_range *range,
+                                const struct nf_nat_range2 *range,
                                 enum nf_nat_manip_type maniptype,
                                 const struct nf_conn *ct, u16 *rover);
 
 int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
-                                  struct nf_nat_range *range);
+                                  struct nf_nat_range2 *range);
 
 #endif /*_NF_NAT_L4PROTO_H*/
index 5ddabb08c472da405effc78affee4fa995cd8a3a..c129aacc8ae8f9c77c323df5b58ecc499d416e12 100644 (file)
@@ -7,7 +7,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
                     const struct nf_nat_ipv4_multi_range_compat *mr,
                     unsigned int hooknum);
 unsigned int
-nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                     unsigned int hooknum);
 
 #endif /* _NF_NAT_REDIRECT_H_ */
index cd368d1b8cb8439fffb094fa00b228a88142ea14..fe23dc584be6b46df899f88dcabada703923e845 100644 (file)
@@ -170,6 +170,7 @@ struct nft_data_desc {
 int nft_data_init(const struct nft_ctx *ctx,
                  struct nft_data *data, unsigned int size,
                  struct nft_data_desc *desc, const struct nlattr *nla);
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
 void nft_data_release(const struct nft_data *data, enum nft_data_types type);
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
                  enum nft_data_types type, unsigned int len);
@@ -275,23 +276,6 @@ struct nft_set_estimate {
        enum nft_set_class      space;
 };
 
-/**
- *      struct nft_set_type - nf_tables set type
- *
- *      @select_ops: function to select nft_set_ops
- *      @ops: default ops, used when no select_ops functions is present
- *      @list: used internally
- *      @owner: module reference
- */
-struct nft_set_type {
-       const struct nft_set_ops        *(*select_ops)(const struct nft_ctx *,
-                                                      const struct nft_set_desc *desc,
-                                                      u32 flags);
-       const struct nft_set_ops        *ops;
-       struct list_head                list;
-       struct module                   *owner;
-};
-
 struct nft_set_ext;
 struct nft_expr;
 
@@ -310,7 +294,6 @@ struct nft_expr;
  *     @init: initialize private data of new set instance
  *     @destroy: destroy private data of set instance
  *     @elemsize: element private size
- *     @features: features supported by the implementation
  */
 struct nft_set_ops {
        bool                            (*lookup)(const struct net *net,
@@ -361,9 +344,23 @@ struct nft_set_ops {
        void                            (*destroy)(const struct nft_set *set);
 
        unsigned int                    elemsize;
+};
+
+/**
+ *      struct nft_set_type - nf_tables set type
+ *
+ *      @ops: set ops for this type
+ *      @list: used internally
+ *      @owner: module reference
+ *      @features: features supported by the implementation
+ */
+struct nft_set_type {
+       const struct nft_set_ops        ops;
+       struct list_head                list;
+       struct module                   *owner;
        u32                             features;
-       const struct nft_set_type       *type;
 };
+#define to_set_type(o) container_of(o, struct nft_set_type, ops)
 
 int nft_register_set(struct nft_set_type *type);
 void nft_unregister_set(struct nft_set_type *type);
@@ -589,7 +586,7 @@ static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
        return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
 }
 
-static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
+static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext)
 {
        return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
 }
@@ -607,7 +604,7 @@ static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
 static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
 {
        return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
-              time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
+              time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
 }
 
 static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -736,6 +733,10 @@ struct nft_expr_ops {
        int                             (*init)(const struct nft_ctx *ctx,
                                                const struct nft_expr *expr,
                                                const struct nlattr * const tb[]);
+       void                            (*activate)(const struct nft_ctx *ctx,
+                                                   const struct nft_expr *expr);
+       void                            (*deactivate)(const struct nft_ctx *ctx,
+                                                     const struct nft_expr *expr);
        void                            (*destroy)(const struct nft_ctx *ctx,
                                                   const struct nft_expr *expr);
        int                             (*dump)(struct sk_buff *skb,
@@ -1015,9 +1016,9 @@ static inline void *nft_obj_data(const struct nft_object *obj)
 
 #define nft_expr_obj(expr)     *((struct nft_object **)nft_expr_priv(expr))
 
-struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
-                                       const struct nlattr *nla, u32 objtype,
-                                       u8 genmask);
+struct nft_object *nft_obj_lookup(const struct nft_table *table,
+                                 const struct nlattr *nla, u32 objtype,
+                                 u8 genmask);
 
 void nft_obj_notify(struct net *net, struct nft_table *table,
                    struct nft_object *obj, u32 portid, u32 seq,
@@ -1106,12 +1107,9 @@ struct nft_flowtable {
        struct nf_flowtable             data;
 };
 
-struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
-                                                const struct nlattr *nla,
-                                                u8 genmask);
-void nft_flow_table_iterate(struct net *net,
-                           void (*iter)(struct nf_flowtable *flowtable, void *data),
-                           void *data);
+struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+                                          const struct nlattr *nla,
+                                          u8 genmask);
 
 void nft_register_flowtable_type(struct nf_flowtable_type *type);
 void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
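
Under the flattened layout above, a set backend now embeds its ops in the type, moves its feature flags next to them, and registers the type as a whole; the core recovers the type from an ops pointer with to_set_type(). A hedged sketch (backend name, element struct, and flags are illustrative):

struct my_elem { u64 key; };		/* illustrative element */

static struct nft_set_type my_set_type __read_mostly = {
	.owner		= THIS_MODULE,
	.features	= NFT_SET_MAP | NFT_SET_TIMEOUT,
	.ops		= {
		.elemsize	= ALIGN(sizeof(struct my_elem), __alignof__(u64)),
		/* .lookup, .insert, .remove, .walk, .estimate, ... */
	},
};

static int __init my_set_module_init(void)
{
	return nft_register_set(&my_set_type);
}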
index ea5aab568be831d7db6ae68e759a0af05daa880c..cd6915b6c054e363e8bb621068b5e18e87fa8905 100644 (file)
@@ -10,6 +10,9 @@ extern struct nft_expr_type nft_byteorder_type;
 extern struct nft_expr_type nft_payload_type;
 extern struct nft_expr_type nft_dynset_type;
 extern struct nft_expr_type nft_range_type;
+extern struct nft_expr_type nft_meta_type;
+extern struct nft_expr_type nft_rt_type;
+extern struct nft_expr_type nft_exthdr_type;
 
 int nf_tables_core_module_init(void);
 void nf_tables_core_module_exit(void);
index 612cfb63ac682181e65befda355c923a3e84d816..ea32a7d3cf1bc87963b259a9902042a7c33e41e4 100644 (file)
@@ -1,18 +1 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _KER_NFNETLINK_LOG_H
-#define _KER_NFNETLINK_LOG_H
-
-void
-nfulnl_log_packet(struct net *net,
-                 u_int8_t pf,
-                 unsigned int hooknum,
-                 const struct sk_buff *skb,
-                 const struct net_device *in,
-                 const struct net_device *out,
-                 const struct nf_loginfo *li_user,
-                 const char *prefix);
-
-#define NFULNL_COPY_DISABLED    0xff
-
-#endif /* _KER_NFNETLINK_LOG_H */
-
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
deleted file mode 100644 (file)
index 5c69e9b..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NFT_META_H_
-#define _NFT_META_H_
-
-struct nft_meta {
-       enum nft_meta_keys      key:8;
-       union {
-               enum nft_registers      dreg:8;
-               enum nft_registers      sreg:8;
-       };
-};
-
-extern const struct nla_policy nft_meta_policy[];
-
-int nft_meta_get_init(const struct nft_ctx *ctx,
-                     const struct nft_expr *expr,
-                     const struct nlattr * const tb[]);
-
-int nft_meta_set_init(const struct nft_ctx *ctx,
-                     const struct nft_expr *expr,
-                     const struct nlattr * const tb[]);
-
-int nft_meta_get_dump(struct sk_buff *skb,
-                     const struct nft_expr *expr);
-
-int nft_meta_set_dump(struct sk_buff *skb,
-                     const struct nft_expr *expr);
-
-void nft_meta_get_eval(const struct nft_expr *expr,
-                      struct nft_regs *regs,
-                      const struct nft_pktinfo *pkt);
-
-void nft_meta_set_eval(const struct nft_expr *expr,
-                      struct nft_regs *regs,
-                      const struct nft_pktinfo *pkt);
-
-void nft_meta_set_destroy(const struct nft_ctx *ctx,
-                         const struct nft_expr *expr);
-
-int nft_meta_set_validate(const struct nft_ctx *ctx,
-                         const struct nft_expr *expr,
-                         const struct nft_data **data);
-
-#endif
index 8491bc9c86b1553ab603e4363e8e38ca7ff547e0..661348f23ea5a3a9320b2cafcd17e23960214771 100644 (file)
@@ -160,6 +160,8 @@ struct netns_ipv4 {
        int sysctl_tcp_pacing_ca_ratio;
        int sysctl_tcp_wmem[3];
        int sysctl_tcp_rmem[3];
+       int sysctl_tcp_comp_sack_nr;
+       unsigned long sysctl_tcp_comp_sack_delay_ns;
        struct inet_timewait_death_row tcp_death_row;
        int sysctl_max_syn_backlog;
        int sysctl_tcp_fastopen;
index 97b3a54579c82595061ec5e76bcf6f7243016fc3..c978a31b0f846210b4c2a369af960d5349b5395a 100644 (file)
@@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 {
        int max_hbh_opts_cnt;
        int max_dst_opts_len;
        int max_hbh_opts_len;
+       int seg6_flowlabel;
 };
 
 struct netns_ipv6 {
index e828d31be5dae0ae8c69016dfde50379296484aa..0005f0b40fe9310d8160018b3baaccf2cc098c4d 100644 (file)
@@ -683,9 +683,11 @@ static inline bool tc_skip_sw(u32 flags)
 /* SKIP_HW and SKIP_SW are mutually exclusive flags. */
 static inline bool tc_flags_valid(u32 flags)
 {
-       if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW))
+       if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW |
+                     TCA_CLS_FLAGS_VERBOSE))
                return false;
 
+       flags &= TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW;
        if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)))
                return false;
 
@@ -705,7 +707,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
        cls_common->chain_index = tp->chain->index;
        cls_common->protocol = tp->protocol;
        cls_common->prio = tp->prio;
-       if (tc_skip_sw(flags))
+       if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
                cls_common->extack = extack;
 }
 
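The VERBOSE hunk above is subtle: the new flag has to be masked out before the SKIP_HW/SKIP_SW exclusivity test, otherwise SKIP_HW|SKIP_SW|VERBOSE would XOR to a non-zero value and wrongly pass validation. A minimal userspace model of the check; the flag values here are illustrative stand-ins, not the real uapi/linux/pkt_cls.h values:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative bit positions only; the real flags live in
 * include/uapi/linux/pkt_cls.h. */
#define TCA_CLS_FLAGS_SKIP_HW  (1u << 0)
#define TCA_CLS_FLAGS_SKIP_SW  (1u << 1)
#define TCA_CLS_FLAGS_VERBOSE  (1u << 2)

static bool tc_flags_valid(unsigned int flags)
{
	/* Reject any unknown bit. */
	if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW |
		      TCA_CLS_FLAGS_VERBOSE))
		return false;

	/* Drop VERBOSE before the exclusivity test: without this mask,
	 * SKIP_HW|SKIP_SW|VERBOSE would XOR to VERBOSE (non-zero) and
	 * slip past the "both skip bits set" rejection below. */
	flags &= TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW;
	if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)))
		return false;

	return true;
}

int main(void)
{
	/* 0: both skip bits is still invalid. */
	printf("%d\n", tc_flags_valid(TCA_CLS_FLAGS_SKIP_HW |
				      TCA_CLS_FLAGS_SKIP_SW));
	/* 1: VERBOSE combines freely with a single skip bit. */
	printf("%d\n", tc_flags_valid(TCA_CLS_FLAGS_SKIP_SW |
				      TCA_CLS_FLAGS_VERBOSE));
	return 0;
}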
index 5154c8300262a041a6fdae1c4c462a7388e2aab7..98c10a28cd01c39ebeeb3d0090f15537f2aec429 100644 (file)
@@ -30,7 +30,6 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
        __QDISC_STATE_SCHED,
        __QDISC_STATE_DEACTIVATED,
-       __QDISC_STATE_RUNNING,
 };
 
 struct qdisc_size_table {
@@ -102,6 +101,7 @@ struct Qdisc {
        refcount_t              refcnt;
 
        spinlock_t              busylock ____cacheline_aligned_in_smp;
+       spinlock_t              seqlock;
 };
 
 static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
@@ -111,15 +111,21 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
        refcount_inc(&qdisc->refcnt);
 }
 
-static inline bool qdisc_is_running(const struct Qdisc *qdisc)
+static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
+       if (qdisc->flags & TCQ_F_NOLOCK)
+               return spin_is_locked(&qdisc->seqlock);
        return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
-       if (qdisc_is_running(qdisc))
+       if (qdisc->flags & TCQ_F_NOLOCK) {
+               if (!spin_trylock(&qdisc->seqlock))
+                       return false;
+       } else if (qdisc_is_running(qdisc)) {
                return false;
+       }
        /* Variant of write_seqcount_begin() telling lockdep a trylock
         * was attempted.
         */
@@ -131,6 +137,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
        write_seqcount_end(&qdisc->running);
+       if (qdisc->flags & TCQ_F_NOLOCK)
+               spin_unlock(&qdisc->seqlock);
 }
 
 static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
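For the TCQ_F_NOLOCK path above, the new seqlock spinlock replaces the old __QDISC_STATE_RUNNING bit: whoever wins the trylock runs the qdisc, and every other CPU backs off instead of spinning. A toy pthread model of that contract (build with -pthread; all names are stand-ins for the kernel structures):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* qdisc_run_begin() maps to a trylock: at most one "CPU" dequeues at a
 * time, losers simply return and let the owner do the work. */
static pthread_mutex_t seqlock = PTHREAD_MUTEX_INITIALIZER;

static bool qdisc_run_begin(void)
{
	return pthread_mutex_trylock(&seqlock) == 0;
}

static void qdisc_run_end(void)
{
	pthread_mutex_unlock(&seqlock);
}

static void *cpu(void *name)
{
	if (qdisc_run_begin()) {
		printf("%s: dequeueing packets\n", (char *)name);
		qdisc_run_end();
	} else {
		printf("%s: qdisc already running, backing off\n",
		       (char *)name);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, cpu, "cpu0");
	pthread_create(&b, NULL, cpu, "cpu1");
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}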
index 20ff237c5eb2ff358589b30bbd89d636e41b2d92..86f034b524d46178e4d26e868be3a2bf87acac4a 100644 (file)
@@ -254,11 +254,10 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
 #define SCTP_TSN_MAP_SIZE 4096
 
 /* We will not record more than this many duplicate TSNs between two
- * SACKs.  The minimum PMTU is 576.  Remove all the headers and there
- * is enough room for 131 duplicate reports.  Round down to the
+ * SACKs.  The minimum PMTU is 512.  Remove all the headers and there
+ * is enough room for 117 duplicate reports.  Round down to the
  * nearest power of 2.
  */
-enum { SCTP_MIN_PMTU = 576 };
 enum { SCTP_MAX_DUP_TSNS = 16 };
 enum { SCTP_MAX_GABS = 16 };
 
index 28b996d6349072fb031de91e10115b688d89b42d..f66d4435000799f523fbaa34b0a57dbdeebda040 100644 (file)
@@ -428,32 +428,6 @@ static inline int sctp_list_single_entry(struct list_head *head)
        return (head->next != head) && (head->next == head->prev);
 }
 
-/* Break down data chunks at this point.  */
-static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
-{
-       struct sctp_sock *sp = sctp_sk(asoc->base.sk);
-       struct sctp_af *af = sp->pf->af;
-       int frag = pmtu;
-
-       frag -= af->ip_options_len(asoc->base.sk);
-       frag -= af->net_header_len;
-       frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
-
-       if (asoc->user_frag)
-               frag = min_t(int, frag, asoc->user_frag);
-
-       frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
-                                           sctp_datachk_len(&asoc->stream)));
-
-       return frag;
-}
-
-static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
-{
-       sctp_assoc_sync_pmtu(asoc);
-       asoc->pmtu_pending = 0;
-}
-
 static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
 {
        return !list_empty(&chunk->list);
@@ -607,17 +581,29 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *
        return t->dst;
 }
 
-static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+/* Calculate max payload size given a MTU, or the total overhead if
+ * given MTU is zero
+ */
+static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
+                                    __u32 mtu, __u32 extra)
 {
-       __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
-                          SCTP_DEFAULT_MINSEGMENT);
+       __u32 overhead = sizeof(struct sctphdr) + extra;
 
-       if (t->pathmtu == pmtu)
-               return true;
+       if (sp)
+               overhead += sp->pf->af->net_header_len;
+       else
+               overhead += sizeof(struct ipv6hdr);
 
-       t->pathmtu = pmtu;
+       if (WARN_ON_ONCE(mtu && mtu <= overhead))
+               mtu = overhead;
 
-       return false;
+       return mtu ? mtu - overhead : overhead;
+}
+
+static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
+{
+       return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
+                                SCTP_DEFAULT_MINSEGMENT));
 }
 
 #endif /* __net_sctp_h__ */
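sctp_mtu_payload() above folds the old per-call frag-point arithmetic into one helper. A standalone model with the real on-wire header sizes but none of the sctp_sock plumbing (the ipv4 flag stands in for sp->pf->af):

#include <stdint.h>
#include <stdio.h>

#define SCTP_HDR_LEN   12	/* sizeof(struct sctphdr) */
#define IPV4_HDR_LEN   20
#define IPV6_HDR_LEN   40

static uint32_t sctp_mtu_payload(int ipv4, uint32_t mtu, uint32_t extra)
{
	uint32_t overhead = SCTP_HDR_LEN + extra;

	overhead += ipv4 ? IPV4_HDR_LEN : IPV6_HDR_LEN;

	/* mtu == 0 asks for the total overhead itself; a pathologically
	 * small mtu is clamped so the subtraction cannot underflow. */
	if (mtu && mtu <= overhead)
		mtu = overhead;

	return mtu ? mtu - overhead : overhead;
}

int main(void)
{
	/* 1500-byte Ethernet MTU over IPv4, no chunk overhead: 1468. */
	printf("payload = %u\n", sctp_mtu_payload(1, 1500, 0));
	/* mtu == 0: report the overhead instead (32 for IPv4). */
	printf("overhead = %u\n", sctp_mtu_payload(1, 0, 0));
	return 0;
}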
index 2d0e782c90551377ad654bcef1224bbdb75ba394..5ef1bad81ef54906b375dbfd0e4fd897d078abce 100644 (file)
@@ -207,7 +207,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
                                            int len, __u8 flags, gfp_t gfp);
 struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
                                  const __u32 lowest_tsn);
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc);
 struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
                                      const struct sctp_chunk *chunk);
 struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
@@ -215,7 +215,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
 struct sctp_chunk *sctp_make_shutdown_complete(
                                        const struct sctp_association *asoc,
                                        const struct sctp_chunk *chunk);
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
+int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
 struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
                                   const struct sctp_chunk *chunk,
                                   const size_t hint);
index a0ec462bc1a9f43ac3ca2990010f08999593a24f..ebf809eed33add7905ddd13abf98712a833ae0e0 100644 (file)
@@ -2091,16 +2091,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
                                  enum sctp_transport_cmd command,
                                  sctp_sn_error_t error);
 struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
-struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
-                                          struct net *,
-                                          const union sctp_addr *,
-                                          const union sctp_addr *);
 void sctp_assoc_migrate(struct sctp_association *, struct sock *);
 int sctp_assoc_update(struct sctp_association *old,
                      struct sctp_association *new);
 
 __u32 sctp_association_get_next_tsn(struct sctp_association *);
 
+void sctp_assoc_update_frag_point(struct sctp_association *asoc);
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu);
 void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
 void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
 void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
index 74d725fdbe0f836be4544aebb1efe8a5ef24b5ce..4f7c584e97658902e62ccc090fc0e7cc39e6e396 100644 (file)
@@ -481,6 +481,11 @@ struct sock {
        void                    (*sk_error_report)(struct sock *sk);
        int                     (*sk_backlog_rcv)(struct sock *sk,
                                                  struct sk_buff *skb);
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+       struct sk_buff*         (*sk_validate_xmit_skb)(struct sock *sk,
+                                                       struct net_device *dev,
+                                                       struct sk_buff *skb);
+#endif
        void                    (*sk_destruct)(struct sock *sk);
        struct sock_reuseport __rcu     *sk_reuseport_cb;
        struct rcu_head         sk_rcu;
@@ -803,10 +808,10 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
 }
 
 #ifdef CONFIG_NET
-extern struct static_key memalloc_socks;
+DECLARE_STATIC_KEY_FALSE(memalloc_socks_key);
 static inline int sk_memalloc_socks(void)
 {
-       return static_key_false(&memalloc_socks);
+       return static_branch_unlikely(&memalloc_socks_key);
 }
 #else
 
@@ -2332,6 +2337,22 @@ static inline bool sk_fullsock(const struct sock *sk)
        return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
 }
 
+/* Checks if this SKB belongs to an HW offloaded socket
+ * and whether any SW fallbacks are required based on dev.
+ */
+static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
+                                                  struct net_device *dev)
+{
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+       struct sock *sk = skb->sk;
+
+       if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
+               skb = sk->sk_validate_xmit_skb(sk, dev, skb);
+#endif
+
+       return skb;
+}
+
 /* This helper checks if a socket is a LISTEN or NEW_SYN_RECV
  * SYNACK messages can be attached to either ones (depending on SYNCOOKIE)
  */
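The sk_validate_xmit_skb() hook above lets a hardware-offloaded socket rewrite (or in the real code, software-encrypt or drop) an skb when the chosen egress device cannot transmit it as-is. A userspace model of the dispatch, with every kernel structure reduced to a stand-in:

#include <stddef.h>
#include <stdio.h>

struct sk_buff;
struct net_device { int tls_offload_ok; };

struct sock {
	struct sk_buff *(*sk_validate_xmit_skb)(struct sock *sk,
						struct net_device *dev,
						struct sk_buff *skb);
};

struct sk_buff { struct sock *sk; };

static struct sk_buff *tls_validate(struct sock *sk, struct net_device *dev,
				    struct sk_buff *skb)
{
	if (dev->tls_offload_ok)
		return skb;	/* device can send the record as-is */
	printf("falling back to software crypto\n");
	return skb;		/* the real hook re-encrypts in SW here */
}

static struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct sock *sk = skb->sk;

	/* Only consult the hook when the socket installed one. */
	if (sk && sk->sk_validate_xmit_skb)
		skb = sk->sk_validate_xmit_skb(sk, dev, skb);
	return skb;
}

int main(void)
{
	struct sock sk = { .sk_validate_xmit_skb = tls_validate };
	struct net_device dev = { .tls_offload_ok = 0 };
	struct sk_buff skb = { .sk = &sk };

	sk_validate_xmit_skb(&skb, &dev);
	return 0;
}

Keeping the function pointer on struct sock (under CONFIG_SOCK_VALIDATE_XMIT) means the fast path pays only a NULL check for sockets that never opted into offload.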
index 39bc855d7fee6644ade78f0273ab672cf204265a..d574ce63bf220d1069d2d2a5fa7177496984d92f 100644 (file)
@@ -155,6 +155,7 @@ struct switchdev_notifier_fdb_info {
        struct switchdev_notifier_info info; /* must be first */
        const unsigned char *addr;
        u16 vid;
+       bool added_by_user;
 };
 
 static inline struct net_device *
index 833154e3df173ea41aa16dd1ec739a175c679c5c..952d842a604a3ed79e1bf87a712db20a461c35a9 100644 (file)
@@ -245,6 +245,7 @@ extern long sysctl_tcp_mem[3];
 
 #define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
 #define TCP_RACK_STATIC_REO_WND  0x2 /* Use static RACK reo wnd */
+#define TCP_RACK_NO_DUPTHRESH    0x4 /* Do not use DUPACK threshold in RACK */
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -557,7 +558,12 @@ void tcp_fin(struct sock *sk);
 void tcp_init_xmit_timers(struct sock *);
 static inline void tcp_clear_xmit_timers(struct sock *sk)
 {
-       hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
+       if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
+               __sock_put(sk);
+
+       if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1)
+               __sock_put(sk);
+
        inet_csk_clear_xmit_timers(sk);
 }
 
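The hrtimer_try_to_cancel() calls above return 1 only when an armed timer was actually removed, and each armed pacing/compressed-ack timer holds a socket reference, so exactly one __sock_put() is owed per successful cancel. A toy refcount model of that pairing (all names are stand-ins):

#include <stdbool.h>
#include <stdio.h>

struct sock  { int refcnt; };
struct timer { bool armed; struct sock *sk; };

static void timer_arm(struct timer *t)
{
	if (!t->armed) {
		t->armed = true;
		t->sk->refcnt++;	/* sock_hold() in the kernel */
	}
}

static int timer_try_to_cancel(struct timer *t)
{
	if (!t->armed)
		return 0;		/* inactive: no reference to drop */
	t->armed = false;
	return 1;			/* caller must drop the timer's ref */
}

int main(void)
{
	struct sock sk = { .refcnt = 1 };
	struct timer pacing = { .armed = false, .sk = &sk };

	timer_arm(&pacing);
	if (timer_try_to_cancel(&pacing) == 1)
		sk.refcnt--;		/* __sock_put() */
	printf("refcnt = %d\n", sk.refcnt);	/* back to 1 */
	return 0;
}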
@@ -814,9 +820,8 @@ struct tcp_skb_cb {
 #endif
                } header;       /* For incoming skbs */
                struct {
-                       __u32 key;
                        __u32 flags;
-                       struct bpf_map *map;
+                       struct sock *sk_redir;
                        void *data_end;
                } bpf;
        };
@@ -1875,6 +1880,10 @@ void tcp_v4_init(void);
 void tcp_init(void);
 
 /* tcp_recovery.c */
+void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb);
+void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
+extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
+                               u32 reo_wnd);
 extern void tcp_rack_mark_lost(struct sock *sk);
 extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
                             u64 xmit_time);
@@ -2105,4 +2114,12 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+void clean_acked_data_enable(struct inet_connection_sock *icsk,
+                            void (*cad)(struct sock *sk, u32 ack_seq));
+void clean_acked_data_disable(struct inet_connection_sock *icsk);
+
+#endif
+
 #endif /* _TCP_H */
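clean_acked_data_enable() above gives TLS device offload a hook into TCP ACK processing so transmitted records can be freed as soon as they are acknowledged. A simplified userspace model of the registration and callback flow; the real callback takes struct sock * and a u32 ack_seq, and the types here are stand-ins:

#include <stdio.h>

struct inet_connection_sock {
	void (*icsk_clean_acked)(struct inet_connection_sock *icsk,
				 unsigned int ack_seq);
};

static void tls_icsk_clean_acked(struct inet_connection_sock *icsk,
				 unsigned int ack_seq)
{
	printf("free TLS offload records up to seq %u\n", ack_seq);
}

static void clean_acked_data_enable(struct inet_connection_sock *icsk,
				    void (*cad)(struct inet_connection_sock *,
						unsigned int))
{
	icsk->icsk_clean_acked = cad;	/* registered once at setup */
}

static void tcp_ack(struct inet_connection_sock *icsk, unsigned int ack_seq)
{
	/* ACK processing invokes the hook only when one is installed. */
	if (icsk->icsk_clean_acked)
		icsk->icsk_clean_acked(icsk, ack_seq);
}

int main(void)
{
	struct inet_connection_sock icsk = { 0 };

	clean_acked_data_enable(&icsk, tls_icsk_clean_acked);
	tcp_ack(&icsk, 4321);
	return 0;
}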
index 07670ec022a714bd9a675d2ffd89455f10a04a01..f0e7e6bc1befa83b0e23e9ef3d5c4855c5bd135f 100644 (file)
@@ -44,11 +44,11 @@ struct tipc_basic_hdr {
        __be32 w[4];
 };
 
-static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
+static inline __be32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr)
 {
        u32 w0 = ntohl(hdr->w[0]);
        bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK;
-       int key;
+       __be32 key;
 
        /* Return source node identity as key */
        if (likely(!keepalive_msg))
index 3da8e13a6d9661c399d6d88f4b1d78c41aee4bb3..70c273777fe9fe27b2ef1ba7c2c80970da8ea5c4 100644 (file)
@@ -83,21 +83,10 @@ struct tls_device {
        void (*unhash)(struct tls_device *device, struct sock *sk);
 };
 
-struct tls_sw_context {
+struct tls_sw_context_tx {
        struct crypto_aead *aead_send;
-       struct crypto_aead *aead_recv;
        struct crypto_wait async_wait;
 
-       /* Receive context */
-       struct strparser strp;
-       void (*saved_data_ready)(struct sock *sk);
-       unsigned int (*sk_poll)(struct file *file, struct socket *sock,
-                               struct poll_table_struct *wait);
-       struct sk_buff *recv_pkt;
-       u8 control;
-       bool decrypted;
-
-       /* Sending context */
        char aad_space[TLS_AAD_SPACE_SIZE];
 
        unsigned int sg_plaintext_size;
@@ -114,6 +103,54 @@ struct tls_sw_context {
        struct scatterlist sg_aead_out[2];
 };
 
+struct tls_sw_context_rx {
+       struct crypto_aead *aead_recv;
+       struct crypto_wait async_wait;
+
+       struct strparser strp;
+       void (*saved_data_ready)(struct sock *sk);
+       unsigned int (*sk_poll)(struct file *file, struct socket *sock,
+                               struct poll_table_struct *wait);
+       struct sk_buff *recv_pkt;
+       u8 control;
+       bool decrypted;
+
+       char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
+       char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
+
+};
+
+struct tls_record_info {
+       struct list_head list;
+       u32 end_seq;
+       int len;
+       int num_frags;
+       skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct tls_offload_context {
+       struct crypto_aead *aead_send;
+       spinlock_t lock;        /* protects records list */
+       struct list_head records_list;
+       struct tls_record_info *open_record;
+       struct tls_record_info *retransmit_hint;
+       u64 hint_record_sn;
+       u64 unacked_record_sn;
+
+       struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
+       void (*sk_destruct)(struct sock *sk);
+       u8 driver_state[];
+       /* The TLS layer reserves room for driver specific state
+        * Currently the belief is that there is not enough
+        * driver specific state to justify another layer of indirection
+        */
+#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE                                               \
+       (ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) +           \
+        TLS_DRIVER_STATE_SIZE)
+
 enum {
        TLS_PENDING_CLOSED_RECORD
 };
@@ -138,9 +175,15 @@ struct tls_context {
                struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
        };
 
-       void *priv_ctx;
+       struct list_head list;
+       struct net_device *netdev;
+       refcount_t refcount;
+
+       void *priv_ctx_tx;
+       void *priv_ctx_rx;
 
-       u8 conf:3;
+       u8 tx_conf:3;
+       u8 rx_conf:3;
 
        struct cipher_context tx;
        struct cipher_context rx;
@@ -148,6 +191,7 @@ struct tls_context {
        struct scatterlist *partially_sent_record;
        u16 partially_sent_offset;
        unsigned long flags;
+       bool in_tcp_sendpages;
 
        u16 pending_open_record_frags;
        int (*push_pending_record)(struct sock *sk, int flags);
@@ -177,7 +221,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
                    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
-void tls_sw_free_resources(struct sock *sk);
+void tls_sw_free_resources_tx(struct sock *sk);
+void tls_sw_free_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                   int nonblock, int flags, int *addr_len);
 unsigned int tls_sw_poll(struct file *file, struct socket *sock,
@@ -186,9 +231,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
                           struct pipe_inode_info *pipe,
                           size_t len, unsigned int flags);
 
-void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
-void tls_icsk_clean_acked(struct sock *sk);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_device_sendpage(struct sock *sk, struct page *page,
+                       int offset, size_t size, int flags);
+void tls_device_sk_destruct(struct sock *sk);
+void tls_device_init(void);
+void tls_device_cleanup(void);
+
+struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+                                      u32 seq, u64 *p_record_sn);
+
+static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
+{
+       return rec->len == 0;
+}
+
+static inline u32 tls_record_start_seq(struct tls_record_info *rec)
+{
+       return rec->end_seq - rec->len;
+}
 
+void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
 int tls_push_sg(struct sock *sk, struct tls_context *ctx,
                struct scatterlist *sg, u16 first_offset,
                int flags);
@@ -225,6 +289,13 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
        return tls_ctx->pending_open_record_frags;
 }
 
+static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
+{
+       return sk_fullsock(sk) &&
+              /* matches smp_store_release in tls_set_device_offload */
+              smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+}
+
 static inline void tls_err_abort(struct sock *sk, int err)
 {
        sk->sk_err = err;
@@ -297,16 +368,22 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk)
        return icsk->icsk_ulp_data;
 }
 
-static inline struct tls_sw_context *tls_sw_ctx(
+static inline struct tls_sw_context_rx *tls_sw_ctx_rx(
                const struct tls_context *tls_ctx)
 {
-       return (struct tls_sw_context *)tls_ctx->priv_ctx;
+       return (struct tls_sw_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
+               const struct tls_context *tls_ctx)
+{
+       return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
 static inline struct tls_offload_context *tls_offload_ctx(
                const struct tls_context *tls_ctx)
 {
-       return (struct tls_offload_context *)tls_ctx->priv_ctx;
+       return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
 }
 
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
@@ -314,4 +391,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
 
+struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
+                                     struct net_device *dev,
+                                     struct sk_buff *skb);
+
+int tls_sw_fallback_init(struct sock *sk,
+                        struct tls_offload_context *offload_ctx,
+                        struct tls_crypto_info *crypto_info);
+
 #endif /* _TLS_OFFLOAD_H */
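tls_record_start_seq() and tls_record_is_start_marker() above are the retransmit bookkeeping: a record's start is derived from its end sequence and length, and a zero-length record marks the resync start point. A self-contained model of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Minimal stand-in for struct tls_record_info: only the fields the two
 * helpers touch. */
struct tls_record_info { uint32_t end_seq; int len; };

static int tls_record_is_start_marker(const struct tls_record_info *rec)
{
	return rec->len == 0;
}

static uint32_t tls_record_start_seq(const struct tls_record_info *rec)
{
	return rec->end_seq - rec->len;
}

int main(void)
{
	struct tls_record_info rec = { .end_seq = 5000, .len = 1400 };

	printf("record covers [%u, %u)\n",
	       tls_record_start_seq(&rec), rec.end_seq);
	printf("start marker? %d\n", tls_record_is_start_marker(&rec));
	return 0;
}

Storing only end_seq and len keeps the per-record state small while still letting tls_get_record() binary-chop a retransmitted TCP sequence number onto the record that produced it.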
index 0676b272f6ac8bf91d07f18b7f75e39481a47eaa..9289b64250322b861400cc19f1089b36755daedb 100644 (file)
@@ -174,6 +174,9 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
                                 struct udphdr *uh, udp_lookup_t lookup);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
+struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+                                 netdev_features_t features);
+
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
        struct udphdr *uh;
@@ -269,6 +272,7 @@ int udp_abort(struct sock *sk, int err);
 int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udp_push_pending_frames(struct sock *sk);
 void udp_flush_pending_frames(struct sock *sk);
+int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
 void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
 int udp_rcv(struct sk_buff *skb);
 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
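__udp_gso_segment() and udp_cmsg_send() above are the kernel side of UDP GSO. From userspace the feature is driven by the UDP_SEGMENT socket option (or an equivalent per-call cmsg); a hedged sketch, assuming a 4.18+ kernel and a loopback destination:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103		/* uapi value, kernels >= 4.18 */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(9000),
	};
	char payload[4800];
	int gso_size = 1200;

	dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	memset(payload, 'x', sizeof(payload));

	/* Per-socket segment size; udp_cmsg_send() also accepts the same
	 * value as a per-call UDP_SEGMENT cmsg. */
	if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT,
		       &gso_size, sizeof(gso_size)) < 0)
		perror("UDP_SEGMENT");

	/* One syscall; the kernel chops this into four 1200-byte
	 * datagrams on the way out via __udp_gso_segment(). */
	if (sendto(fd, payload, sizeof(payload), 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}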
index 137ad5f9f40f2d9ec4d31c830968815ff1b3bedd..0b689cf561c744cc09d8bf159212569fa2fae5c8 100644 (file)
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 }
 
 void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_buff(struct xdp_buff *xdp);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
                     struct net_device *dev, u32 queue_index);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
new file mode 100644 (file)
index 0000000..185f492
--- /dev/null
+++ b/include/net/xdp_sock.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * AF_XDP internal functions
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_XDP_SOCK_H
+#define _LINUX_XDP_SOCK_H
+
+#include <linux/mutex.h>
+#include <net/sock.h>
+
+struct net_device;
+struct xsk_queue;
+struct xdp_umem;
+
+struct xdp_sock {
+       /* struct sock must be the first member of struct xdp_sock */
+       struct sock sk;
+       struct xsk_queue *rx;
+       struct net_device *dev;
+       struct xdp_umem *umem;
+       struct list_head flush_node;
+       u16 queue_id;
+       struct xsk_queue *tx ____cacheline_aligned_in_smp;
+       /* Protects multiple processes in the control path */
+       struct mutex mutex;
+       u64 rx_dropped;
+};
+
+struct xdp_buff;
+#ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+void xsk_flush(struct xdp_sock *xs);
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+#else
+static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+       return -ENOTSUPP;
+}
+
+static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+       return -ENOTSUPP;
+}
+
+static inline void xsk_flush(struct xdp_sock *xs)
+{
+}
+
+static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+{
+       return false;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_XDP_SOCK_H */
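The xsk_rcv()/xsk_flush() split above exists so a driver can stage many descriptors during one NAPI poll and publish them to the consumer once. A toy model of that batching; the ring size, types, and -1 error code are stand-ins (the kernel uses -ENOSPC and real producer/consumer ring indices):

#include <stdio.h>
#include <string.h>

#define RING_SZ 8

struct xsk_queue {
	unsigned int ring[RING_SZ];
	unsigned int cached_prod;	/* staged by rcv */
	unsigned int prod;		/* visible to consumer after flush */
};

struct xdp_sock_model { struct xsk_queue rx; unsigned long rx_dropped; };

static int xsk_rcv(struct xdp_sock_model *xs, unsigned int desc)
{
	if (xs->rx.cached_prod - xs->rx.prod >= RING_SZ) {
		xs->rx_dropped++;	/* ring full: count and drop */
		return -1;
	}
	xs->rx.ring[xs->rx.cached_prod++ % RING_SZ] = desc;
	return 0;
}

static void xsk_flush(struct xdp_sock_model *xs)
{
	/* One publish per batch instead of one per packet. */
	xs->rx.prod = xs->rx.cached_prod;
}

int main(void)
{
	struct xdp_sock_model xs;

	memset(&xs, 0, sizeof(xs));
	for (unsigned int d = 0; d < 3; d++)
		xsk_rcv(&xs, d);
	xsk_flush(&xs);
	printf("consumer sees %u descriptors\n", xs.rx.prod);
	return 0;
}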
index a872379b69da4e8e03a2b07d98f0ef4ce45cdcf7..45e75c36b738bf647584db536c22eddd6dd439cc 100644 (file)
@@ -375,6 +375,7 @@ struct xfrm_input_afinfo {
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo);
 int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo);
 
+void xfrm_flush_gc(void);
 void xfrm_state_delete_tunnel(struct xfrm_state *x);
 
 struct xfrm_type {
index 04e0679767f633f3ba80c962299914ec7f727957..e03bd9d41fa8fd693fa9bcf88620bc2db887980d 100644 (file)
@@ -11,8 +11,6 @@ struct scsi_sense_hdr;
 extern void scsi_print_command(struct scsi_cmnd *);
 extern size_t __scsi_format_command(char *, size_t,
                                   const unsigned char *, size_t);
-extern void scsi_show_extd_sense(const struct scsi_device *, const char *,
-                                unsigned char, unsigned char);
 extern void scsi_print_sense_hdr(const struct scsi_device *, const char *,
                                 const struct scsi_sense_hdr *);
 extern void scsi_print_sense(const struct scsi_cmnd *);
index 50df5b28d2c9df6eb0cffb217104cdf8ee59e9ed..8ee8991aa099af3a1c9d8435bab12821dfef8928 100644 (file)
@@ -143,13 +143,13 @@ struct rpi_firmware *rpi_firmware_get(struct device_node *firmware_node);
 static inline int rpi_firmware_property(struct rpi_firmware *fw, u32 tag,
                                        void *data, size_t len)
 {
-       return 0;
+       return -ENOSYS;
 }
 
 static inline int rpi_firmware_property_list(struct rpi_firmware *fw,
                                             void *data, size_t tag_size)
 {
-       return 0;
+       return -ENOSYS;
 }
 
 static inline struct rpi_firmware *rpi_firmware_get(struct device_node *firmware_node)
index ca13a44ae9d44e971a977e489244b831ac83040c..6011a58d3e2086886278b8f42582bfdc52784fd0 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include <linux/wait.h>
+#include <linux/nospec.h>
 #include <sound/asound.h>
 
 #define snd_kcontrol_chip(kcontrol) ((kcontrol)->private_data)
@@ -148,12 +149,14 @@ int snd_ctl_get_preferred_subdevice(struct snd_card *card, int type);
 
 static inline unsigned int snd_ctl_get_ioffnum(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id)
 {
-       return id->numid - kctl->id.numid;
+       unsigned int ioff = id->numid - kctl->id.numid;
+       return array_index_nospec(ioff, kctl->count);
 }
 
 static inline unsigned int snd_ctl_get_ioffidx(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id)
 {
-       return id->index - kctl->id.index;
+       unsigned int ioff = id->index - kctl->id.index;
+       return array_index_nospec(ioff, kctl->count);
 }
 
 static inline unsigned int snd_ctl_get_ioff(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id)
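The control.h hunk above clamps the element offset with array_index_nospec() so that a mispredicted bounds check cannot be used to leak memory under speculation (Spectre v1). A userspace stand-in showing only the clamping behaviour, not the speculation barrier itself:

#include <stdio.h>

static unsigned int array_index_nospec(unsigned int index, unsigned int size)
{
	/* The real kernel macro builds a branchless mask so the clamp
	 * also holds under misspeculation; this model reproduces only
	 * the architectural result (out-of-range becomes 0). */
	return index < size ? index : 0;
}

int main(void)
{
	unsigned int count = 4;			/* kctl->count */
	unsigned int base  = 10;		/* kctl->id.numid */
	unsigned int numid = 17;		/* attacker-controlled id */
	unsigned int ioff  = numid - base;	/* 7, past the end */

	printf("clamped ioff = %u\n", array_index_nospec(ioff, count));
	return 0;
}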
index f0820554caa9d0630410dc12fbd06bae76f4eeac..d0a341bc45404b06c08f434d26030dd8be3138db 100644 (file)
@@ -575,6 +575,48 @@ TRACE_EVENT(afs_protocol_error,
                      __entry->call, __entry->error, __entry->where)
            );
 
+TRACE_EVENT(afs_cm_no_server,
+           TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
+
+           TP_ARGS(call, srx),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,                       call    )
+                   __field(unsigned int,                       op_id   )
+                   __field_struct(struct sockaddr_rxrpc,       srx     )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call->debug_id;
+                   __entry->op_id = call->operation_ID;
+                   memcpy(&__entry->srx, srx, sizeof(__entry->srx));
+                          ),
+
+           TP_printk("c=%08x op=%u %pISpc",
+                     __entry->call, __entry->op_id, &__entry->srx.transport)
+           );
+
+TRACE_EVENT(afs_cm_no_server_u,
+           TP_PROTO(struct afs_call *call, const uuid_t *uuid),
+
+           TP_ARGS(call, uuid),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,                       call    )
+                   __field(unsigned int,                       op_id   )
+                   __field_struct(uuid_t,                      uuid    )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call->debug_id;
+                   __entry->op_id = call->operation_ID;
+                   memcpy(&__entry->uuid, uuid, sizeof(__entry->uuid));
+                          ),
+
+           TP_printk("c=%08x op=%u %pU",
+                     __entry->call, __entry->op_id, &__entry->uuid)
+           );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
deleted file mode 100644 (file)
index 1501856..0000000
--- a/include/trace/events/bpf.h
+++ /dev/null
@@ -1,355 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM bpf
-
-#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_BPF_H
-
-/* These are only used within the BPF_SYSCALL code */
-#ifdef CONFIG_BPF_SYSCALL
-
-#include <linux/filter.h>
-#include <linux/bpf.h>
-#include <linux/fs.h>
-#include <linux/tracepoint.h>
-
-#define __PROG_TYPE_MAP(FN)    \
-       FN(SOCKET_FILTER)       \
-       FN(KPROBE)              \
-       FN(SCHED_CLS)           \
-       FN(SCHED_ACT)           \
-       FN(TRACEPOINT)          \
-       FN(XDP)                 \
-       FN(PERF_EVENT)          \
-       FN(CGROUP_SKB)          \
-       FN(CGROUP_SOCK)         \
-       FN(LWT_IN)              \
-       FN(LWT_OUT)             \
-       FN(LWT_XMIT)
-
-#define __MAP_TYPE_MAP(FN)     \
-       FN(HASH)                \
-       FN(ARRAY)               \
-       FN(PROG_ARRAY)          \
-       FN(PERF_EVENT_ARRAY)    \
-       FN(PERCPU_HASH)         \
-       FN(PERCPU_ARRAY)        \
-       FN(STACK_TRACE)         \
-       FN(CGROUP_ARRAY)        \
-       FN(LRU_HASH)            \
-       FN(LRU_PERCPU_HASH)     \
-       FN(LPM_TRIE)
-
-#define __PROG_TYPE_TP_FN(x)   \
-       TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x);
-#define __PROG_TYPE_SYM_FN(x)  \
-       { BPF_PROG_TYPE_##x, #x },
-#define __PROG_TYPE_SYM_TAB    \
-       __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 }
-__PROG_TYPE_MAP(__PROG_TYPE_TP_FN)
-
-#define __MAP_TYPE_TP_FN(x)    \
-       TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x);
-#define __MAP_TYPE_SYM_FN(x)   \
-       { BPF_MAP_TYPE_##x, #x },
-#define __MAP_TYPE_SYM_TAB     \
-       __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 }
-__MAP_TYPE_MAP(__MAP_TYPE_TP_FN)
-
-DECLARE_EVENT_CLASS(bpf_prog_event,
-
-       TP_PROTO(const struct bpf_prog *prg),
-
-       TP_ARGS(prg),
-
-       TP_STRUCT__entry(
-               __array(u8, prog_tag, 8)
-               __field(u32, type)
-       ),
-
-       TP_fast_assign(
-               BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
-               memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
-               __entry->type = prg->type;
-       ),
-
-       TP_printk("prog=%s type=%s",
-                 __print_hex_str(__entry->prog_tag, 8),
-                 __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB))
-);
-
-DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type,
-
-       TP_PROTO(const struct bpf_prog *prg),
-
-       TP_ARGS(prg)
-);
-
-DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu,
-
-       TP_PROTO(const struct bpf_prog *prg),
-
-       TP_ARGS(prg)
-);
-
-TRACE_EVENT(bpf_prog_load,
-
-       TP_PROTO(const struct bpf_prog *prg, int ufd),
-
-       TP_ARGS(prg, ufd),
-
-       TP_STRUCT__entry(
-               __array(u8, prog_tag, 8)
-               __field(u32, type)
-               __field(int, ufd)
-       ),
-
-       TP_fast_assign(
-               BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
-               memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
-               __entry->type = prg->type;
-               __entry->ufd  = ufd;
-       ),
-
-       TP_printk("prog=%s type=%s ufd=%d",
-                 __print_hex_str(__entry->prog_tag, 8),
-                 __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB),
-                 __entry->ufd)
-);
-
-TRACE_EVENT(bpf_map_create,
-
-       TP_PROTO(const struct bpf_map *map, int ufd),
-
-       TP_ARGS(map, ufd),
-
-       TP_STRUCT__entry(
-               __field(u32, type)
-               __field(u32, size_key)
-               __field(u32, size_value)
-               __field(u32, max_entries)
-               __field(u32, flags)
-               __field(int, ufd)
-       ),
-
-       TP_fast_assign(
-               __entry->type        = map->map_type;
-               __entry->size_key    = map->key_size;
-               __entry->size_value  = map->value_size;
-               __entry->max_entries = map->max_entries;
-               __entry->flags       = map->map_flags;
-               __entry->ufd         = ufd;
-       ),
-
-       TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x",
-                 __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-                 __entry->ufd, __entry->size_key, __entry->size_value,
-                 __entry->max_entries, __entry->flags)
-);
-
-DECLARE_EVENT_CLASS(bpf_obj_prog,
-
-       TP_PROTO(const struct bpf_prog *prg, int ufd,
-                const struct filename *pname),
-
-       TP_ARGS(prg, ufd, pname),
-
-       TP_STRUCT__entry(
-               __array(u8, prog_tag, 8)
-               __field(int, ufd)
-               __string(path, pname->name)
-       ),
-
-       TP_fast_assign(
-               BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
-               memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
-               __assign_str(path, pname->name);
-               __entry->ufd = ufd;
-       ),
-
-       TP_printk("prog=%s path=%s ufd=%d",
-                 __print_hex_str(__entry->prog_tag, 8),
-                 __get_str(path), __entry->ufd)
-);
-
-DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog,
-
-       TP_PROTO(const struct bpf_prog *prg, int ufd,
-                const struct filename *pname),
-
-       TP_ARGS(prg, ufd, pname)
-);
-
-DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog,
-
-       TP_PROTO(const struct bpf_prog *prg, int ufd,
-                const struct filename *pname),
-
-       TP_ARGS(prg, ufd, pname)
-);
-
-DECLARE_EVENT_CLASS(bpf_obj_map,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const struct filename *pname),
-
-       TP_ARGS(map, ufd, pname),
-
-       TP_STRUCT__entry(
-               __field(u32, type)
-               __field(int, ufd)
-               __string(path, pname->name)
-       ),
-
-       TP_fast_assign(
-               __assign_str(path, pname->name);
-               __entry->type = map->map_type;
-               __entry->ufd  = ufd;
-       ),
-
-       TP_printk("map type=%s ufd=%d path=%s",
-                 __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-                 __entry->ufd, __get_str(path))
-);
-
-DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const struct filename *pname),
-
-       TP_ARGS(map, ufd, pname)
-);
-
-DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const struct filename *pname),
-
-       TP_ARGS(map, ufd, pname)
-);
-
-DECLARE_EVENT_CLASS(bpf_map_keyval,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const void *key, const void *val),
-
-       TP_ARGS(map, ufd, key, val),
-
-       TP_STRUCT__entry(
-               __field(u32, type)
-               __field(u32, key_len)
-               __dynamic_array(u8, key, map->key_size)
-               __field(bool, key_trunc)
-               __field(u32, val_len)
-               __dynamic_array(u8, val, map->value_size)
-               __field(bool, val_trunc)
-               __field(int, ufd)
-       ),
-
-       TP_fast_assign(
-               memcpy(__get_dynamic_array(key), key, map->key_size);
-               memcpy(__get_dynamic_array(val), val, map->value_size);
-               __entry->type      = map->map_type;
-               __entry->key_len   = min(map->key_size, 16U);
-               __entry->key_trunc = map->key_size != __entry->key_len;
-               __entry->val_len   = min(map->value_size, 16U);
-               __entry->val_trunc = map->value_size != __entry->val_len;
-               __entry->ufd       = ufd;
-       ),
-
-       TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]",
-                 __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-                 __entry->ufd,
-                 __print_hex(__get_dynamic_array(key), __entry->key_len),
-                 __entry->key_trunc ? " ..." : "",
-                 __print_hex(__get_dynamic_array(val), __entry->val_len),
-                 __entry->val_trunc ? " ..." : "")
-);
-
-DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const void *key, const void *val),
-
-       TP_ARGS(map, ufd, key, val)
-);
-
-DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const void *key, const void *val),
-
-       TP_ARGS(map, ufd, key, val)
-);
-
-TRACE_EVENT(bpf_map_delete_elem,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const void *key),
-
-       TP_ARGS(map, ufd, key),
-
-       TP_STRUCT__entry(
-               __field(u32, type)
-               __field(u32, key_len)
-               __dynamic_array(u8, key, map->key_size)
-               __field(bool, key_trunc)
-               __field(int, ufd)
-       ),
-
-       TP_fast_assign(
-               memcpy(__get_dynamic_array(key), key, map->key_size);
-               __entry->type      = map->map_type;
-               __entry->key_len   = min(map->key_size, 16U);
-               __entry->key_trunc = map->key_size != __entry->key_len;
-               __entry->ufd       = ufd;
-       ),
-
-       TP_printk("map type=%s ufd=%d key=[%s%s]",
-                 __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-                 __entry->ufd,
-                 __print_hex(__get_dynamic_array(key), __entry->key_len),
-                 __entry->key_trunc ? " ..." : "")
-);
-
-TRACE_EVENT(bpf_map_next_key,
-
-       TP_PROTO(const struct bpf_map *map, int ufd,
-                const void *key, const void *key_next),
-
-       TP_ARGS(map, ufd, key, key_next),
-
-       TP_STRUCT__entry(
-               __field(u32, type)
-               __field(u32, key_len)
-               __dynamic_array(u8, key, map->key_size)
-               __dynamic_array(u8, nxt, map->key_size)
-               __field(bool, key_trunc)
-               __field(bool, key_null)
-               __field(int, ufd)
-       ),
-
-       TP_fast_assign(
-               if (key)
-                       memcpy(__get_dynamic_array(key), key, map->key_size);
-               __entry->key_null = !key;
-               memcpy(__get_dynamic_array(nxt), key_next, map->key_size);
-               __entry->type      = map->map_type;
-               __entry->key_len   = min(map->key_size, 16U);
-               __entry->key_trunc = map->key_size != __entry->key_len;
-               __entry->ufd       = ufd;
-       ),
-
-       TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
-                 __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-                 __entry->ufd,
-                 __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),
-                                                          __entry->key_len),
-                 __entry->key_trunc && !__entry->key_null ? " ..." : "",
-                 __print_hex(__get_dynamic_array(nxt), __entry->key_len),
-                 __entry->key_trunc ? " ..." : "")
-);
-#endif /* CONFIG_BPF_SYSCALL */
-#endif /* _TRACE_BPF_H */
-
-#include <trace/define_trace.h>
index 7e8d48a81b9192a07dc0f702193b0539160dcde8..1b8d951e3c124ce4f68577b464e05d077a8cd84e 100644 (file)
 
 TRACE_EVENT(fib6_table_lookup,
 
-       TP_PROTO(const struct net *net, const struct rt6_info *rt,
+       TP_PROTO(const struct net *net, const struct fib6_info *f6i,
                 struct fib6_table *table, const struct flowi6 *flp),
 
-       TP_ARGS(net, rt, table, flp),
+       TP_ARGS(net, f6i, table, flp),
 
        TP_STRUCT__entry(
                __field(        u32,    tb_id           )
@@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup,
                in6 = (struct in6_addr *)__entry->dst;
                *in6 = flp->daddr;
 
-               if (rt->rt6i_idev) {
-                       __assign_str(name, rt->rt6i_idev->dev->name);
+               if (f6i->fib6_nh.nh_dev) {
+                       __assign_str(name, f6i->fib6_nh.nh_dev->name);
                } else {
                        __assign_str(name, "");
                }
-               if (rt == net->ipv6.ip6_null_entry) {
+               if (f6i == net->ipv6.fib6_null_entry) {
                        struct in6_addr in6_zero = {};
 
                        in6 = (struct in6_addr *)__entry->gw;
                        *in6 = in6_zero;
 
-               } else if (rt) {
+               } else if (f6i) {
                        in6 = (struct in6_addr *)__entry->gw;
-                       *in6 = rt->rt6i_gateway;
+                       *in6 = f6i->fib6_nh.nh_gw;
                }
        ),
 
index 8d6cf10d27c92cf68ffc642a308378c5df8a9fee..eb903c3f195f590b7c13e245af1412e7263000ec 100644 (file)
@@ -31,7 +31,11 @@ TRACE_EVENT(initcall_start,
        TP_ARGS(func),
 
        TP_STRUCT__entry(
-               __field(initcall_t, func)
+               /*
+                * Use field_struct to avoid is_signed_type()
+                * comparison of a function pointer
+                */
+               __field_struct(initcall_t, func)
        ),
 
        TP_fast_assign(
@@ -48,8 +52,12 @@ TRACE_EVENT(initcall_finish,
        TP_ARGS(func, ret),
 
        TP_STRUCT__entry(
-               __field(initcall_t,     func)
-               __field(int,            ret)
+               /*
+                * Use field_struct to avoid is_signed_type()
+                * comparison of a function pointer
+                */
+               __field_struct(initcall_t,      func)
+               __field(int,                    ret)
        ),
 
        TP_fast_assign(
index 9e96c2fe2793e5286d1cb8882cf87da5075f6f0d..077e664ac9a20986eebf07fc829339738882f8a9 100644 (file)
@@ -15,6 +15,7 @@
 #define _TRACE_RXRPC_H
 
 #include <linux/tracepoint.h>
+#include <linux/errqueue.h>
 
 /*
  * Define enums for tracing information.
@@ -210,6 +211,20 @@ enum rxrpc_congest_change {
        rxrpc_cong_saw_nack,
 };
 
+enum rxrpc_tx_fail_trace {
+       rxrpc_tx_fail_call_abort,
+       rxrpc_tx_fail_call_ack,
+       rxrpc_tx_fail_call_data_frag,
+       rxrpc_tx_fail_call_data_nofrag,
+       rxrpc_tx_fail_call_final_resend,
+       rxrpc_tx_fail_conn_abort,
+       rxrpc_tx_fail_conn_challenge,
+       rxrpc_tx_fail_conn_response,
+       rxrpc_tx_fail_reject,
+       rxrpc_tx_fail_version_keepalive,
+       rxrpc_tx_fail_version_reply,
+};
+
 #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -437,6 +452,19 @@ enum rxrpc_congest_change {
        EM(RXRPC_CALL_LOCAL_ERROR,              "LocalError") \
        E_(RXRPC_CALL_NETWORK_ERROR,            "NetError")
 
+#define rxrpc_tx_fail_traces \
+       EM(rxrpc_tx_fail_call_abort,            "CallAbort") \
+       EM(rxrpc_tx_fail_call_ack,              "CallAck") \
+       EM(rxrpc_tx_fail_call_data_frag,        "CallDataFrag") \
+       EM(rxrpc_tx_fail_call_data_nofrag,      "CallDataNofrag") \
+       EM(rxrpc_tx_fail_call_final_resend,     "CallFinalResend") \
+       EM(rxrpc_tx_fail_conn_abort,            "ConnAbort") \
+       EM(rxrpc_tx_fail_conn_challenge,        "ConnChall") \
+       EM(rxrpc_tx_fail_conn_response,         "ConnResp") \
+       EM(rxrpc_tx_fail_reject,                "Reject") \
+       EM(rxrpc_tx_fail_version_keepalive,     "VerKeepalive") \
+       E_(rxrpc_tx_fail_version_reply,         "VerReply")
+
 /*
  * Export enum symbols via userspace.
  */
@@ -460,6 +488,7 @@ rxrpc_propose_ack_traces;
 rxrpc_propose_ack_outcomes;
 rxrpc_congest_modes;
 rxrpc_congest_changes;
+rxrpc_tx_fail_traces;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -1374,6 +1403,62 @@ TRACE_EVENT(rxrpc_resend,
                      __entry->anno)
            );
 
+TRACE_EVENT(rxrpc_rx_icmp,
+           TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee,
+                    struct sockaddr_rxrpc *srx),
+
+           TP_ARGS(peer, ee, srx),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,                       peer    )
+                   __field_struct(struct sock_extended_err,    ee      )
+                   __field_struct(struct sockaddr_rxrpc,       srx     )
+                            ),
+
+           TP_fast_assign(
+                   __entry->peer = peer->debug_id;
+                   memcpy(&__entry->ee, ee, sizeof(__entry->ee));
+                   memcpy(&__entry->srx, srx, sizeof(__entry->srx));
+                          ),
+
+           TP_printk("P=%08x o=%u t=%u c=%u i=%u d=%u e=%d %pISp",
+                     __entry->peer,
+                     __entry->ee.ee_origin,
+                     __entry->ee.ee_type,
+                     __entry->ee.ee_code,
+                     __entry->ee.ee_info,
+                     __entry->ee.ee_data,
+                     __entry->ee.ee_errno,
+                     &__entry->srx.transport)
+           );
+
+TRACE_EVENT(rxrpc_tx_fail,
+           TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, int ret,
+                    enum rxrpc_tx_fail_trace what),
+
+           TP_ARGS(debug_id, serial, ret, what),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,               debug_id        )
+                   __field(rxrpc_serial_t,             serial          )
+                   __field(int,                        ret             )
+                   __field(enum rxrpc_tx_fail_trace,   what            )
+                            ),
+
+           TP_fast_assign(
+                   __entry->debug_id = debug_id;
+                   __entry->serial = serial;
+                   __entry->ret = ret;
+                   __entry->what = what;
+                          ),
+
+           TP_printk("c=%08x r=%x ret=%d %s",
+                     __entry->debug_id,
+                     __entry->serial,
+                     __entry->ret,
+                     __print_symbolic(__entry->what, rxrpc_tx_fail_traces))
+           );
+
 #endif /* _TRACE_RXRPC_H */
 
 /* This part must be outside protection */
index 335d87242439db1b64d2ed7ad0c167844d0c5dad..bbb08a3ef5ccce100d2551580b35f6b42fa1fa9e 100644 (file)
@@ -224,6 +224,8 @@ TRACE_EVENT(rpc_stats_latency,
        TP_ARGS(task, backlog, rtt, execute),
 
        TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
                __field(u32, xid)
                __field(int, version)
                __string(progname, task->tk_client->cl_program->name)
@@ -231,13 +233,11 @@ TRACE_EVENT(rpc_stats_latency,
                __field(unsigned long, backlog)
                __field(unsigned long, rtt)
                __field(unsigned long, execute)
-               __string(addr,
-                        task->tk_xprt->address_strings[RPC_DISPLAY_ADDR])
-               __string(port,
-                        task->tk_xprt->address_strings[RPC_DISPLAY_PORT])
        ),
 
        TP_fast_assign(
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->task_id = task->tk_pid;
                __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
                __entry->version = task->tk_client->cl_vers;
                __assign_str(progname, task->tk_client->cl_program->name)
@@ -245,14 +245,10 @@ TRACE_EVENT(rpc_stats_latency,
                __entry->backlog = ktime_to_us(backlog);
                __entry->rtt = ktime_to_us(rtt);
                __entry->execute = ktime_to_us(execute);
-               __assign_str(addr,
-                            task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]);
-               __assign_str(port,
-                            task->tk_xprt->address_strings[RPC_DISPLAY_PORT]);
        ),
 
-       TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
-               __get_str(addr), __get_str(port), __entry->xid,
+       TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
+               __entry->task_id, __entry->client_id, __entry->xid,
                __get_str(progname), __entry->version, __get_str(procname),
                __entry->backlog, __entry->rtt, __entry->execute)
 );
index bf6f82673492ca252c6537b1bd3817e7987840ef..f8260e5c79ad31d6e825cc796c15f686bcca25ef 100644 (file)
@@ -257,6 +257,33 @@ TRACE_EVENT(ufshcd_command,
        )
 );
 
+TRACE_EVENT(ufshcd_upiu,
+       TP_PROTO(const char *dev_name, const char *str, void *hdr, void *tsf),
+
+       TP_ARGS(dev_name, str, hdr, tsf),
+
+       TP_STRUCT__entry(
+               __string(dev_name, dev_name)
+               __string(str, str)
+               __array(unsigned char, hdr, 12)
+               __array(unsigned char, tsf, 16)
+       ),
+
+       TP_fast_assign(
+               __assign_str(dev_name, dev_name);
+               __assign_str(str, str);
+               memcpy(__entry->hdr, hdr, sizeof(__entry->hdr));
+               memcpy(__entry->tsf, tsf, sizeof(__entry->tsf));
+       ),
+
+       TP_printk(
+               "%s: %s: HDR:%s, CDB:%s",
+               __get_str(str), __get_str(dev_name),
+               __print_hex(__entry->hdr, sizeof(__entry->hdr)),
+               __print_hex(__entry->tsf, sizeof(__entry->tsf))
+       )
+);
+
 #endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */
 
 /* This part must be outside protection */
index 2f057a494d93180dd0e3543b8c43ff7c4de5f629..9a761bc6a251a45358ef6d1a1b7ae3e678333561 100644 (file)
@@ -25,6 +25,8 @@ DECLARE_EVENT_CLASS(workqueue_work,
        TP_printk("work struct %p", __entry->work)
 );
 
+struct pool_workqueue;
+
 /**
  * workqueue_queue_work - called when a work gets queued
  * @req_cpu:   the requested cpu
index 7dd8f34c37dfea26f8ec460d3937a46c8f109f4d..fdcf88bcf0ea3dec3df105aa7e7d40fe2aaaaf0c 100644 (file)
@@ -352,22 +352,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd,
 DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin);
 DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin);
 
-TRACE_EVENT(xen_mmu_flush_tlb_all,
-           TP_PROTO(int x),
-           TP_ARGS(x),
-           TP_STRUCT__entry(__array(char, x, 0)),
-           TP_fast_assign((void)x),
-           TP_printk("%s", "")
-       );
-
-TRACE_EVENT(xen_mmu_flush_tlb,
-           TP_PROTO(int x),
-           TP_ARGS(x),
-           TP_STRUCT__entry(__array(char, x, 0)),
-           TP_fast_assign((void)x),
-           TP_printk("%s", "")
-       );
-
 TRACE_EVENT(xen_mmu_flush_tlb_one_user,
            TP_PROTO(unsigned long addr),
            TP_ARGS(addr),
index c8383a289f7b0983a3ff96acf7962dbed8280887..d94d333a82259cbf07f6725445f7768b96a24419 100644 (file)
@@ -96,6 +96,7 @@ enum bpf_cmd {
        BPF_PROG_QUERY,
        BPF_RAW_TRACEPOINT_OPEN,
        BPF_BTF_LOAD,
+       BPF_BTF_GET_FD_BY_ID,
 };
 
 enum bpf_map_type {
@@ -116,6 +117,8 @@ enum bpf_map_type {
        BPF_MAP_TYPE_DEVMAP,
        BPF_MAP_TYPE_SOCKMAP,
        BPF_MAP_TYPE_CPUMAP,
+       BPF_MAP_TYPE_XSKMAP,
+       BPF_MAP_TYPE_SOCKHASH,
 };
 
 enum bpf_prog_type {
@@ -343,6 +346,7 @@ union bpf_attr {
                        __u32           start_id;
                        __u32           prog_id;
                        __u32           map_id;
+                       __u32           btf_id;
                };
                __u32           next_id;
                __u32           open_flags;
@@ -377,403 +381,1527 @@ union bpf_attr {
        };
 } __attribute__((aligned(8)));
 
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- *     Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- *     Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- *     Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- *     Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ *     $ ./scripts/bpf_helpers_doc.py \
+ *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ *     $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional; removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ *     Description
+ *             Perform a lookup in *map* for an entry associated to *key*.
+ *     Return
+ *             Map value associated to *key*, or **NULL** if no entry was
+ *             found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ *     Description
+ *             Add or update the value of the entry associated to *key* in
+ *             *map* with *value*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             Flag value **BPF_NOEXIST** cannot be used for maps of types
+ *             **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ *             elements always exist); the helper would return an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ *     Description
+ *             Delete entry with *key* from *map*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ *     Description
+ *             For tracing programs, safely attempt to read *size* bytes from
+ *             address *src* and store the data in *dst*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_ktime_get_ns(void)
- *     Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- *     Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- *     Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- *     Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- *     store bytes into packet
- *     @skb: pointer to skb
- *     @offset: offset within packet from skb->mac_header
- *     @from: pointer where to copy bytes from
- *     @len: number of bytes to store into packet
- *     @flags: bit 0 - if true, recompute skb->csum
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- *     recompute IP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where IP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- *     recompute TCP/UDP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where TCP/UDP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             bit 4 - is pseudo header
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- *     jump into another BPF program
- *     @ctx: context pointer passed to next program
- *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: 32-bit index inside array that selects specific program to run
- *     Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- *     redirect to another netdev
- *     @skb: pointer to skb
- *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: 0 on success or negative error
+ *     Description
+ *             Return the time elapsed since system boot, in nanoseconds.
+ *     Return
+ *             Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ *     Description
+ *             This helper is a "printk()-like" facility for debugging. It
+ *             prints a message defined by format *fmt* (of size *fmt_size*)
+ *             to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ *		available. It can take up to three additional **u64**
+ *		arguments (as for all eBPF helpers, the total number of
+ *		arguments is limited to five).
+ *
+ *             Each time the helper is called, it appends a line to the trace.
+ *             The format of the trace is customizable, and the exact output
+ *             one will get depends on the options set in
+ *             *\/sys/kernel/debug/tracing/trace_options* (see also the
+ *             *README* file under the same directory). However, it usually
+ *             defaults to something like:
+ *
+ *             ::
+ *
+ *                     telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ *             In the above:
+ *
+ *                     * ``telnet`` is the name of the current task.
+ *                     * ``470`` is the PID of the current task.
+ *                     * ``001`` is the CPU number on which the task is
+ *                       running.
+ *                     * In ``.N..``, each character refers to a set of
+ *                       options (whether irqs are enabled, scheduling
+ *                       options, whether hard/softirqs are running, level of
+ *                       preempt_disabled respectively). **N** means that
+ *                       **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ *                       are set.
+ *                     * ``419421.045894`` is a timestamp.
+ *                     * ``0x00000001`` is a fake value used by BPF for the
+ *                       instruction pointer register.
+ *                     * ``<formatted msg>`` is the message formatted with
+ *                       *fmt*.
+ *
+ *		The conversion specifiers supported by *fmt* are similar to,
+ *		but more limited than, those of **printk**\ (). They are
+ *		**%d**, **%i**,
+ *             **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ *             **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ *             of field, padding with zeroes, etc.) is available, and the
+ *             helper will return **-EINVAL** (but print nothing) if it
+ *             encounters an unknown specifier.
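+ *
+ *		A typical invocation looks like the following sketch, where
+ *		*ifindex* stands for any integer of interest (note that the
+ *		format string usually lives on the eBPF stack):
+ *
+ *		::
+ *
+ *			char fmt[] = "ifindex: %d\n";
+ *			bpf_trace_printk(fmt, sizeof(fmt), ifindex);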
+ *
+ *		Also, note that **bpf_trace_printk**\ () is slow, and should
+ *		only be used for debugging purposes. For this reason, the
+ *		first time the helper is used (or more precisely, when
+ *		**trace_printk**\ () buffers are allocated), a notice block
+ *		spanning several lines is printed to kernel logs, stating
+ *		that the helper is not meant "for production use". For
+ *		passing values to user space, perf events should be preferred.
+ *     Return
+ *             The number of bytes written to the buffer, or a negative error
+ *             in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ *     Description
+ *             Get a pseudo-random number.
+ *
+ *             From a security point of view, this helper uses its own
+ *             pseudo-random internal state, and cannot be used to infer the
+ *             seed of other random functions in the kernel. However, it is
+ *             essential to note that the generator used by the helper is not
+ *             cryptographically secure.
+ *     Return
+ *             A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ *     Description
+ *             Get the SMP (symmetric multiprocessing) processor id. Note that
+ *             all programs run with preemption disabled, which means that the
+ *		SMP processor id is stable throughout the execution of the
+ *		program.
+ *     Return
+ *             The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ *     Description
+ *             Store *len* bytes from address *from* into the packet
+ *             associated to *skb*, at *offset*. *flags* are a combination of
+ *             **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ *             checksum for the packet after storing the bytes) and
+ *             **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ *             **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
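+ *
+ *		As a minimal sketch, overwriting the destination MAC address
+ *		at the start of the frame could look as follows (*new_mac* is
+ *		a hypothetical 6-byte array):
+ *
+ *		::
+ *
+ *			if (bpf_skb_store_bytes(skb, 0, new_mac, ETH_ALEN,
+ *						BPF_F_RECOMPUTE_CSUM))
+ *				return TC_ACT_SHOT;
+ *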
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ *     Description
+ *             Recompute the layer 3 (e.g. IP) checksum for the packet
+ *             associated to *skb*. Computation is incremental, so the helper
+ *             must know the former value of the header field that was
+ *             modified (*from*), the new value of this field (*to*), and the
+ *             number of bytes (2 or 4) for this field, stored in *size*.
+ *             Alternatively, it is possible to store the difference between
+ *             the previous and the new values of the header field in *to*, by
+ *             setting *from* and *size* to 0. For both methods, *offset*
+ *             indicates the location of the IP checksum within the packet.
+ *
+ *             This helper works in combination with **bpf_csum_diff**\ (),
+ *             which does not update the checksum in-place, but offers more
+ *             flexibility and can handle sizes larger than 2 or 4 for the
+ *             checksum to update.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ *     Description
+ *             Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ *             packet associated to *skb*. Computation is incremental, so the
+ *             helper must know the former value of the header field that was
+ *             modified (*from*), the new value of this field (*to*), and the
+ *		number of bytes (2 or 4) for this field, stored in the lowest
+ *             four bits of *flags*. Alternatively, it is possible to store
+ *             the difference between the previous and the new values of the
+ *             header field in *to*, by setting *from* and the four lowest
+ *             bits of *flags* to 0. For both methods, *offset* indicates the
+ *		location of the checksum within the packet. In addition to
+ *		the size of the field, actual flags can be combined (with a
+ *		bitwise OR) into *flags*. With **BPF_F_MARK_MANGLED_0**, a
+ *		null checksum is left untouched (unless
+ *		**BPF_F_MARK_ENFORCE** is added as well), and for updates
+ *		resulting in a null checksum the value is set to
+ *		**CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR**
+ *		indicates the checksum is to be computed against a
+ *		pseudo-header.
+ *
+ *             This helper works in combination with **bpf_csum_diff**\ (),
+ *             which does not update the checksum in-place, but offers more
+ *             flexibility and can handle sizes larger than 2 or 4 for the
+ *             checksum to update.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ *     Description
+ *             This special helper is used to trigger a "tail call", or in
+ *             other words, to jump into another eBPF program. The same stack
+ *             frame is used (but values on stack and in registers for the
+ *             caller are not accessible to the callee). This mechanism allows
+ *             for program chaining, either for raising the maximum number of
+ *             available eBPF instructions, or to execute given programs in
+ *             conditional blocks. For security reasons, there is an upper
+ *             limit to the number of successive tail calls that can be
+ *             performed.
+ *
+ *             Upon call of this helper, the program attempts to jump into a
+ *             program referenced at index *index* in *prog_array_map*, a
+ *             special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ *             *ctx*, a pointer to the context.
+ *
+ *             If the call succeeds, the kernel immediately runs the first
+ *             instruction of the new program. This is not a function call,
+ *             and it never returns to the previous program. If the call
+ *             fails, then the helper has no effect, and the caller continues
+ *             to run its subsequent instructions. A call can fail if the
+ *             destination program for the jump does not exist (i.e. *index*
+ *		is greater than or equal to the number of entries in *prog_array_map*), or
+ *             if the maximum number of tail calls has been reached for this
+ *             chain of programs. This limit is defined in the kernel by the
+ *             macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ *             which is currently set to 32.
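+ *
+ *		As a sketch, jumping to a parser program stored at a
+ *		hypothetical index **PROG_PARSE_V6** of *jmp_table* (a map of
+ *		type **BPF_MAP_TYPE_PROG_ARRAY**, assumed to be populated by
+ *		user space) could be written:
+ *
+ *		::
+ *
+ *			bpf_tail_call(skb, &jmp_table, PROG_PARSE_V6);
+ *			/* only reached if the tail call failed */
+ *			return TC_ACT_OK;
+ *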
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ *     Description
+ *             Clone and redirect the packet associated to *skb* to another
+ *             net device of index *ifindex*. Both ingress and egress
+ *             interfaces can be used for redirection. The **BPF_F_INGRESS**
+ *             value in *flags* is used to make the distinction (ingress path
+ *             is selected if the flag is present, egress path otherwise).
+ *             This is the only flag supported for now.
+ *
+ *             In comparison with **bpf_redirect**\ () helper,
+ *             **bpf_clone_redirect**\ () has the associated cost of
+ *             duplicating the packet buffer, but this can be executed out of
+ *             the eBPF program. Conversely, **bpf_redirect**\ () is more
+ *             efficient, but it is handled through an action code where the
+ *             redirection happens only after the eBPF program has returned.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
- *     Return: current->tgid << 32 | current->pid
+ *     Return
+ *             A 64-bit integer containing the current tgid and pid, and
+ *             created as such:
+ *             *current_task*\ **->tgid << 32 \|**
+ *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
- *     Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- *     stores current->comm into buf
- *     Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- *     retrieve a proc's classid
- *     @skb: pointer to skb
- *     Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- *     retrieve or populate tunnel metadata
- *     @skb: pointer to skb
- *     @key: pointer to 'struct bpf_tunnel_key'
- *     @size: size of 'struct bpf_tunnel_key'
- *     @flags: room for future extensions
- *     Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- *     read perf event counter value
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- *     redirect to another netdev
- *     @ifindex: ifindex of the net device
- *     @flags:
- *       cls_bpf:
- *          bit 0 - if set, redirect to ingress instead of egress
- *          other bits - reserved
- *       xdp_bpf:
- *         all bits - reserved
- *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *            xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- *     redirect to endpoint in map
- *     @map: pointer to dev map
- *     @key: index in map to lookup
- *     @flags: --
- *     Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- *     retrieve a dst's tclassid
- *     @skb: pointer to skb
- *     Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- *     output perf raw sample
- *     @ctx: struct pt_regs*
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @data: data on stack to be output as raw data
- *     @size: size of data
- *     Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- *     walk user or kernel stack and return id
- *     @ctx: struct pt_regs*
- *     @map: pointer to stack_trace map
- *     @flags: bits 0-7 - numer of stack frames to skip
- *             bit 8 - collect user stack instead of kernel
- *             bit 9 - compare stacks by hash only
- *             bit 10 - if two different stacks hash into the same stackid
- *                      discard old
- *             other bits - reserved
- *     Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- *     calculate csum diff
- *     @from: raw from buffer
- *     @from_size: length of from buffer
- *     @to: raw to buffer
- *     @to_size: length of to buffer
- *     @seed: optional seed
- *     Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- *     retrieve tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- *     populate tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- *     Change protocol of the skb. Currently supported is v4 -> v6,
- *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
- *     program is expected to fill the new headers via skb_store_bytes
- *     and lX_csum_replace.
- *     @skb: pointer to skb
- *     @proto: new skb->protocol type
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *     Change packet type of skb.
- *     @skb: pointer to skb
- *     @type: new skb->pkt_type type
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *     Check cgroup2 membership of skb
- *     @skb: pointer to skb
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 skb failed the cgroup2 descendant test
- *       == 1 skb succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *     Retrieve and possibly recalculate skb->hash.
- *     @skb: pointer to skb
- *     Return: hash
+ *     Return
+ *             A 64-bit integer containing the current GID and UID, and
+ *             created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ *     Description
+ *             Copy the **comm** attribute of the current task into *buf* of
+ *             *size_of_buf*. The **comm** attribute contains the name of
+ *             the executable (excluding the path) for the current task. The
+ *             *size_of_buf* must be strictly positive. On success, the
+ *             helper makes sure that the *buf* is NUL-terminated. On failure,
+ *             it is filled with zeroes.
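+ *
+ *		A minimal sketch (**TASK_COMM_LEN**, 16 bytes, includes the
+ *		terminating NUL):
+ *
+ *		::
+ *
+ *			char comm[TASK_COMM_LEN];
+ *			if (bpf_get_current_comm(comm, sizeof(comm)))
+ *				return 0; /* failed, comm is zeroed */
+ *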
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ *     Description
+ *             Retrieve the classid for the current task, i.e. for the net_cls
+ *             cgroup to which *skb* belongs.
+ *
+ *             This helper can be used on TC egress path, but not on ingress.
+ *
+ *             The net_cls cgroup provides an interface to tag network packets
+ *             based on a user-provided identifier for all traffic coming from
+ *             the tasks belonging to the related cgroup. See also the related
+ *             kernel documentation, available from the Linux sources in file
+ *             *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ *             The Linux kernel has two versions for cgroups: there are
+ *             cgroups v1 and cgroups v2. Both are available to users, who can
+ *             use a mixture of them, but note that the net_cls cgroup is for
+ *             cgroup v1 only. This makes it incompatible with BPF programs
+ *             run on cgroups, which is a cgroup-v2-only feature (a socket can
+ *             only hold data for one version of cgroups at a time).
+ *
+ *		This helper is only available if the kernel was compiled with
+ *             the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ *             "**y**" or to "**m**".
+ *     Return
+ *             The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ *     Description
+ *             Push a *vlan_tci* (VLAN tag control information) of protocol
+ *             *vlan_proto* to the packet associated to *skb*, then update
+ *             the checksum. Note that if *vlan_proto* is different from
+ *             **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ *             be **ETH_P_8021Q**.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ *     Description
+ *             Pop a VLAN header from the packet associated to *skb*.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *     Description
+ *             Get tunnel metadata. This helper takes a pointer *key* to an
+ *		empty **struct bpf_tunnel_key** of *size*, that will be
+ *             filled with tunnel metadata for the packet associated to *skb*.
+ *             The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ *             indicates that the tunnel is based on IPv6 protocol instead of
+ *             IPv4.
+ *
+ *             The **struct bpf_tunnel_key** is an object that generalizes the
+ *             principal parameters used by various tunneling protocols into a
+ *             single struct. This way, it can be used to easily make a
+ *             decision based on the contents of the encapsulation header,
+ *             "summarized" in this struct. In particular, it holds the IP
+ *             address of the remote end (IPv4 or IPv6, depending on the case)
+ *             in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ *             this struct exposes the *key*\ **->tunnel_id**, which is
+ *             generally mapped to a VNI (Virtual Network Identifier), making
+ *             it programmable together with the **bpf_skb_set_tunnel_key**\
+ *             () helper.
+ *
+ *             Let's imagine that the following code is part of a program
+ *             attached to the TC ingress interface, on one end of a GRE
+ *             tunnel, and is supposed to filter out all messages coming from
+ *             remote ends with IPv4 address other than 10.0.0.1:
+ *
+ *             ::
+ *
+ *                     int ret;
+ *                     struct bpf_tunnel_key key = {};
+ *                     
+ *                     ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ *                     if (ret < 0)
+ *                             return TC_ACT_SHOT;     // drop packet
+ *                     
+ *                     if (key.remote_ipv4 != 0x0a000001)
+ *                             return TC_ACT_SHOT;     // drop packet
+ *                     
+ *                     return TC_ACT_OK;               // accept packet
+ *
+ *             This interface can also be used with all encapsulation devices
+ *             that can operate in "collect metadata" mode: instead of having
+ *             one network device per specific configuration, the "collect
+ *             metadata" mode only requires a single device where the
+ *             configuration can be extracted from this helper.
+ *
+ *		This can be used together with various tunnels such as VXLAN,
+ *             Geneve, GRE or IP in IP (IPIP).
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *     Description
+ *		Populate tunnel metadata for packet associated to *skb*. The
+ *             tunnel metadata is set to the contents of *key*, of *size*. The
+ *             *flags* can be set to a combination of the following values:
+ *
+ *             **BPF_F_TUNINFO_IPV6**
+ *                     Indicate that the tunnel is based on IPv6 protocol
+ *                     instead of IPv4.
+ *             **BPF_F_ZERO_CSUM_TX**
+ *                     For IPv4 packets, add a flag to tunnel metadata
+ *                     indicating that checksum computation should be skipped
+ *                     and checksum set to zeroes.
+ *             **BPF_F_DONT_FRAGMENT**
+ *                     Add a flag to tunnel metadata indicating that the
+ *                     packet should not be fragmented.
+ *             **BPF_F_SEQ_NUMBER**
+ *                     Add a flag to tunnel metadata indicating that a
+ *                     sequence number should be added to tunnel header before
+ *                     sending the packet. This flag was added for GRE
+ *                     encapsulation, but might be used with other protocols
+ *                     as well in the future.
+ *
+ *             Here is a typical usage on the transmit path:
+ *
+ *             ::
+ *
+ *                     struct bpf_tunnel_key key;
+ *                          populate key ...
+ *                     bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ *                     bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ *             See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ *             helper for additional information.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ *     Description
+ *             Read the value of a perf event counter. This helper relies on a
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ *             the perf event counter is selected when *map* is updated with
+ *             perf event file descriptors. The *map* is an array whose size
+ *             is the number of available CPUs, and each cell contains a value
+ *             relative to one CPU. The value to retrieve is indicated by
+ *		*flags*, which contains the index of the CPU to look up, masked
+ *             with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *             **BPF_F_CURRENT_CPU** to indicate that the value for the
+ *             current CPU should be retrieved.
+ *
+ *		Note that before Linux 4.13, only hardware perf events could
+ *		be retrieved.
+ *
+ *             Also, be aware that the newer helper
+ *             **bpf_perf_event_read_value**\ () is recommended over
+ *             **bpf_perf_event_read**\ () in general. The latter has some ABI
+ *             quirks where error and counter value are used as a return code
+ *             (which is wrong to do since ranges may overlap). This issue is
+ *             fixed with **bpf_perf_event_read_value**\ (), which at the same
+ *             time provides more features over the **bpf_perf_event_read**\
+ *             () interface. Please refer to the description of
+ *             **bpf_perf_event_read_value**\ () for details.
+ *     Return
+ *             The value of the perf event counter read from the map, or a
+ *             negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ *     Description
+ *             Redirect the packet to another net device of index *ifindex*.
+ *             This helper is somewhat similar to **bpf_clone_redirect**\
+ *             (), except that the packet is not cloned, which provides
+ *             increased performance.
+ *
+ *             Except for XDP, both ingress and egress interfaces can be used
+ *             for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ *             to make the distinction (ingress path is selected if the flag
+ *             is present, egress path otherwise). Currently, XDP only
+ *             supports redirection to the egress interface, and accepts no
+ *             flag at all.
+ *
+ *             The same effect can be attained with the more generic
+ *             **bpf_redirect_map**\ (), which requires specific maps to be
+ *             used but offers better performance.
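+ *
+ *		For XDP, a minimal sketch sending every packet out of a
+ *		hypothetical interface of index *out_ifindex* could be:
+ *
+ *		::
+ *
+ *			return bpf_redirect(out_ifindex, 0);
+ *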
+ *     Return
+ *             For XDP, the helper returns **XDP_REDIRECT** on success or
+ *             **XDP_ABORTED** on error. For other program types, the values
+ *             are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ *             error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ *     Description
+ *		Retrieve the realm of the route, that is to say the
+ *		**tclassid** field of the destination for the *skb*. The
+ *		identifier retrieved is a user-provided tag, similar to the
+ *             one used with the net_cls cgroup (see description for
+ *             **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ *             held by a route (a destination entry), not by a task.
+ *
+ *             Retrieving this identifier works with the clsact TC egress hook
+ *             (see also **tc-bpf(8)**), or alternatively on conventional
+ *             classful egress qdiscs, but not on TC ingress path. In case of
+ *             clsact TC egress hook, this has the advantage that, internally,
+ *             the destination entry has not been dropped yet in the transmit
+ *             path. Therefore, the destination entry does not need to be
+ *             artificially held via **netif_keep_dst**\ () for a classful
+ *             qdisc until the *skb* is freed.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		the **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ *     Return
+ *             The realm of the route for the packet associated to *skb*, or 0
+ *             if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *		The value to write, of *size*, is passed through the eBPF
+ *		stack and pointed to by *data*.
+ *
+ *		The context of the program, *ctx*, also needs to be passed to
+ *		the helper.
+ *
+ *		In user space, a program that wants to read the values needs to
+ *             call **perf_event_open**\ () on the perf event (either for
+ *             one or for all CPUs) and to store the file descriptor into the
+ *             *map*. This must be done before the eBPF program can send data
+ *             into it. An example is available in file
+ *             *samples/bpf/trace_output_user.c* in the Linux kernel source
+ *             tree (the eBPF program counterpart is in
+ *             *samples/bpf/trace_output_kern.c*).
+ *
+ *             **bpf_perf_event_output**\ () achieves better performance
+ *             than **bpf_trace_printk**\ () for sharing data with user
+ *		space, and is much better suited to streaming data from eBPF
+ *             programs.
+ *
+ *             Note that this helper is not restricted to tracing use cases
+ *             and can be used with programs attached to TC or XDP as well,
+ *             where it allows for passing data to user space listeners. Data
+ *             can be:
+ *
+ *             * Only custom structs,
+ *             * Only the packet payload, or
+ *             * A combination of both.
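+ *
+ *		A minimal sketch pushing a custom struct to user space (the
+ *		*events* map and the event layout are hypothetical):
+ *
+ *		::
+ *
+ *			struct event {
+ *				u32 pid;
+ *				u64 ts;
+ *			} ev = {
+ *				.pid = bpf_get_current_pid_tgid() >> 32,
+ *				.ts  = bpf_ktime_get_ns(),
+ *			};
+ *			bpf_perf_event_output(ctx, &events,
+ *					      BPF_F_CURRENT_CPU,
+ *					      &ev, sizeof(ev));
+ *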
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ *     Description
+ *             This helper was provided as an easy way to load data from a
+ *             packet. It can be used to load *len* bytes from *offset* from
+ *             the packet associated to *skb*, into the buffer pointed by
+ *             *to*.
+ *
+ *             Since Linux 4.7, usage of this helper has mostly been replaced
+ *             by "direct packet access", enabling packet data to be
+ *             manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ *             pointing respectively to the first byte of packet data and to
+ *             the byte after the last byte of packet data. However, it
+ *             remains useful if one wishes to read large quantities of data
+ *             at once from a packet into the eBPF stack.
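+ *
+ *		A minimal sketch copying the first 64 bytes of the packet
+ *		onto the eBPF stack, e.g. to parse data that may sit in
+ *		non-linear parts of the *skb*:
+ *
+ *		::
+ *
+ *			u8 buf[64];
+ *			if (bpf_skb_load_bytes(skb, 0, buf, sizeof(buf)))
+ *				return TC_ACT_OK; /* packet too short */
+ *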
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
+ *     Description
+ *             Walk a user or a kernel stack and return its id. To achieve
+ *             this, the helper needs *ctx*, which is a pointer to the context
+ *             on which the tracing program is executed, and a pointer to a
+ *             *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ *             The last argument, *flags*, holds the number of stack frames to
+ *             skip (from 0 to 255), masked with
+ *             **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *             a combination of the following flags:
+ *
+ *             **BPF_F_USER_STACK**
+ *                     Collect a user space stack instead of a kernel stack.
+ *             **BPF_F_FAST_STACK_CMP**
+ *                     Compare stacks by hash only.
+ *             **BPF_F_REUSE_STACKID**
+ *                     If two different stacks hash into the same *stackid*,
+ *                     discard the old one.
+ *
+ *		The stack id retrieved is a 32-bit integer handle which
+ *             can be further combined with other data (including other stack
+ *             ids) and used as a key into maps. This can be useful for
+ *             generating a variety of graphs (such as flame graphs or off-cpu
+ *             graphs).
+ *
+ *             For walking a stack, this helper is an improvement over
+ *             **bpf_probe_read**\ (), which can be used with unrolled loops
+ *             but is not efficient and consumes a lot of eBPF instructions.
+ *		Instead, **bpf_get_stackid**\ () can collect both kernel and
+ *		user frames, up to **PERF_MAX_STACK_DEPTH**. Note that
+ *             this limit can be controlled with the **sysctl** program, and
+ *             that it should be manually increased in order to profile long
+ *             user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *             ::
+ *
+ *                     # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *     Return
+ *             The positive or null stack id on success, or a negative error
+ *             in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ *     Description
+ *             Compute a checksum difference, from the raw buffer pointed by
+ *             *from*, of length *from_size* (that must be a multiple of 4),
+ *             towards the raw buffer pointed by *to*, of size *to_size*
+ *             (same remark). An optional *seed* can be added to the value
+ *             (this can be cascaded, the seed may come from a previous call
+ *             to the helper).
+ *
+ *             This is flexible enough to be used in several ways:
+ *
+ *             * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ *               checksum, it can be used when pushing new data.
+ *             * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ *               checksum, it can be used when removing data from a packet.
+ *             * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ *               can be used to compute a diff. Note that *from_size* and
+ *               *to_size* do not need to be equal.
+ *
+ *             This helper can be used in combination with
+ *             **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ *             which one can feed in the difference computed with
+ *             **bpf_csum_diff**\ ().
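+ *
+ *		As a minimal sketch, folding the difference for a rewritten
+ *		4-byte address into a TCP checksum could look as follows
+ *		(*old_ip*, *new_ip* and *csum_off* are hypothetical and must
+ *		be computed by the program; **BPF_F_PSEUDO_HDR** is set
+ *		because the address is part of the pseudo-header):
+ *
+ *		::
+ *
+ *			s64 diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
+ *			if (diff < 0)
+ *				return TC_ACT_SHOT;
+ *			if (bpf_l4_csum_replace(skb, csum_off, 0, diff,
+ *						BPF_F_PSEUDO_HDR))
+ *				return TC_ACT_SHOT;
+ *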
+ *     Return
+ *             The checksum result, or a negative error code in case of
+ *             failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *     Description
+ *             Retrieve tunnel options metadata for the packet associated to
+ *             *skb*, and store the raw tunnel option data to the buffer *opt*
+ *             of *size*.
+ *
+ *             This helper can be used with encapsulation devices that can
+ *             operate in "collect metadata" mode (please refer to the related
+ *             note in the description of **bpf_skb_get_tunnel_key**\ () for
+ *             more details). A particular example where this can be used is
+ *             in combination with the Geneve encapsulation protocol, where it
+ *		allows for pushing (with the **bpf_skb_set_tunnel_opt**\ ()
+ *		helper) and retrieving (with this helper) arbitrary TLVs
+ *		(Type-Length-Value headers) from the eBPF program. This
+ *		allows for full customization of these
+ *             headers.
+ *     Return
+ *             The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *     Description
+ *             Set tunnel options metadata for the packet associated to *skb*
+ *             to the option data contained in the raw buffer *opt* of *size*.
+ *
+ *             See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *             helper for additional information.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ *     Description
+ *		Change the protocol of the *skb* to *proto*. Currently
+ *		supported are transitions from IPv4 to IPv6, and from IPv6 to
+ *             IPv4. The helper takes care of the groundwork for the
+ *             transition, including resizing the socket buffer. The eBPF
+ *             program is expected to fill the new headers, if any, via
+ *             **skb_store_bytes**\ () and to recompute the checksums with
+ *             **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ *		(). The main use case for this helper is to perform NAT64
+ *             operations out of an eBPF program.
+ *
+ *             Internally, the GSO type is marked as dodgy so that headers are
+ *             checked and segments are recalculated by the GSO/GRO engine.
+ *             The size for GSO target is adapted as well.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ *     Description
+ *             Change the packet type for the packet associated to *skb*. This
+ *		comes down to setting *skb*\ **->pkt_type** to *type*, except
+ *		that the eBPF program does not have write access to *skb*\
+ *		**->pkt_type** apart from this helper. Using a helper here allows
+ *             for graceful handling of errors.
+ *
+ *             The major use case is to change incoming *skb*s to
+ *             **PACKET_HOST** in a programmatic way instead of having to
+ *             recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ *             example.
+ *
+ *             Note that *type* only allows certain values. At this time, they
+ *             are:
+ *
+ *             **PACKET_HOST**
+ *                     Packet is for us.
+ *             **PACKET_BROADCAST**
+ *                     Send packet to all.
+ *             **PACKET_MULTICAST**
+ *                     Send packet to group.
+ *             **PACKET_OTHERHOST**
+ *                     Send packet to someone else.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ *     Description
+ *             Check whether *skb* is a descendant of the cgroup2 held by
+ *             *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *     Return
+ *             The return value depends on the result of the test, and can be:
+ *
+ *             * 0, if the *skb* failed the cgroup2 descendant test.
+ *             * 1, if the *skb* succeeded the cgroup2 descendant test.
+ *             * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ *     Description
+ *             Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ *             not set, in particular if the hash was cleared due to mangling,
+ *             recompute this hash. Later accesses to the hash can be done
+ *             directly with *skb*\ **->hash**.
+ *
+ *             Calling **bpf_set_hash_invalid**\ (), changing a packet
+ *             prototype with **bpf_skb_change_proto**\ (), or calling
+ *             **bpf_skb_store_bytes**\ () with the
+ *             **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ *             the hash and to trigger a new computation for the next call to
+ *             **bpf_get_hash_recalc**\ ().
+ *     Return
+ *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
- *     Returns current task_struct
- *     Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- *     safely attempt to write to a location
- *     @dst: destination address in userspace
- *     @src: source address on stack
- *     @len: number of bytes to copy
- *     Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- *     Check cgroup2 membership of current task
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 current failed the cgroup2 descendant test
- *       == 1 current succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- *     The helper will resize the skb to the given new size, to be used f.e.
- *     with control messages.
- *     @skb: pointer to skb
- *     @len: new skb length
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- *     The helper will pull in non-linear data in case the skb is non-linear
- *     and not all of len are part of the linear section. Only needed for
- *     read/write with direct packet access.
- *     @skb: pointer to skb
- *     @len: len to make read/writeable
- *     Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- *     @skb: pointer to skb
- *     @csum: csum to add
- *     Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- *     Invalidate current skb->hash.
- *     @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- *     Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- *     Grows headroom of skb and adjusts MAC header offset accordingly.
- *     Will extends/reallocae as required automatically.
- *     May change skb data pointer and will thus invalidate any check
- *     performed for direct packet access.
- *     @skb: pointer to skb
- *     @len: length of header to be pushed in front
- *     @flags: Flags (unused for now)
- *     Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- *     Adjust the xdp_md.data by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data
- *     Return: 0 on success or negative on error
+ *     Return
+ *             A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ *     Description
+ *             Attempt in a safe way to write *len* bytes from the buffer
+ *             *src* to *dst* in memory. It only works for threads that are in
+ *             user context, and *dst* must be a valid user space address.
+ *
+ *             This helper should not be used to implement any kind of
+ *             security mechanism because of TOC-TOU attacks, but rather to
+ *             debug, divert, and manipulate execution of semi-cooperative
+ *             processes.
+ *
+ *             Keep in mind that this feature is meant for experiments, and it
+ *             has a risk of crashing the system and running programs.
+ *             Therefore, when an eBPF program using this helper is attached,
+ *             a warning including PID and process name is printed to kernel
+ *             logs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ *     Description
+ *		Check whether the probe is being run in the context of a given
+ *             subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ *             *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *     Return
+ *             The return value depends on the result of the test, and can be:
+ *
+ *		* 0, if current task failed the cgroup2 descendant test.
+ *		* 1, if current task succeeded the cgroup2 descendant test.
+ *             * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ *     Description
+ *             Resize (trim or grow) the packet associated to *skb* to the
+ *             new *len*. The *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             The basic idea is that the helper performs the needed work to
+ *             change the size of the packet, then the eBPF program rewrites
+ *             the rest via helpers like **bpf_skb_store_bytes**\ (),
+ *		**bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ *		and others. This helper is a slow path utility intended for
+ *		replies with control messages. Because it is targeted at the
+ *		slow path, the helper itself can afford to be slow: it
+ *             implicitly linearizes, unclones and drops offloads from the
+ *             *skb*.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ *     Description
+ *             Pull in non-linear data in case the *skb* is non-linear and not
+ *             all of *len* are part of the linear section. Make *len* bytes
+ *             from *skb* readable and writable. If a zero value is passed for
+ *             *len*, then the whole length of the *skb* is pulled.
+ *
+ *             This helper is only needed for reading and writing with direct
+ *             packet access.
+ *
+ *		For direct packet access, testing that offsets to access
+ *		are within packet boundaries (test on *skb*\ **->data_end**)
+ *		is liable to fail if offsets are invalid, or if the requested
+ *		data is in non-linear parts of the *skb*. On failure the
+ *		program can just bail out, or in the case of a non-linear
+ *		buffer, use a helper to make the data available. The
+ *		**bpf_skb_load_bytes**\ () helper is a first solution to
+ *		access the data. Another one consists in using
+ *		**bpf_skb_pull_data**\ () to pull in the non-linear parts
+ *		once, then retesting and eventually accessing the data.
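+ *
+ *		The pull-and-retest pattern just described could be sketched
+ *		as follows, where *len* is the amount of data the program
+ *		needs to access directly:
+ *
+ *		::
+ *
+ *			void *data = (void *)(long)skb->data;
+ *			void *data_end = (void *)(long)skb->data_end;
+ *			if (data + len > data_end) {
+ *				if (bpf_skb_pull_data(skb, len))
+ *					return TC_ACT_OK;
+ *				/* reload pointers after the pull */
+ *				data = (void *)(long)skb->data;
+ *				data_end = (void *)(long)skb->data_end;
+ *				if (data + len > data_end)
+ *					return TC_ACT_OK;
+ *			}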
+ *
+ *             At the same time, this also makes sure the *skb* is uncloned,
+ *             which is a necessary condition for direct write. As this needs
+ *             to be an invariant for the write part only, the verifier
+ *             detects writes and adds a prologue that is calling
+ *		**bpf_skb_pull_data**\ () to effectively unclone the *skb* from
+ *             the very beginning in case it is indeed cloned.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ *     Description
+ *             Add the checksum *csum* into *skb*\ **->csum** in case the
+ *             driver has supplied a checksum for the entire packet into that
+ *             field. Return an error otherwise. This helper is intended to be
+ *             used in combination with **bpf_csum_diff**\ (), in particular
+ *             when the checksum needs to be updated after data has been
+ *             written into the packet through direct packet access.
+ *     Return
+ *             The checksum on success, or a negative error code in case of
+ *             failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ *     Description
+ *             Invalidate the current *skb*\ **->hash**. It can be used after
+ *             mangling on headers through direct packet access, in order to
+ *             indicate that the hash is outdated and to trigger a
+ *             recalculation the next time the kernel tries to access this
+ *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ *     Description
+ *             Return the id of the current NUMA node. The primary use case
+ *             for this helper is the selection of sockets for the local NUMA
+ *             node, when the program is attached to sockets using the
+ *             **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ *             but the helper is also available to other eBPF program types,
+ *             similarly to **bpf_get_smp_processor_id**\ ().
+ *     Return
+ *		The id of the current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ *     Description
+ *		Grow the headroom of the packet associated to *skb* and
+ *		adjust the offset of the MAC header accordingly, adding *len*
+ *		bytes of space. It automatically extends and reallocates memory as
+ *             required.
+ *
+ *             This helper can be used on a layer 3 *skb* to push a MAC header
+ *             for redirection into a layer 2 device.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ *     Description
+ *             Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ *             it is possible to use a negative value for *delta*. This helper
+ *             can be used to prepare the packet for pushing or popping
+ *             headers.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
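+ *
+ *		A minimal sketch removing a hypothetical 4-byte tag from the
+ *		front of the frame and re-validating the data pointers
+ *		afterwards:
+ *
+ *		::
+ *
+ *			void *data, *data_end;
+ *			if (bpf_xdp_adjust_head(xdp_md, 4))
+ *				return XDP_ABORTED;
+ *			data = (void *)(long)xdp_md->data;
+ *			data_end = (void *)(long)xdp_md->data_end;
+ *			if (data + sizeof(struct ethhdr) > data_end)
+ *				return XDP_DROP;
+ *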
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  *
  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- *     Copy a NUL terminated string from unsafe address. In case the string
- *     length is smaller than size, the target is not padded with further NUL
- *     bytes. In case the string length is larger than size, just count-1
- *     bytes are copied and the last byte is set to NUL.
- *     @dst: destination address
- *     @size: maximum number of bytes to copy, including the trailing NUL
- *     @unsafe_ptr: unsafe address
- *     Return:
- *       > 0 length of the string including the trailing NUL on success
- *       < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- *     Get the cookie for the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
- *     field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- *     Get the owner uid of the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- *     Set full skb->hash.
- *     @skb: pointer to skb
- *     @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls setsockopt. Not all opts are available, only those with
- *     integer optvals plus TCP_CONGESTION.
- *     Supported levels: SOL_SOCKET and IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: SOL_SOCKET or IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls getsockopt. Not all opts are available.
- *     Supported levels: IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- *     Set callback flags for sock_ops
- *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- *     @flags: flags value
- *     Return: 0 for no error
- *             -EINVAL if there is no full tcp socket
- *             bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- *     Grow or shrink room in sk_buff.
- *     @skb: pointer to skb
- *     @len_diff: (signed) amount of room to grow/shrink
- *     @mode: operation mode (enum bpf_adj_room_mode)
- *     @flags: reserved for future use
- *     Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- *     Redirect skb to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- *     @skops: pointer to bpf_sock_ops
- *     @map: pointer to sockmap to update
- *     @key: key to insert/update sock in map
- *     @flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- *     Adjust the xdp_md.data_meta by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data_meta
- *     Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- *     read perf event counter value and perf event enabled/running time
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- *     read perf prog attached perf event counter and enabled/running time
- *     @ctx: pointer to ctx
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- *     @pt_regs: pointer to struct pt_regs
- *     @rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- *     Redirect msg to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- *     Bind socket to address. Only binding to IP is supported, no port can be
- *     set in addr.
- *     @ctx: pointer to context of type bpf_sock_addr
- *     @addr: pointer to struct sockaddr to bind socket to
- *     @addr_len: length of sockaddr structure
- *     Return: 0 on success or negative error code
- *
- * int bpf_xdp_adjust_tail(xdp_md, delta)
- *     Adjust the xdp_md.data_end by delta. Only shrinking of packet's
- *     size is supported.
- *     @xdp_md: pointer to xdp_md
- *     @delta: A negative integer to be added to xdp_md.data_end
- *     Return: 0 on success or negative on error
+ *     Description
+ *             Copy a NUL terminated string from an unsafe address
+ *             *unsafe_ptr* to *dst*. The *size* should include the
+ *             terminating NUL byte. In case the string length is smaller than
+ *             *size*, the target is not padded with further NUL bytes. If the
+ *             string length is larger than *size*, just *size*-1 bytes are
+ *             copied and the last byte is set to NUL.
+ *
+ *             On success, the length of the copied string is returned. This
+ *             makes this helper useful in tracing programs for reading
+ *             strings, and more importantly to get its length at runtime. See
+ *             the following snippet:
+ *
+ *             ::
+ *
+ *                     SEC("kprobe/sys_open")
+ *                     void bpf_sys_open(struct pt_regs *ctx)
+ *                     {
+ *                             char buf[PATHLEN]; // PATHLEN is defined to 256
+ *                             int res = bpf_probe_read_str(buf, sizeof(buf),
+ *                                                          ctx->di);
+ *
+ *                             // Consume buf, for example push it to
+ *                             // userspace via bpf_perf_event_output(); we
+ *                             // can use res (the string length) as event
+ *                             // size, after checking its boundaries.
+ *                     }
+ *
+ *             In comparison, using **bpf_probe_read()** helper here instead
+ *             to read the string would require to estimate the length at
+ *             compile time, and would often result in copying more memory
+ *             than necessary.
+ *
+ *             Another use case is parsing individual process arguments or
+ *             individual environment variables, navigating
+ *             *current*\ **->mm->arg_start** and *current*\
+ *             **->mm->env_start**: using this helper and the return value,
+ *             one can quickly iterate at the right offset of the memory area.
+ *     Return
+ *             On success, the strictly positive length of the string,
+ *             including the trailing NUL character. On error, a negative
+ *             value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ *     Description
+ *             If the **struct sk_buff** pointed by *skb* has a known socket,
+ *             retrieve the cookie (generated by the kernel) of this socket.
+ *             If no cookie has been set yet, generate a new cookie. Once
+ *             generated, the socket cookie remains stable for the life of the
+ *             socket. This helper can be useful for monitoring per socket
+ *             networking traffic statistics as it provides a unique socket
+ *             identifier per namespace.
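+ *
+ *             As an illustration only (the *counters* hash map below is an
+ *             assumption, not part of this API), a program could accumulate
+ *             per-socket byte counts keyed by the cookie:
+ *
+ *             ::
+ *
+ *                     __u64 cookie = bpf_get_socket_cookie(skb);
+ *                     __u64 *bytes = bpf_map_lookup_elem(&counters, &cookie);
+ *
+ *                     if (bytes)
+ *                             __sync_fetch_and_add(bytes, skb->len);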
+ *     Return
+ *             An 8-byte long non-decreasing number on success, or 0 if the
+ *             socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Return
+ *             The owner UID of the socket associated to *skb*. If the socket
+ *             is **NULL**, or if it is not a full socket (i.e. if it is a
+ *             time-wait or a request socket instead), **overflowuid** value
+ *             is returned (note that **overflowuid** might also be the actual
+ *             UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ *     Description
+ *             Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ *             to value *hash*.
+ *     Return
+ *             0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ *     Description
+ *             Emulate a call to **setsockopt()** on the socket associated to
+ *             *bpf_socket*, which must be a full socket. The *level* at
+ *             which the option resides and the name *optname* of the option
+ *             must be specified, see **setsockopt(2)** for more information.
+ *             The option value of length *optlen* is pointed by *optval*.
+ *
+ *             This helper actually implements a subset of **setsockopt()**.
+ *             It supports the following *level*\ s:
+ *
+ *             * **SOL_SOCKET**, which supports the following *optname*\ s:
+ *               **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ *               **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ *             * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ *               **TCP_CONGESTION**, **TCP_BPF_IW**,
+ *               **TCP_BPF_SNDCWND_CLAMP**.
+ *             * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ *             * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
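+ *
+ *             For example, a **BPF_PROG_TYPE_SOCK_OPS** program could switch
+ *             the congestion control algorithm, as in the following sketch
+ *             (program and section names are placeholders):
+ *
+ *             ::
+ *
+ *                     SEC("sockops")
+ *                     int set_cc(struct bpf_sock_ops *skops)
+ *                     {
+ *                             char cong[] = "cubic";
+ *
+ *                             bpf_setsockopt(skops, IPPROTO_TCP,
+ *                                            TCP_CONGESTION, cong,
+ *                                            sizeof(cong));
+ *                             return 1;
+ *                     }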
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ *     Description
+ *             Grow or shrink the room for data in the packet associated to
+ *             *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ *             There is a single supported mode at this time:
+ *
+ *             * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ *               (room space is added or removed below the layer 3 header).
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             Redirect the packet to the endpoint referenced by *map* at
+ *             index *key*. Depending on its type, this *map* can contain
+ *             references to net devices (for forwarding packets through other
+ *             ports), or to CPUs (for redirecting XDP frames to another CPU;
+ *             but this is only implemented for native XDP (with driver
+ *             support) as of this writing).
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             When used to redirect packets to net devices, this helper
+ *             provides a high performance increase over **bpf_redirect**\ ().
+ *             This is due to various implementation details of the underlying
+ *             mechanisms, one of which is the fact that **bpf_redirect_map**\
+ *             () tries to send packets as a "bulk" to the device.
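+ *
+ *             A minimal XDP sketch, assuming *tx_ports* is a
+ *             **BPF_MAP_TYPE_DEVMAP** declared elsewhere:
+ *
+ *             ::
+ *
+ *                     SEC("xdp")
+ *                     int xdp_redirect_example(struct xdp_md *ctx)
+ *                     {
+ *                             // Send every frame to the device at index 0.
+ *                             return bpf_redirect_map(&tx_ports, 0, 0);
+ *                     }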
+ *     Return
+ *             **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             Redirect the packet to the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             Add an entry to, or update a *map* referencing sockets. The
+ *             *skops* is used as a new value for the entry associated to
+ *             *key*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             If the *map* has eBPF programs (parser and verdict), those will
+ *             be inherited by the socket being added. If the socket is
+ *             already attached to eBPF programs, this results in an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ *     Description
+ *             Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ *             *delta* (which can be positive or negative). Note that this
+ *             operation modifies the address stored in *xdp_md*\ **->data**,
+ *             so the latter must be loaded only after the helper has been
+ *             called.
+ *
+ *             The use of *xdp_md*\ **->data_meta** is optional and programs
+ *             are not required to use it. The rationale is that when the
+ *             packet is processed with XDP (e.g. as a DoS filter), it is
+ *             possible to push further meta data along with it before passing
+ *             to the stack, and to give the guarantee that an ingress eBPF
+ *             program attached as a TC classifier on the same device can pick
+ *             this up for further post-processing. Since TC works with socket
+ *             buffers, it remains possible to set from XDP the **mark** or
+ *             **priority** pointers, or other pointers for the socket buffer.
+ *             Having this scratch space generic and programmable allows for
+ *             more flexibility as the user is free to store whatever meta
+ *             data they need.
+ *
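+ *             A sketch of this pattern, reserving four bytes of meta data
+ *             and storing a scratch value for a TC program to read (the
+ *             value and program name are placeholders):
+ *
+ *             ::
+ *
+ *                     SEC("xdp")
+ *                     int xdp_set_meta(struct xdp_md *ctx)
+ *                     {
+ *                             void *data, *data_meta;
+ *
+ *                             if (bpf_xdp_adjust_meta(ctx,
+ *                                                     -(int)sizeof(__u32)))
+ *                                     return XDP_ABORTED;
+ *
+ *                             // Reload both pointers after the call.
+ *                             data = (void *)(long)ctx->data;
+ *                             data_meta = (void *)(long)ctx->data_meta;
+ *                             if (data_meta + sizeof(__u32) > data)
+ *                                     return XDP_ABORTED;
+ *
+ *                             *(__u32 *)data_meta = 42;
+ *                             return XDP_PASS;
+ *                     }
+ *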
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ *     Description
+ *             Read the value of a perf event counter, and store it into *buf*
+ *             of size *buf_size*. This helper relies on a *map* of type
+ *             **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ *             counter is selected when *map* is updated with perf event file
+ *             descriptors. The *map* is an array whose size is the number of
+ *             available CPUs, and each cell contains a value relative to one
+ *             CPU. The value to retrieve is indicated by *flags*, that
+ *             contains the index of the CPU to look up, masked with
+ *             **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *             **BPF_F_CURRENT_CPU** to indicate that the value for the
+ *             current CPU should be retrieved.
+ *
+ *             This helper behaves in a way close to
+ *             the **bpf_perf_event_read**\ () helper, save that instead of
+ *             just returning the value observed, it fills the *buf*
+ *             structure. This allows for additional data to be retrieved: in
+ *             particular, the enabled and running times (in *buf*\
+ *             **->enabled** and *buf*\ **->running**, respectively) are
+ *             copied. In general, **bpf_perf_event_read_value**\ () is
+ *             recommended over **bpf_perf_event_read**\ (), which has some
+ *             ABI issues and provides fewer functionalities.
+ *
+ *             These values are interesting, because hardware PMU (Performance
+ *             Monitoring Unit) counters are limited resources. When there are
+ *             more PMU based perf events opened than available counters,
+ *             the kernel will multiplex these events so each event gets a
+ *             certain percentage (but not all) of the PMU time. When such
+ *             multiplexing happens, the number of samples or the counter
+ *             value will not reflect what they would be without it, which
+ *             makes comparisons between different runs difficult.
+ *             Typically, the counter value should be normalized before
+ *             comparing to other experiments. The usual normalization is done
+ *             as follows.
+ *
+ *             ::
+ *
+ *                     normalized_counter = counter * t_enabled / t_running
+ *
+ *             where *t_enabled* is the time the event has been enabled and
+ *             *t_running* the time it has been running since the last
+ *             normalization. The
+ *             enabled and running times are accumulated since the perf event
+ *             open. To compute the scaling factor between two invocations of
+ *             an eBPF program, users can use the CPU id as the key (which is
+ *             typical for the perf array usage model) to remember the previous
+ *             value and do the calculation inside the eBPF program.
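+ *
+ *             A sketch putting this together, assuming *events* is a
+ *             **BPF_MAP_TYPE_PERF_EVENT_ARRAY**:
+ *
+ *             ::
+ *
+ *                     struct bpf_perf_event_value v = {};
+ *                     __u64 normalized = 0;
+ *                     int err;
+ *
+ *                     err = bpf_perf_event_read_value(&events,
+ *                                                     BPF_F_CURRENT_CPU,
+ *                                                     &v, sizeof(v));
+ *                     if (!err && v.running)
+ *                             normalized = v.counter * v.enabled / v.running;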
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ *     Description
+ *             For an eBPF program attached to a perf event, retrieve the
+ *             value of the event counter associated to *ctx* and store it in
+ *             the structure pointed by *buf* and of size *buf_size*. Enabled
+ *             and running times are also stored in the structure (see
+ *             description of helper **bpf_perf_event_read_value**\ () for
+ *             more details).
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ *     Description
+ *             Emulate a call to **getsockopt()** on the socket associated to
+ *             *bpf_socket*, which must be a full socket. The *level* at
+ *             which the option resides and the name *optname* of the option
+ *             must be specified, see **getsockopt(2)** for more information.
+ *             The retrieved value is stored in the structure pointed by
+ *             *optval* and of length *optlen*.
+ *
+ *             This helper actually implements a subset of **getsockopt()**.
+ *             It supports the following *level*\ s:
+ *
+ *             * **IPPROTO_TCP**, which supports *optname*
+ *               **TCP_CONGESTION**.
+ *             * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ *             * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ *     Description
+ *             Used for error injection, this helper uses kprobes to override
+ *             the return value of the probed function, and to set it to *rc*.
+ *             The first argument is the context *regs* on which the kprobe
+ *             works.
+ *
+ *             This helper works by setting the PC (program counter)
+ *             to an override function which is run in place of the original
+ *             probed function. This means the probed function is not run at
+ *             all. The replacement function just returns with the required
+ *             value.
+ *
+ *             This helper has security implications, and thus is subject to
+ *             restrictions. It is only available if the kernel was compiled
+ *             with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ *             option, and in this case it only works on functions tagged with
+ *             **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ *             Also, the helper is only available for the architectures having
+ *             the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ *             x86 architecture is the only one to support this feature.
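+ *
+ *             A sketch (the probed function name is a placeholder and must
+ *             be tagged with **ALLOW_ERROR_INJECTION**):
+ *
+ *             ::
+ *
+ *                     SEC("kprobe/some_injectable_func")
+ *                     int force_enomem(struct pt_regs *ctx)
+ *                     {
+ *                             // Probed function returns -ENOMEM;
+ *                             // its body never runs.
+ *                             bpf_override_return(ctx, -ENOMEM);
+ *                             return 0;
+ *                     }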
+ *     Return
+ *             0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ *     Description
+ *             Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ *             for the full TCP socket associated to *bpf_sock* to
+ *             *argval*.
+ *
+ *             The primary use of this field is to determine if there should
+ *             be calls to eBPF programs of type
+ *             **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ *             code. A program of the same type can change its value, per
+ *             connection and as necessary, when the connection is
+ *             established. This field is directly accessible for reading, but
+ *             this helper must be used for updates in order to return an
+ *             error if an eBPF program tries to set a callback that is not
+ *             supported in the current kernel.
+ *
+ *             The supported callback values that *argval* can combine are:
+ *
+ *             * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ *             * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ *             * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ *             Here are some examples of where one could call such eBPF
+ *             program:
+ *
+ *             * When RTO fires.
+ *             * When a packet is retransmitted.
+ *             * When the connection terminates.
+ *             * When a packet is sent.
+ *             * When a packet is received.
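+ *
+ *             For instance, a sockops program could ask for RTO and
+ *             retransmission callbacks once the connection is established
+ *             (a sketch; the program name is a placeholder):
+ *
+ *             ::
+ *
+ *                     SEC("sockops")
+ *                     int enable_cb(struct bpf_sock_ops *skops)
+ *                     {
+ *                             if (skops->op ==
+ *                                 BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
+ *                                     bpf_sock_ops_cb_flags_set(skops,
+ *                                             BPF_SOCK_OPS_RTO_CB_FLAG |
+ *                                             BPF_SOCK_OPS_RETRANS_CB_FLAG);
+ *                             return 1;
+ *                     }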
+ *     Return
+ *             Code **-EINVAL** if the socket is not a full TCP socket;
+ *             otherwise, a positive number containing the bits that could not
+ *             be set is returned (which comes down to 0 if all bits were set
+ *             as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             socket level. If the message *msg* is allowed to pass (i.e. if
+ *             the verdict eBPF program returns **SK_PASS**), redirect it to
+ *             the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ *     Description
+ *             For socket policies, apply the verdict of the eBPF program to
+ *             the next *bytes* (number of bytes) of message *msg*.
+ *
+ *             For example, this helper can be used in the following cases:
+ *
+ *             * A single **sendmsg**\ () or **sendfile**\ () system call
+ *               contains multiple logical messages that the eBPF program is
+ *               supposed to read and for which it should apply a verdict.
+ *             * An eBPF program only needs to read the first *bytes* of a
+ *               *msg*. If the message has a large payload, then setting up
+ *               and calling the eBPF program repeatedly for all bytes, even
+ *               though the verdict is already known, would create unnecessary
+ *               overhead.
+ *
+ *             When called from within an eBPF program, the helper sets a
+ *             counter internal to the BPF infrastructure that is used to
+ *             apply the last verdict to the next *bytes*. If *bytes* is
+ *             smaller than the current data being processed from a
+ *             **sendmsg**\ () or **sendfile**\ () system call, the first
+ *             *bytes* will be sent and the eBPF program will be re-run with
+ *             the pointer for start of data pointing to byte number *bytes*
+ *             **+ 1**. If *bytes* is larger than the current data being
+ *             processed, then the eBPF verdict will be applied to multiple
+ *             **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ *             consumed.
+ *
+ *             Note that if a socket closes with the internal counter holding
+ *             a non-zero value, this is not a problem because data is not
+ *             being buffered for *bytes* and is sent as it is received.
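+ *
+ *             A sketch of a verdict program using this helper (the context
+ *             struct visible to user space is **struct sk_msg_md**):
+ *
+ *             ::
+ *
+ *                     SEC("sk_msg")
+ *                     int verdict(struct sk_msg_md *msg)
+ *                     {
+ *                             // Apply this verdict to the next 4096 bytes
+ *                             // without re-running the program per chunk.
+ *                             bpf_msg_apply_bytes(msg, 4096);
+ *                             return SK_PASS;
+ *                     }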
+ *     Return
+ *             0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ *     Description
+ *             For socket policies, prevent the execution of the verdict eBPF
+ *             program for message *msg* until *bytes* (byte number) have been
+ *             accumulated.
+ *
+ *             This can be used when one needs a specific number of bytes
+ *             before a verdict can be assigned, even if the data spans
+ *             multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ *             case would be a user calling **sendmsg**\ () repeatedly with
+ *             1-byte long message segments. Obviously, this is bad for
+ *             performance, but it is still valid. If the eBPF program needs
+ *             *bytes* bytes to validate a header, this helper can be used to
+ *             prevent the eBPF program from being called again until *bytes*
+ *             been accumulated.
+ *     Return
+ *             0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ *     Description
+ *             For socket policies, pull in non-linear data from user space
+ *             for *msg* and set pointers *msg*\ **->data** and *msg*\
+ *             **->data_end** to *start* and *end* bytes offsets into *msg*,
+ *             respectively.
+ *
+ *             If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ *             *msg* it can only parse data that the (**data**, **data_end**)
+ *             pointers have already consumed. For **sendmsg**\ () hooks this
+ *             is likely the first scatterlist element. But for calls relying
+ *             on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ *             be the range (**0**, **0**) because the data is shared with
+ *             user space and by default the objective is to avoid allowing
+ *             user space to modify data while (or after) eBPF verdict is
+ *             being decided. This helper can be used to pull in data and to
+ *             set the start and end pointer to given values. Data will be
+ *             copied if necessary (i.e. if data was not linear and if start
+ *             and end pointers do not point to the same chunk).
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ *     Description
+ *             Bind the socket associated to *ctx* to the address pointed by
+ *             *addr*, of length *addr_len*. This allows for making outgoing
+ *             connections from the desired IP address, which can be useful
+ *             for example when all processes inside a cgroup should use one
+ *             single IP address on a host that has multiple IP addresses
+ *             configured.
+ *
+ *             This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ *             domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ *             **AF_INET6**). Looking for a free port to bind to can be
+ *             expensive, therefore binding to port is not permitted by the
+ *             helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ *             must be set to zero.
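+ *
+ *             A sketch for IPv4 (the source address is a placeholder;
+ *             **bpf_htonl**\ () is the byte-order helper from the tools'
+ *             bpf_endian.h):
+ *
+ *             ::
+ *
+ *                     SEC("cgroup/connect4")
+ *                     int bind_to_src(struct bpf_sock_addr *ctx)
+ *                     {
+ *                             struct sockaddr_in sa = {
+ *                                     .sin_family = AF_INET,
+ *                                     // 10.0.0.1; sin_port stays zero
+ *                                     .sin_addr.s_addr = bpf_htonl(0x0a000001),
+ *                             };
+ *
+ *                             bpf_bind(ctx, (struct sockaddr *)&sa,
+ *                                      sizeof(sa));
+ *                             return 1;
+ *                     }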
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ *     Description
+ *             Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ *             only possible to shrink the packet as of this writing,
+ *             therefore *delta* must be a negative integer.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ *     Description
+ *             Retrieve the XFRM state (IP transform framework, see also
+ *             **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ *             The retrieved value is stored in the **struct bpf_xfrm_state**
+ *             pointed by *xfrm_state* and of length *size*.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             **CONFIG_XFRM** configuration option.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ *     Description
+ *             Return a user or a kernel stack in bpf program provided buffer.
+ *             To achieve this, the helper needs *regs*, which is a pointer
+ *             to the context on which the tracing program is executed.
+ *             To store the stacktrace, the bpf program provides *buf* with
+ *             a nonnegative *size*.
+ *
+ *             The last argument, *flags*, holds the number of stack frames to
+ *             skip (from 0 to 255), masked with
+ *             **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *             the following flags:
+ *
+ *             **BPF_F_USER_STACK**
+ *                     Collect a user space stack instead of a kernel stack.
+ *             **BPF_F_USER_BUILD_ID**
+ *                     Collect buildid+offset instead of ips for user stack,
+ *                     only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *             **bpf_get_stack**\ () can collect up to
+ *             **PERF_MAX_STACK_DEPTH** kernel and user frames combined,
+ *             subject to a sufficiently large buffer size. Note that
+ *             this limit can be controlled with the **sysctl** program, and
+ *             that it should be manually increased in order to profile long
+ *             user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *             ::
+ *
+ *                     # sysctl kernel.perf_event_max_stack=<new value>
+ *
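+ *             A sketch collecting a user stack into a local buffer, inside
+ *             a tracing program:
+ *
+ *             ::
+ *
+ *                     __u64 ips[32];
+ *                     int len = bpf_get_stack(regs, ips, sizeof(ips),
+ *                                             BPF_F_USER_STACK);
+ *
+ *                     if (len < 0)
+ *                             return 0; // could not collect the stack
+ *                     // len bytes of instruction pointers are in ips
+ *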
+ *     Return
+ *             A non-negative value equal to or less than *size* on success,
+ *             or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ *     Description
+ *             This helper is similar to **bpf_skb_load_bytes**\ () in that
+ *             it provides an easy way to load *len* bytes from *offset*
+ *             from the packet associated to *skb*, into the buffer pointed
+ *             by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ *             a fifth argument *start_header* exists in order to select a
+ *             base offset to start from. *start_header* can be one of:
+ *
+ *             **BPF_HDR_START_MAC**
+ *                     Base offset to load data from is *skb*'s mac header.
+ *             **BPF_HDR_START_NET**
+ *                     Base offset to load data from is *skb*'s network header.
+ *
+ *             In general, "direct packet access" is the preferred method to
+ *             access packet data; however, this helper is particularly useful
+ *             in socket filters where *skb*\ **->data** does not always point
+ *             to the start of the mac header and where "direct packet access"
+ *             is not available.
+ *
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ *     Description
+ *             Do FIB lookup in kernel tables using parameters in *params*.
+ *             If lookup is successful and result shows packet is to be
+ *             forwarded, the neighbor tables are searched for the nexthop.
+ *             If successful (i.e., FIB lookup shows forwarding and nexthop
+ *             is resolved), the nexthop address is returned in ipv4_dst,
+ *             ipv6_dst or mpls_out based on family, smac is set to mac
+ *             address of egress device, dmac is set to nexthop mac address,
+ *             rt_metric is set to metric from route.
+ *
+ *             The *plen* argument is the size of the passed-in struct.
+ *             The *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
+ *
+ *             **BPF_FIB_LOOKUP_DIRECT**
+ *                     Do a direct table lookup instead of a full lookup
+ *                     using FIB rules.
+ *             **BPF_FIB_LOOKUP_OUTPUT**
+ *                     Perform the lookup from an egress perspective
+ *                     (default is ingress).
+ *
+ *             *ctx* is either **struct xdp_md** for XDP programs or
+ *             **struct sk_buff** for tc cls_act programs.
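+ *
+ *             A condensed XDP sketch (header parsing and the remaining
+ *             field setup are elided):
+ *
+ *             ::
+ *
+ *                     struct bpf_fib_lookup params = {};
+ *                     int rc;
+ *
+ *                     params.family = AF_INET;
+ *                     params.ifindex = ctx->ingress_ifindex;
+ *                     // fill tot_len, tos, ipv4_src, ipv4_dst ...
+ *
+ *                     rc = bpf_fib_lookup(ctx, &params, sizeof(params), 0);
+ *                     if (rc > 0)
+ *                             // forward via the resolved egress device
+ *                             return bpf_redirect(rc, 0);
+ *                     return XDP_PASS;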
+ *
+ *     Return
+ *             Egress device index on success, 0 if packet needs to continue
+ *             up the stack for further processing or a negative error in case
+ *             of failure.
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             Add an entry to, or update a sockhash *map* referencing sockets.
+ *             The *skops* is used as a new value for the entry associated to
+ *             *key*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             If the *map* has eBPF programs (parser and verdict), those will
+ *             be inherited by the socket being added. If the socket is
+ *             already attached to eBPF programs, this results in an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             socket level. If the message *msg* is allowed to pass (i.e. if
+ *             the verdict eBPF program returns **SK_PASS**), redirect it to
+ *             the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ *             if the verdict eBPF program returns **SK_PASS**), redirect it
+ *             to the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -841,7 +1969,14 @@ union bpf_attr {
        FN(msg_cork_bytes),             \
        FN(msg_pull_data),              \
        FN(bind),                       \
-       FN(xdp_adjust_tail),
+       FN(xdp_adjust_tail),            \
+       FN(skb_get_xfrm_state),         \
+       FN(get_stack),                  \
+       FN(skb_load_bytes_relative),    \
+       FN(fib_lookup),                 \
+       FN(sock_hash_update),           \
+       FN(msg_redirect_hash),          \
+       FN(sk_redirect_hash),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -875,11 +2010,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6             (1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK          0xffULL
 #define BPF_F_USER_STACK               (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP           (1ULL << 9)
 #define BPF_F_REUSE_STACKID            (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID            (1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
@@ -899,6 +2037,12 @@ enum bpf_adj_room_mode {
        BPF_ADJ_ROOM_NET,
 };
 
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+       BPF_HDR_START_MAC,
+       BPF_HDR_START_NET,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -947,6 +2091,19 @@ struct bpf_tunnel_key {
        __u32 tunnel_label;
 };
 
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+       __u32 reqid;
+       __u32 spi;      /* Stored in network byte order */
+       __u16 family;
+       union {
+               __u32 remote_ipv4;      /* Stored in network byte order */
+               __u32 remote_ipv6[4];   /* Stored in network byte order */
+       };
+};
+
 /* Generic BPF return codes which all BPF program types may support.
  * The values are binary compatible with their TC_ACT_* counter-part to
  * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1037,6 +2194,7 @@ struct bpf_prog_info {
        __aligned_u64 map_ids;
        char name[BPF_OBJ_NAME_LEN];
        __u32 ifindex;
+       __u32 gpl_compatible:1;
        __u64 netns_dev;
        __u64 netns_ino;
 } __attribute__((aligned(8)));
@@ -1052,6 +2210,15 @@ struct bpf_map_info {
        __u32 ifindex;
        __u64 netns_dev;
        __u64 netns_ino;
+       __u32 btf_id;
+       __u32 btf_key_id;
+       __u32 btf_value_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+       __aligned_u64 btf;
+       __u32 btf_size;
+       __u32 id;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -1232,4 +2399,55 @@ struct bpf_raw_tracepoint_args {
        __u64 args[0];
 };
 
+/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:  Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+
+struct bpf_fib_lookup {
+       /* input */
+       __u8    family;   /* network family, AF_INET, AF_INET6, AF_MPLS */
+
+       /* set if lookup is to consider L4 data - e.g., FIB rules */
+       __u8    l4_protocol;
+       __be16  sport;
+       __be16  dport;
+
+       /* total length of packet from network header - used for MTU check */
+       __u16   tot_len;
+       __u32   ifindex;  /* L3 device index for lookup */
+
+       union {
+               /* inputs to lookup */
+               __u8    tos;            /* AF_INET  */
+               __be32  flowlabel;      /* AF_INET6 */
+
+               /* output: metric of fib result */
+               __u32 rt_metric;
+       };
+
+       union {
+               __be32          mpls_in;
+               __be32          ipv4_src;
+               __u32           ipv6_src[4];  /* in6_addr; network order */
+       };
+
+       /* input to bpf_fib_lookup, *dst is destination address.
+        * output: bpf_fib_lookup sets to gateway address
+        */
+       union {
+               /* return for MPLS lookups */
+               __be32          mpls_out[4];  /* support up to 4 labels */
+               __be32          ipv4_dst;
+               __u32           ipv6_dst[4];  /* in6_addr; network order */
+       };
+
+       /* output */
+       __be16  h_vlan_proto;
+       __be16  h_vlan_TCI;
+       __u8    smac[6];     /* ETH_ALEN */
+       __u8    dmac[6];     /* ETH_ALEN */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index 74a30b1090dfcfe58c95d707671a1b09745d6cab..bcb56ee4701461a6b4a0134903a1eeef8e275ca6 100644 (file)
@@ -6,9 +6,7 @@
 #include <linux/types.h>
 
 #define BTF_MAGIC      0xeB9F
-#define BTF_MAGIC_SWAP 0x9FeB
 #define BTF_VERSION    1
-#define BTF_FLAGS_COMPR        0x01
 
 struct btf_header {
        __u16   magic;
@@ -43,7 +41,7 @@ struct btf_header {
 #define BTF_STR_OFFSET(ref)    ((ref) & BTF_MAX_NAME_OFFSET)
 
 struct btf_type {
-       __u32 name;
+       __u32 name_off;
        /* "info" bits arrangement
         * bits  0-15: vlen (e.g. # of struct's members)
         * bits 16-23: unused
@@ -105,7 +103,7 @@ struct btf_type {
  * info in "struct btf_type").
  */
 struct btf_enum {
-       __u32   name;
+       __u32   name_off;
        __s32   val;
 };
 
@@ -122,7 +120,7 @@ struct btf_array {
  * "struct btf_type").
  */
 struct btf_member {
-       __u32   name;
+       __u32   name_off;
        __u32   type;
        __u32   offset; /* offset in bits */
 };
index 68ff254147005f5eab04a042f8f0e451287d1423..db210625cee8c8b53169366017ee2d2f95795186 100644 (file)
@@ -116,12 +116,16 @@ struct proc_event {
                struct coredump_proc_event {
                        __kernel_pid_t process_pid;
                        __kernel_pid_t process_tgid;
+                       __kernel_pid_t parent_pid;
+                       __kernel_pid_t parent_tgid;
                } coredump;
 
                struct exit_proc_event {
                        __kernel_pid_t process_pid;
                        __kernel_pid_t process_tgid;
                        __u32 exit_code, exit_signal;
+                       __kernel_pid_t parent_pid;
+                       __kernel_pid_t parent_tgid;
                } exit;
 
        } event_data;
index 1df65a4c20441a64c5c191005074d8ce834f3fcd..75cb5450c851254764b55445384857b932f012a9 100644 (file)
@@ -132,6 +132,16 @@ enum devlink_eswitch_encap_mode {
        DEVLINK_ESWITCH_ENCAP_MODE_BASIC,
 };
 
+enum devlink_port_flavour {
+       DEVLINK_PORT_FLAVOUR_PHYSICAL, /* Any kind of a port physically
+                                       * facing the user.
+                                       */
+       DEVLINK_PORT_FLAVOUR_CPU, /* CPU port */
+       DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture
+                                  * interconnect port.
+                                  */
+};
+
 enum devlink_attr {
        /* don't change the order or add anything between, this is ABI! */
        DEVLINK_ATTR_UNSPEC,
@@ -224,6 +234,10 @@ enum devlink_attr {
        DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,   /* u64 */
        DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,/* u64 */
 
+       DEVLINK_ATTR_PORT_FLAVOUR,              /* u16 */
+       DEVLINK_ATTR_PORT_NUMBER,               /* u32 */
+       DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, /* u32 */
+
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
index e2535d6dcec79e568b55340535b00378305bba24..4e12c423b9fe9352df063f14c447858cf6e774e9 100644 (file)
@@ -421,6 +421,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_SYSTEM_CALL     0x404   /* ARM system call number */
 #define NT_ARM_SVE     0x405           /* ARM Scalable Vector Extension registers */
 #define NT_ARC_V2      0x600           /* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD    0x700           /* Vmcore Device Dump Note */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
index 050b92dcf8cf4013ae7ac5c76edb13bb990296ad..0fc33bf30e45a1211de3112ce8752cfc3386b307 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
 /*
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
new file mode 100644 (file)
index 0000000..77b88c4
--- /dev/null
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+ *
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ *           Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM 1
+
+struct sockaddr_xdp {
+       __u16 sxdp_family;
+       __u32 sxdp_ifindex;
+       __u32 sxdp_queue_id;
+       __u32 sxdp_shared_umem_fd;
+       __u16 sxdp_flags;
+};
+
+/* XDP socket options */
+#define XDP_RX_RING                    1
+#define XDP_TX_RING                    2
+#define XDP_UMEM_REG                   3
+#define XDP_UMEM_FILL_RING             4
+#define XDP_UMEM_COMPLETION_RING       5
+#define XDP_STATISTICS                 6
+
+struct xdp_umem_reg {
+       __u64 addr; /* Start of packet data area */
+       __u64 len; /* Length of packet data area */
+       __u32 frame_size; /* Frame size */
+       __u32 frame_headroom; /* Frame head room */
+};
+
+struct xdp_statistics {
+       __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+       __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+       __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING                        0
+#define XDP_PGOFF_TX_RING               0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING       0x100000000
+#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
+
+struct xdp_desc {
+       __u32 idx;
+       __u32 len;
+       __u16 offset;
+       __u8 flags;
+       __u8 padding[5];
+};
+
+struct xdp_ring {
+       __u32 producer __attribute__((aligned(64)));
+       __u32 consumer __attribute__((aligned(64)));
+};
+
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_ring {
+       struct xdp_ring ptrs;
+       struct xdp_desc desc[0] __attribute__((aligned(64)));
+};
+
+/* Used for the fill and completion queues for buffers */
+struct xdp_umem_ring {
+       struct xdp_ring ptrs;
+       __u32 desc[0] __attribute__((aligned(64)));
+};
+
+#endif /* _LINUX_IF_XDP_H */
index 1065006c9bf5890abdf99f9a4c444552af5542d1..b02c41e53d5616a3124e16dbec17250654a790b4 100644 (file)
@@ -676,6 +676,13 @@ struct kvm_ioeventfd {
        __u8  pad[36];
 };
 
+#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
+                                              KVM_X86_DISABLE_EXITS_HTL | \
+                                              KVM_X86_DISABLE_EXITS_PAUSE)
+
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
        /* in */
index 74b91151d49463f8773f2e16db4710464b5550b7..bcba72def817ab704c08434b8f080c5113f9d88e 100644 (file)
@@ -46,6 +46,9 @@ enum tcp_conntrack {
 /* Marks possibility for expected RFC5961 challenge ACK */
 #define IP_CT_EXP_CHALLENGE_ACK                0x40
 
+/* Simultaneous open initialized */
+#define IP_CT_TCP_SIMULTANEOUS_OPEN            0x80
+
 struct nf_ct_tcp_flags {
        __u8 flags;
        __u8 mask;
index a33000da7229cc00a76a1fa27029a366f51086e5..4a95c0db14d4ffdd2030beba3d5572e462907f70 100644 (file)
@@ -10,6 +10,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM              (1 << 2)
 #define NF_NAT_RANGE_PERSISTENT                        (1 << 3)
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY                (1 << 4)
+#define NF_NAT_RANGE_PROTO_OFFSET              (1 << 5)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL          \
        (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -17,7 +18,7 @@
 #define NF_NAT_RANGE_MASK                                      \
        (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |  \
         NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |  \
-        NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+        NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET)
 
 struct nf_nat_ipv4_range {
        unsigned int                    flags;
@@ -40,4 +41,13 @@ struct nf_nat_range {
        union nf_conntrack_man_proto    max_proto;
 };
 
+struct nf_nat_range2 {
+       unsigned int                    flags;
+       union nf_inet_addr              min_addr;
+       union nf_inet_addr              max_addr;
+       union nf_conntrack_man_proto    min_proto;
+       union nf_conntrack_man_proto    max_proto;
+       union nf_conntrack_man_proto    base_proto;
+};
+
 #endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
new file mode 100644 (file)
index 0000000..45376ea
--- /dev/null
@@ -0,0 +1,90 @@
+#ifndef _NF_OSF_H
+#define _NF_OSF_H
+
+#define MAXGENRELEN    32
+
+#define NF_OSF_GENRE   (1 << 0)
+#define NF_OSF_TTL     (1 << 1)
+#define NF_OSF_LOG     (1 << 2)
+#define NF_OSF_INVERT  (1 << 3)
+
+#define NF_OSF_LOGLEVEL_ALL            0       /* log all matched fingerprints */
+#define NF_OSF_LOGLEVEL_FIRST          1       /* log only the first matched fingerprint */
+#define NF_OSF_LOGLEVEL_ALL_KNOWN      2       /* do not log unknown packets */
+
+#define NF_OSF_TTL_TRUE                        0       /* True ip and fingerprint TTL comparison */
+
+/* Do not compare ip and fingerprint TTL at all */
+#define NF_OSF_TTL_NOCHECK             2
+
+/* Wildcard MSS (kind of).
+ * It is used to implement a state machine for the different wildcard values
+ * of the MSS and window sizes.
+ */
+struct nf_osf_wc {
+       __u32   wc;
+       __u32   val;
+};
+
+/* This struct represents IANA options
+ * http://www.iana.org/assignments/tcp-parameters
+ */
+struct nf_osf_opt {
+       __u16                   kind, length;
+       struct nf_osf_wc        wc;
+};
+
+struct nf_osf_info {
+       char    genre[MAXGENRELEN];
+       __u32   len;
+       __u32   flags;
+       __u32   loglevel;
+       __u32   ttl;
+};
+
+struct nf_osf_user_finger {
+       struct nf_osf_wc        wss;
+
+       __u8    ttl, df;
+       __u16   ss, mss;
+       __u16   opt_num;
+
+       char    genre[MAXGENRELEN];
+       char    version[MAXGENRELEN];
+       char    subtype[MAXGENRELEN];
+
+       /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
+       struct nf_osf_opt       opt[MAX_IPOPTLEN];
+};
+
+struct nf_osf_finger {
+       struct rcu_head                 rcu_head;
+       struct list_head                finger_entry;
+       struct nf_osf_user_finger       finger;
+};
+
+struct nf_osf_nlmsg {
+       struct nf_osf_user_finger       f;
+       struct iphdr                    ip;
+       struct tcphdr                   tcp;
+};
+
+/* Defines for IANA option kinds */
+enum iana_options {
+       OSFOPT_EOL = 0,         /* End of options */
+       OSFOPT_NOP,             /* NOP */
+       OSFOPT_MSS,             /* Maximum segment size */
+       OSFOPT_WSO,             /* Window scale option */
+       OSFOPT_SACKP,           /* SACK permitted */
+       OSFOPT_SACK,            /* SACK */
+       OSFOPT_ECHO,
+       OSFOPT_ECHOREPLY,
+       OSFOPT_TS,              /* Timestamp option */
+       OSFOPT_POCP,            /* Partial Order Connection Permitted */
+       OSFOPT_POSP,            /* Partial Order Service Profile */
+
+       /* Others are not used in the current OSF */
+       OSFOPT_EMPTY = 255,
+};
+
+#endif /* _NF_OSF_H */
index 6a3d653d5b274841ae3e850ae1916a000e86ea0a..ce031cf72288dfc5bbc50903c76d0915f9dec672 100644 (file)
@@ -831,7 +831,9 @@ enum nft_rt_keys {
        NFT_RT_NEXTHOP4,
        NFT_RT_NEXTHOP6,
        NFT_RT_TCPMSS,
+       __NFT_RT_MAX
 };
+#define NFT_RT_MAX             (__NFT_RT_MAX - 1)
 
 /**
  * enum nft_hash_types - nf_tables hash expression types
@@ -949,7 +951,9 @@ enum nft_ct_keys {
        NFT_CT_DST_IP,
        NFT_CT_SRC_IP6,
        NFT_CT_DST_IP6,
+       __NFT_CT_MAX
 };
+#define NFT_CT_MAX             (__NFT_CT_MAX - 1)
 
 /**
  * enum nft_ct_attributes - nf_tables ct expression netlink attributes
@@ -1450,6 +1454,8 @@ enum nft_trace_types {
  * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
  * @NFTA_NG_TYPE: operation type (NLA_U32)
  * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
+ * @NFTA_NG_SET_NAME: name of the map to lookup (NLA_STRING)
+ * @NFTA_NG_SET_ID: id of the map (NLA_U32)
  */
 enum nft_ng_attributes {
        NFTA_NG_UNSPEC,
@@ -1457,6 +1463,8 @@ enum nft_ng_attributes {
        NFTA_NG_MODULUS,
        NFTA_NG_TYPE,
        NFTA_NG_OFFSET,
+       NFTA_NG_SET_NAME,
+       NFTA_NG_SET_ID,
        __NFTA_NG_MAX
 };
 #define NFTA_NG_MAX    (__NFTA_NG_MAX - 1)
index 77987111cab0c2a01d2126e95ad40c64b18b4549..1d41810d17e2caffc6c7c31e586300d02da6947e 100644 (file)
@@ -262,6 +262,7 @@ enum ctattr_stats_cpu {
 enum ctattr_stats_global {
        CTA_STATS_GLOBAL_UNSPEC,
        CTA_STATS_GLOBAL_ENTRIES,
+       CTA_STATS_GLOBAL_MAX_ENTRIES,
        __CTA_STATS_GLOBAL_MAX,
 };
 #define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1)
index dad197e2ab99b2cf3a3df707b9934afbe54fe85d..72956eceeb09689d3c76edec321dbc81d86b803c 100644 (file)
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <linux/netfilter/nf_osf.h>
 
-#define MAXGENRELEN            32
+#define XT_OSF_GENRE           NF_OSF_GENRE
+#define XT_OSF_INVERT          NF_OSF_INVERT
 
-#define XT_OSF_GENRE           (1<<0)
-#define        XT_OSF_TTL              (1<<1)
-#define XT_OSF_LOG             (1<<2)
-#define XT_OSF_INVERT          (1<<3)
+#define XT_OSF_TTL             NF_OSF_TTL
+#define XT_OSF_LOG             NF_OSF_LOG
 
-#define XT_OSF_LOGLEVEL_ALL    0       /* log all matched fingerprints */
-#define XT_OSF_LOGLEVEL_FIRST  1       /* log only the first matced fingerprint */
-#define XT_OSF_LOGLEVEL_ALL_KNOWN      2 /* do not log unknown packets */
+#define XT_OSF_LOGLEVEL_ALL            NF_OSF_LOGLEVEL_ALL
+#define XT_OSF_LOGLEVEL_FIRST          NF_OSF_LOGLEVEL_FIRST
+#define XT_OSF_LOGLEVEL_ALL_KNOWN      NF_OSF_LOGLEVEL_ALL_KNOWN
 
-#define XT_OSF_TTL_TRUE                0       /* True ip and fingerprint TTL comparison */
-#define XT_OSF_TTL_LESS                1       /* Check if ip TTL is less than fingerprint one */
-#define XT_OSF_TTL_NOCHECK     2       /* Do not compare ip and fingerprint TTL at all */
+#define XT_OSF_TTL_TRUE                NF_OSF_TTL_TRUE
+#define XT_OSF_TTL_NOCHECK     NF_OSF_TTL_NOCHECK
 
-struct xt_osf_info {
-       char                    genre[MAXGENRELEN];
-       __u32                   len;
-       __u32                   flags;
-       __u32                   loglevel;
-       __u32                   ttl;
-};
-
-/*
- * Wildcard MSS (kind of).
- * It is used to implement a state machine for the different wildcard values
- * of the MSS and window sizes.
- */
-struct xt_osf_wc {
-       __u32                   wc;
-       __u32                   val;
-};
-
-/*
- * This struct represents IANA options
- * http://www.iana.org/assignments/tcp-parameters
- */
-struct xt_osf_opt {
-       __u16                   kind, length;
-       struct xt_osf_wc        wc;
-};
-
-struct xt_osf_user_finger {
-       struct xt_osf_wc        wss;
-
-       __u8                    ttl, df;
-       __u16                   ss, mss;
-       __u16                   opt_num;
-
-       char                    genre[MAXGENRELEN];
-       char                    version[MAXGENRELEN];
-       char                    subtype[MAXGENRELEN];
+#define XT_OSF_TTL_LESS        1       /* Check if ip TTL is less than fingerprint one */
 
-       /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
-       struct xt_osf_opt       opt[MAX_IPOPTLEN];
-};
-
-struct xt_osf_nlmsg {
-       struct xt_osf_user_finger       f;
-       struct iphdr            ip;
-       struct tcphdr           tcp;
-};
-
-/* Defines for IANA option kinds */
-
-enum iana_options {
-       OSFOPT_EOL = 0,         /* End of options */
-       OSFOPT_NOP,             /* NOP */
-       OSFOPT_MSS,             /* Maximum segment size */
-       OSFOPT_WSO,             /* Window scale option */
-       OSFOPT_SACKP,           /* SACK permitted */
-       OSFOPT_SACK,            /* SACK */
-       OSFOPT_ECHO,
-       OSFOPT_ECHOREPLY,
-       OSFOPT_TS,              /* Timestamp option */
-       OSFOPT_POCP,            /* Partial Order Connection Permitted */
-       OSFOPT_POSP,            /* Partial Order Service Profile */
-
-       /* Others are not used in the current OSF */
-       OSFOPT_EMPTY = 255,
-};
-
-/*
- * Initial window size option state machine: multiple of mss, mtu or
- * plain numeric value. Can also be made as plain numeric value which
- * is not a multiple of specified value.
- */
-enum xt_osf_window_size_options {
-       OSF_WSS_PLAIN   = 0,
-       OSF_WSS_MSS,
-       OSF_WSS_MTU,
-       OSF_WSS_MODULO,
-       OSF_WSS_MAX,
-};
+#define xt_osf_wc              nf_osf_wc
+#define xt_osf_opt             nf_osf_opt
+#define xt_osf_info            nf_osf_info
+#define xt_osf_user_finger     nf_osf_user_finger
+#define xt_osf_finger          nf_osf_finger
+#define xt_osf_nlmsg           nf_osf_nlmsg
 
 /*
  * Add/remove fingerprint from the kernel.
index 0c7dc83150139a31a1b01cf12a209df75df8d302..3b86c14ea49d0b5f355286f41ad8160cc3e567b3 100644 (file)
@@ -191,6 +191,12 @@ struct ebt_entry {
        unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
 };
 
+static __inline__ struct ebt_entry_target *
+ebt_get_target(struct ebt_entry *e)
+{
+       return (void *)e + e->target_offset;
+}
+
 /* {g,s}etsockopt numbers */
 #define EBT_BASE_CTL            128
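The new ebt_get_target() helper centralizes pointer arithmetic that callers previously open-coded. Illustratively, it expands to the equivalent of:

        struct ebt_entry_target *t =
                (struct ebt_entry_target *)((char *)e + e->target_offset);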
 
index f3cc0ef514a702257426bee8fb239cada40e9737..54ed83360dac8ed5e36fd6102b7b07856925d43b 100644 (file)
 #define IP6T_SRH_LAST_GT        0x0100
 #define IP6T_SRH_LAST_LT        0x0200
 #define IP6T_SRH_TAG            0x0400
-#define IP6T_SRH_MASK           0x07FF
+#define IP6T_SRH_PSID           0x0800
+#define IP6T_SRH_NSID           0x1000
+#define IP6T_SRH_LSID           0x2000
+#define IP6T_SRH_MASK           0x3FFF
 
 /* Values for "mt_invflags" field in struct ip6t_srh */
 #define IP6T_SRH_INV_NEXTHDR    0x0001
 #define IP6T_SRH_INV_LAST_GT    0x0100
 #define IP6T_SRH_INV_LAST_LT    0x0200
 #define IP6T_SRH_INV_TAG        0x0400
-#define IP6T_SRH_INV_MASK       0x07FF
+#define IP6T_SRH_INV_PSID       0x0800
+#define IP6T_SRH_INV_NSID       0x1000
+#define IP6T_SRH_INV_LSID       0x2000
+#define IP6T_SRH_INV_MASK       0x3FFF
 
 /**
  *      struct ip6t_srh - SRH match options
@@ -54,4 +60,37 @@ struct ip6t_srh {
        __u16                   mt_invflags;
 };
 
+/**
+ *      struct ip6t_srh1 - SRH match options (revision 1)
+ *      @ next_hdr: Next header field of SRH
+ *      @ hdr_len: Extension header length field of SRH
+ *      @ segs_left: Segments left field of SRH
+ *      @ last_entry: Last entry field of SRH
+ *      @ tag: Tag field of SRH
+ *      @ psid_addr: Address of previous SID in SRH SID list
+ *      @ nsid_addr: Address of next SID in SRH SID list
+ *      @ lsid_addr: Address of last SID in SRH SID list
+ *      @ psid_msk: Mask of previous SID in SRH SID list
+ *      @ nsid_msk: Mask of next SID in SRH SID list
+ *      @ lsid_msk: Mask of last SID in SRH SID list
+ *      @ mt_flags: match options
+ *      @ mt_invflags: Invert the sense of match options
+ */
+
+struct ip6t_srh1 {
+       __u8                    next_hdr;
+       __u8                    hdr_len;
+       __u8                    segs_left;
+       __u8                    last_entry;
+       __u16                   tag;
+       struct in6_addr         psid_addr;
+       struct in6_addr         nsid_addr;
+       struct in6_addr         lsid_addr;
+       struct in6_addr         psid_msk;
+       struct in6_addr         nsid_msk;
+       struct in6_addr         lsid_msk;
+       __u16                   mt_flags;
+       __u16                   mt_invflags;
+};
+
 #endif /*_IP6T_SRH_H*/
index 15daf5e2638d9bbcb6ad8b5d104b520dc81678fe..9c3630146cec0f739a0c3cea6ec98e95e69e44bb 100644 (file)
@@ -2698,6 +2698,8 @@ enum nl80211_attrs {
 #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
 #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
 
+#define NL80211_WIPHY_NAME_MAXLEN              128
+
 #define NL80211_MAX_SUPP_RATES                 32
 #define NL80211_MAX_SUPP_HT_RATES              77
 #define NL80211_MAX_SUPP_REG_RULES             64
index 912b85b52344b31aa405b546b5d1160a61a52df1..b8e288a1f7409012d50e464e7993b96d4c404610 100644 (file)
@@ -650,11 +650,23 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_COMM_EXEC             (1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT            (1 << 13)
 /*
- * Indicates that the content of PERF_SAMPLE_IP points to
- * the actual instruction that triggered the event. See also
- * perf_event_attr::precise_ip.
+ * These PERF_RECORD_MISC_* flags below are safely reused
+ * for the following events:
+ *
+ *   PERF_RECORD_MISC_EXACT_IP           - PERF_RECORD_SAMPLE of precise events
+ *   PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *
+ *
+ * PERF_RECORD_MISC_EXACT_IP:
+ *   Indicates that the content of PERF_SAMPLE_IP points to
+ *   the actual instruction that triggered the event. See also
+ *   perf_event_attr::precise_ip.
+ *
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
+ *   Indicates that thread was preempted in TASK_RUNNING state.
  */
 #define PERF_RECORD_MISC_EXACT_IP              (1 << 14)
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT    (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
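Since bit 14 now carries two meanings, a ring-buffer reader has to dispatch on the record type before testing it. A minimal consumer-side sketch (an illustration assumed by this editor, not part of the patch):

#include <stdbool.h>
#include <linux/perf_event.h>

static bool misc_exact_ip(const struct perf_event_header *hdr)
{
        /* Bit 14 means EXACT_IP only on sample records. */
        return hdr->type == PERF_RECORD_SAMPLE &&
               (hdr->misc & PERF_RECORD_MISC_EXACT_IP);
}

static bool misc_switch_out_preempt(const struct perf_event_header *hdr)
{
        /* On PERF_RECORD_SWITCH* records the same bit means the thread
         * was preempted in TASK_RUNNING state (only meaningful together
         * with PERF_RECORD_MISC_SWITCH_OUT). */
        return (hdr->type == PERF_RECORD_SWITCH ||
                hdr->type == PERF_RECORD_SWITCH_CPU_WIDE) &&
               (hdr->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
}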
index be05e66c167b12a70db409242519a9b1958b1000..84e4c1d0f874afec5891fcf95def286c121f71ed 100644 (file)
@@ -129,6 +129,7 @@ enum {
 #define TCA_CLS_FLAGS_SKIP_SW  (1 << 1) /* don't use filter in SW */
 #define TCA_CLS_FLAGS_IN_HW    (1 << 2) /* filter is offloaded to HW */
 #define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
+#define TCA_CLS_FLAGS_VERBOSE  (1 << 4) /* verbose logging */
 
 /* U32 filters */
 
index c34f4490d025ff6802aca2ef6bd2237906f889b2..26ee91300e3ecbb2d5f8c18db6231343c09944bd 100644 (file)
@@ -35,6 +35,9 @@
 /* Clear the entropy pool and associated counters.  (Superuser only.) */
 #define RNDCLEARPOOL   _IO( 'R', 0x06 )
 
+/* Reseed CRNG.  (Superuser only.) */
+#define RNDRESEEDCRNG  _IO( 'R', 0x07 )
+
 struct rand_pool_info {
        int     entropy_count;
        int     buf_size;
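The new ioctl is straightforward to drive from userspace; a minimal sketch (device path and error handling are illustrative, CAP_SYS_ADMIN is required):

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/random.h>

static int reseed_crng(void)
{
        int fd = open("/dev/urandom", O_RDWR);
        int ret;

        if (fd < 0)
                return -1;
        ret = ioctl(fd, RNDRESEEDCRNG);        /* superuser only */
        close(fd);
        return ret;
}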
index a66b213de3d7a40ee13cb40d900bfc1a18818692..20c6bd0b00079e9edd199cc1c138c28d3129fc46 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2008 Oracle.  All rights reserved.
  *
index d02e859301ff499dd72a1c0e1b56bed10a9397a6..750d89120335eb489f698191edb6c5110969fa8c 100644 (file)
@@ -278,6 +278,7 @@ enum
        LINUX_MIB_TCPMTUPSUCCESS,               /* TCPMTUPSuccess */
        LINUX_MIB_TCPDELIVERED,                 /* TCPDelivered */
        LINUX_MIB_TCPDELIVEREDCE,               /* TCPDeliveredCE */
+       LINUX_MIB_TCPACKCOMPRESSED,             /* TCPAckCompressed */
        __LINUX_MIB_MAX
 };
 
index 0f272818a4d27846ef96881c266d9713bbadb094..6b58371b1f0d5a1405328c5e4c43c15bc9588f9a 100644 (file)
@@ -780,24 +780,6 @@ enum {
        NET_BRIDGE_NF_FILTER_PPPOE_TAGGED = 5,
 };
 
-/* proc/sys/net/irda */
-enum {
-       NET_IRDA_DISCOVERY=1,
-       NET_IRDA_DEVNAME=2,
-       NET_IRDA_DEBUG=3,
-       NET_IRDA_FAST_POLL=4,
-       NET_IRDA_DISCOVERY_SLOTS=5,
-       NET_IRDA_DISCOVERY_TIMEOUT=6,
-       NET_IRDA_SLOT_TIMEOUT=7,
-       NET_IRDA_MAX_BAUD_RATE=8,
-       NET_IRDA_MIN_TX_TURN_TIME=9,
-       NET_IRDA_MAX_TX_DATA_SIZE=10,
-       NET_IRDA_MAX_TX_WINDOW=11,
-       NET_IRDA_MAX_NOREPLY_TIME=12,
-       NET_IRDA_WARN_NOREPLY_TIME=13,
-       NET_IRDA_LAP_KEEPALIVE_TIME=14,
-};
-
 
 /* CTL_FS names: */
 enum
index 379b08700a542d49bbce9b4b49b17879d00b69bb..29eb659aa77a183e36082599866fb512908d1197 100644 (file)
@@ -122,6 +122,10 @@ enum {
 #define TCP_MD5SIG_EXT         32      /* TCP MD5 Signature with extensions */
 #define TCP_FASTOPEN_KEY       33      /* Set the key for Fast Open (cookie) */
 #define TCP_FASTOPEN_NO_COOKIE 34      /* Enable TFO without a TFO cookie */
+#define TCP_ZEROCOPY_RECEIVE   35
+#define TCP_INQ                        36      /* Notify bytes available to read as a cmsg on read */
+
+#define TCP_CM_INQ             TCP_INQ
 
 struct tcp_repair_opt {
        __u32   opt_code;
@@ -276,4 +280,11 @@ struct tcp_diag_md5sig {
        __u8    tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
+
+struct tcp_zerocopy_receive {
+       __u64 address;          /* in: address of mapping */
+       __u32 length;           /* in/out: number of bytes to map/mapped */
+       __u32 recv_skip_hint;   /* out: amount of bytes to skip */
+};
 #endif /* _UAPI_LINUX_TCP_H */
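Both additions are easiest to read from the user side. A hedged sketch follows; note that the in-tree tcp_mmap selftest drives TCP_ZEROCOPY_RECEIVE through getsockopt() even though the comment above spells setsockopt(), and all names below are illustrative:

#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>  /* TCP_INQ, TCP_CM_INQ, struct tcp_zerocopy_receive */

/* TCP_INQ: once enabled via setsockopt(), every recvmsg() can report the
 * bytes still queued for reading as a control message. */
static ssize_t recv_with_inq(int fd, void *buf, size_t len, int *inq)
{
        char cbuf[CMSG_SPACE(sizeof(int))];
        struct iovec iov = { .iov_base = buf, .iov_len = len };
        struct msghdr msg = {
                .msg_iov = &iov, .msg_iovlen = 1,
                .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };
        ssize_t n = recvmsg(fd, &msg, 0);
        struct cmsghdr *cm;

        *inq = -1;
        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
                if (cm->cmsg_level == IPPROTO_TCP && /* == SOL_TCP */
                    cm->cmsg_type == TCP_CM_INQ)
                        memcpy(inq, CMSG_DATA(cm), sizeof(*inq));
        return n;
}

/* TCP_ZEROCOPY_RECEIVE: map received payload pages into a reserved VMA
 * instead of copying them out. */
static int zerocopy_chunk(int fd, size_t map_len)
{
        void *addr = mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0);
        struct tcp_zerocopy_receive zc = {
                .address = (__u64)(unsigned long)addr,
                .length  = map_len,
        };
        socklen_t zc_len = sizeof(zc);

        if (addr == MAP_FAILED)
                return -1;
        /* On success zc.length holds the bytes actually mapped at addr and
         * zc.recv_skip_hint the bytes that must still be read with recv(). */
        return getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
}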
index 16a296612ba4b0df2359c67aa9c899bec6f719c6..4c0338ea308a67b3f7dfe179651c7a55322ce6fb 100644 (file)
@@ -73,7 +73,6 @@ struct __kernel_old_timeval {
  */
 #define CLOCK_SGI_CYCLE                        10
 #define CLOCK_TAI                      11
-#define CLOCK_MONOTONIC_ACTIVE         12
 
 #define MAX_CLOCKS                     16
 #define CLOCKS_MASK                    (CLOCK_REALTIME | CLOCK_MONOTONIC)
index bf6d28677cfe01d6b482591b5428474acf3d0d9a..6b2fd4d9655f6949d9c752a4f286ed0834d02f1f 100644 (file)
@@ -209,16 +209,16 @@ struct tipc_group_req {
  * The string formatting for each name element is:
  * media: media
  * interface: media:interface name
- * link: Z.C.N:interface-Z.C.N:interface
- *
+ * link: node:interface-node:interface
  */
-
+#define TIPC_NODEID_LEN         16
 #define TIPC_MAX_MEDIA_NAME    16
 #define TIPC_MAX_IF_NAME       16
 #define TIPC_MAX_BEARER_NAME   32
 #define TIPC_MAX_LINK_NAME     68
 
-#define SIOCGETLINKNAME                SIOCPROTOPRIVATE
+#define SIOCGETLINKNAME        SIOCPROTOPRIVATE
+#define SIOCGETNODEID          (SIOCPROTOPRIVATE + 1)
 
 struct tipc_sioc_ln_req {
        __u32 peer;
@@ -226,6 +226,10 @@ struct tipc_sioc_ln_req {
        char linkname[TIPC_MAX_LINK_NAME];
 };
 
+struct tipc_sioc_nodeid_req {
+       __u32 peer;
+       char node_id[TIPC_NODEID_LEN];
+};
 
 /* The macros and functions below are deprecated:
  */
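A sketch of the new node-identity ioctl (illustrative; node_id holds up to 16 raw identity bytes, not necessarily a printable string):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/tipc.h>

/* Fetch the 128-bit node identity behind a peer address. */
static int get_peer_node_id(int sk, __u32 peer, char id[TIPC_NODEID_LEN])
{
        struct tipc_sioc_nodeid_req nreq = { .peer = peer };

        if (ioctl(sk, SIOCGETNODEID, &nreq) < 0)
                return -1;
        memcpy(id, nreq.node_id, TIPC_NODEID_LEN);
        return 0;
}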
index c6633e97eca40b33a15d822a1e16222ce19bf94e..ff02287495ac567a67081c142b3a3ea18ad9dc7a 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
  *
index efb7b5991c2fd5eaeab64ca0a52074d9f6cd9a11..09d00f8c442b785d98c097b38fcdde25f8e967ef 100644 (file)
@@ -32,6 +32,7 @@ struct udphdr {
 #define UDP_ENCAP      100     /* Set the socket to accept encapsulated packets */
 #define UDP_NO_CHECK6_TX 101   /* Disable sending checksum for UDP6 */
 #define UDP_NO_CHECK6_RX 102   /* Disable accepting checksum for UDP6 */
+#define UDP_SEGMENT    103     /* Set GSO segmentation size */
 
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE     1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
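UDP_SEGMENT enables UDP GSO on the transmit path: one large write leaves the host as a train of gso_size-sized datagrams. A minimal sketch (sizes illustrative; a per-call cmsg variant also exists, which is not shown here):

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/udp.h>

static ssize_t send_gso_burst(int fd, const void *buf, size_t len)
{
        int gso_size = 1000;    /* payload bytes per datagram on the wire */

        if (setsockopt(fd, IPPROTO_UDP /* == SOL_UDP */, UDP_SEGMENT,
                       &gso_size, sizeof(gso_size)))
                return -1;
        /* e.g. len == 3000 becomes three 1000-byte datagrams */
        return send(fd, buf, len, 0);
}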
index 40297a3181ed8244fbb4492b34c0feed55612739..13b8cb563892b7ca66a6268738b452c8428f006c 100644 (file)
@@ -57,6 +57,21 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation failures */
 #define VIRTIO_BALLOON_S_NR       10
 
+#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
+       VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
+       VIRTIO_BALLOON_S_NAMES_prefix "swap-out", \
+       VIRTIO_BALLOON_S_NAMES_prefix "major-faults", \
+       VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", \
+       VIRTIO_BALLOON_S_NAMES_prefix "free-memory", \
+       VIRTIO_BALLOON_S_NAMES_prefix "total-memory", \
+       VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
+       VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
+       VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
+       VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
+}
+
+#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
+
 /*
  * Memory statistics structure.
  * Driver fills an array of these structures and passes to device.
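The _WITH_PREFIX variant lets a consumer stamp a common prefix onto all stat names in a single expansion; for example (prefix string hypothetical):

#include <linux/virtio_balloon.h>

static const char *const stat_names[VIRTIO_BALLOON_S_NR] =
        VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("balloon-");
/* stat_names[VIRTIO_BALLOON_S_SWAP_IN] is "balloon-swap-in", and so on. */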
diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h
new file mode 100644 (file)
index 0000000..0226196
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_VMCORE_H
+#define _UAPI_VMCORE_H
+
+#include <linux/types.h>
+
+#define VMCOREDD_NOTE_NAME "LINUX"
+#define VMCOREDD_MAX_NAME_BYTES 44
+
+struct vmcoredd_header {
+       __u32 n_namesz; /* Name size */
+       __u32 n_descsz; /* Content size */
+       __u32 n_type;   /* NT_VMCOREDD */
+       __u8 name[8];   /* LINUX\0\0\0 */
+       __u8 dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Device dump's name */
+};
+
+#endif /* _UAPI_VMCORE_H */
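From the field comments, a producer would fill this header roughly as follows (a sketch under stated assumptions: NT_VMCOREDD comes from elf.h additions elsewhere in this series, and the dump name counts toward the note descriptor):

#include <string.h>
#include <linux/elf.h>          /* NT_VMCOREDD, assumed */
#include <linux/vmcore.h>

static void fill_vmcoredd_header(struct vmcoredd_header *hdr, __u32 data_len,
                                 const char *dev_name)
{
        hdr->n_namesz = sizeof(hdr->name);      /* "LINUX" padded to 8 */
        hdr->n_descsz = sizeof(hdr->dump_name) + data_len;
        hdr->n_type = NT_VMCOREDD;
        strncpy((char *)hdr->name, VMCOREDD_NOTE_NAME, sizeof(hdr->name));
        strncpy((char *)hdr->dump_name, dev_name, sizeof(hdr->dump_name));
}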
index 9acb4b7a624633a007412e6aa5af89a82e0ce0d9..85aed672f43e65934a49bdc5247b7e9d31b7c9d9 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
  *
index 1fefd0140c26f6e1865cc47d33d916a5a8818ea0..a159ba8dcf8f1390aba52c9c5db52e1f41f630fa 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
  *
index 7092c8de4bd8839e1d42df26419941b3d3956427..78613b609fa846aae3cda63dbb5f3858c030b253 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2016 Hisilicon Limited.
  *
index 4a8f9562f7cd9b2d625aee5ae9b91b899e551a9f..e2709bb8cb1802921ee4f0be7b5702efe1a3baf8 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
index 04e46ea517d328af930663a8dd75740148791572..625545d862d7e479bb80b59d60dfdf86a19beada 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2017-2018, Mellanox Technologies inc.  All rights reserved.
  *
index ef92118dad9770da455ac5bb91eac7fc73a0724d..90c0cf228020dca8ad241a5617e1fb8ad7764954 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
index 0d2607f0cd20c38778bb182ad268fbbf484b7f19..435155d6e1c6a53a6f79e14dec3a8c276ddaed2f 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  *
index 9be07394fdbe507c8a9bca237a5cda8519fb11a9..6aeb03315b0bd543b4fa11d5a46ab8d8a98430a8 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
index 04f64bc4045f1bc6770afaa94d5991a840f66bec..f745575281756daf991d8e4177015650794cf770 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
index cb4a02c4a1cef077ebc0d19cb0c579b57f876e69..fdaf00e206498cc3bf382c53a4515aa157a1938b 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
index ac756cd9e807726981db0779dc0dc67c28e623d4..91b12e1a6f43ce1140d5f962549c8f66702c3f3b 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
index 35bfd4015d070500bd6458891ce8889e03205524..f80495baa9697e8ef6f059def22afb06dc79c6c6 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
index 8ba098900e9aac1b3c52bbe7526df26d9757283e..24c658b3c7904297c1098937d451944642999460 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /* QLogic qedr NIC Driver
  * Copyright (c) 2015-2016  QLogic Corporation
  *
index e1269024af47f010e21270c90ca8739078b226bc..0d1e78ebad05154097ffa8b3bfb8708f5c7a0cb3 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
  *
index d223f4164a0f8d4aff469120006543c1b1b63097..d92d2721b28c5bd4afbdcf9a9264ead02441dbff 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2016 Mellanox Technologies, LTD. All rights reserved.
  *
index 1f8a9e7daea43e804bd8104c9decee7b088d9b7d..44ef6a3b7afc8ce4269d2e363482803be42ed379 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
  *
index f013afc74b1139e2fe24f1dcc7df17bb91810ab4..1fecd5b7bad823ff751fb4e375ed1fa4f574b80a 100644 (file)
@@ -738,7 +738,7 @@ config CFS_BANDWIDTH
          tasks running within the fair group scheduler.  Groups with no limit
          set are considered to be unconstrained and will run with no
          restriction.
-         See tip/Documentation/scheduler/sched-bwc.txt for more information.
+         See Documentation/scheduler/sched-bwc.txt for more information.
 
 config RT_GROUP_SCHED
        bool "Group scheduling for SCHED_RR/FIFO"
@@ -1391,6 +1391,7 @@ config BPF_SYSCALL
        bool "Enable bpf() system call"
        select ANON_INODES
        select BPF
+       select IRQ_WORK
        default n
        help
          Enable the bpf() system call that allows to manipulate eBPF
index b795aa341a3a30528f878db01f79907a2ac3e5b1..fd37315835b4ae18235af293cc689b6cdd055850 100644 (file)
@@ -423,7 +423,7 @@ static noinline void __ref rest_init(void)
 
        /*
         * Enable might_sleep() and smp_processor_id() checks.
-        * They cannot be enabled earlier because with CONFIG_PRREMPT=y
+        * They cannot be enabled earlier because with CONFIG_PREEMPT=y
         * kernel_thread() would trigger might_sleep() splats. With
         * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
         * already, but it's stuck on the kthreadd_done completion.
@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
 static void mark_readonly(void)
 {
        if (rodata_enabled) {
+               /*
+                * load_module() results in W+X mappings, which are cleaned up
+                * with call_rcu_sched().  Let's make sure that queued work is
+                * flushed so that we don't hit false positives looking for
+                * insecure pages which are W+X.
+                */
+               rcu_barrier_sched();
                mark_rodata_ro();
                rodata_test();
        } else
index 35c485fa9ea36ba91d1fd5e7d979204ffc0751e8..f27f5496d6fe2d4d58d007d78cdc2d1efa682d87 100644 (file)
@@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
+ifeq ($(CONFIG_XDP_SOCKETS),y)
+obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
+endif
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 ifeq ($(CONFIG_STREAM_PARSER),y)
 ifeq ($(CONFIG_INET),y)
index 02a189339381c00092f1891d57eea659f8333521..0fd8d8f1a398ee1a58efbde3cb37d4b78e69da24 100644 (file)
@@ -526,7 +526,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
 }
 
 /* decrement refcnt of all bpf_progs that are stored in this map */
-void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map)
 {
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;
@@ -545,6 +545,7 @@ const struct bpf_map_ops prog_array_map_ops = {
        .map_fd_get_ptr = prog_fd_array_get_ptr,
        .map_fd_put_ptr = prog_fd_array_put_ptr,
        .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
+       .map_release_uref = bpf_fd_array_map_clear,
 };
 
 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
index eb56ac7605473f5626e34c245206d9638a915144..ded10ab47b8acb4d90902debceecfae484caa73f 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/file.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
+#include <linux/idr.h>
 #include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 
             i < btf_type_vlen(struct_type);                            \
             i++, member++)
 
+static DEFINE_IDR(btf_idr);
+static DEFINE_SPINLOCK(btf_idr_lock);
+
 struct btf {
        union {
                struct btf_header *hdr;
@@ -193,6 +197,8 @@ struct btf {
        u32 types_size;
        u32 data_size;
        refcount_t refcnt;
+       u32 id;
+       struct rcu_head rcu;
 };
 
 enum verifier_phase {
@@ -473,7 +479,7 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
        __btf_verifier_log(log, "[%u] %s %s%s",
                           env->log_type_id,
                           btf_kind_str[kind],
-                          btf_name_by_offset(btf, t->name),
+                          btf_name_by_offset(btf, t->name_off),
                           log_details ? " " : "");
 
        if (log_details)
@@ -517,7 +523,7 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
                btf_verifier_log_type(env, struct_type, NULL);
 
        __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u",
-                          btf_name_by_offset(btf, member->name),
+                          btf_name_by_offset(btf, member->name_off),
                           member->type, member->offset);
 
        if (fmt && *fmt) {
@@ -598,6 +604,42 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
        return 0;
 }
 
+static int btf_alloc_id(struct btf *btf)
+{
+       int id;
+
+       idr_preload(GFP_KERNEL);
+       spin_lock_bh(&btf_idr_lock);
+       id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
+       if (id > 0)
+               btf->id = id;
+       spin_unlock_bh(&btf_idr_lock);
+       idr_preload_end();
+
+       if (WARN_ON_ONCE(!id))
+               return -ENOSPC;
+
+       return id > 0 ? 0 : id;
+}
+
+static void btf_free_id(struct btf *btf)
+{
+       unsigned long flags;
+
+       /*
+        * In map-in-map, calling map_delete_elem() on outer
+        * map will call bpf_map_put on the inner map.
+        * It will then eventually call btf_free_id()
+        * on the inner map.  Some of the map_delete_elem()
+        * implementation may have irq disabled, so
+        * we need to use the _irqsave() version instead
+        * of the _bh() version.
+        */
+       spin_lock_irqsave(&btf_idr_lock, flags);
+       idr_remove(&btf_idr, btf->id);
+       spin_unlock_irqrestore(&btf_idr_lock, flags);
+}
+
 static void btf_free(struct btf *btf)
 {
        kvfree(btf->types);
@@ -607,15 +649,19 @@ static void btf_free(struct btf *btf)
        kfree(btf);
 }
 
-static void btf_get(struct btf *btf)
+static void btf_free_rcu(struct rcu_head *rcu)
 {
-       refcount_inc(&btf->refcnt);
+       struct btf *btf = container_of(rcu, struct btf, rcu);
+
+       btf_free(btf);
 }
 
 void btf_put(struct btf *btf)
 {
-       if (btf && refcount_dec_and_test(&btf->refcnt))
-               btf_free(btf);
+       if (btf && refcount_dec_and_test(&btf->refcnt)) {
+               btf_free_id(btf);
+               call_rcu(&btf->rcu, btf_free_rcu);
+       }
 }
 
 static int env_resolve_init(struct btf_verifier_env *env)
@@ -1419,10 +1465,10 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
        btf_verifier_log_type(env, t, NULL);
 
        for_each_member(i, t, member) {
-               if (!btf_name_offset_valid(btf, member->name)) {
+               if (!btf_name_offset_valid(btf, member->name_off)) {
                        btf_verifier_log_member(env, t, member,
                                                "Invalid member name_offset:%u",
-                                               member->name);
+                                               member->name_off);
                        return -EINVAL;
                }
 
@@ -1605,14 +1651,14 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
        btf_verifier_log_type(env, t, NULL);
 
        for (i = 0; i < nr_enums; i++) {
-               if (!btf_name_offset_valid(btf, enums[i].name)) {
+               if (!btf_name_offset_valid(btf, enums[i].name_off)) {
                        btf_verifier_log(env, "\tInvalid name_offset:%u",
-                                        enums[i].name);
+                                        enums[i].name_off);
                        return -EINVAL;
                }
 
                btf_verifier_log(env, "\t%s val=%d\n",
-                                btf_name_by_offset(btf, enums[i].name),
+                                btf_name_by_offset(btf, enums[i].name_off),
                                 enums[i].val);
        }
 
@@ -1636,7 +1682,7 @@ static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t,
        for (i = 0; i < nr_enums; i++) {
                if (v == enums[i].val) {
                        seq_printf(m, "%s",
-                                  btf_name_by_offset(btf, enums[i].name));
+                                  btf_name_by_offset(btf, enums[i].name_off));
                        return;
                }
        }
@@ -1687,9 +1733,9 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
                return -EINVAL;
        }
 
-       if (!btf_name_offset_valid(env->btf, t->name)) {
+       if (!btf_name_offset_valid(env->btf, t->name_off)) {
                btf_verifier_log(env, "[%u] Invalid name_offset:%u",
-                                env->log_type_id, t->name);
+                                env->log_type_id, t->name_off);
                return -EINVAL;
        }
 
@@ -1977,7 +2023,7 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
 
        if (!err) {
                btf_verifier_env_free(env);
-               btf_get(btf);
+               refcount_set(&btf->refcnt, 1);
                return btf;
        }
 
@@ -2006,10 +2052,15 @@ const struct file_operations btf_fops = {
        .release        = btf_release,
 };
 
+static int __btf_new_fd(struct btf *btf)
+{
+       return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
+}
+
 int btf_new_fd(const union bpf_attr *attr)
 {
        struct btf *btf;
-       int fd;
+       int ret;
 
        btf = btf_parse(u64_to_user_ptr(attr->btf),
                        attr->btf_size, attr->btf_log_level,
@@ -2018,12 +2069,23 @@ int btf_new_fd(const union bpf_attr *attr)
        if (IS_ERR(btf))
                return PTR_ERR(btf);
 
-       fd = anon_inode_getfd("btf", &btf_fops, btf,
-                             O_RDONLY | O_CLOEXEC);
-       if (fd < 0)
+       ret = btf_alloc_id(btf);
+       if (ret) {
+               btf_free(btf);
+               return ret;
+       }
+
+       /*
+        * The BTF ID is published to userspace.
+        * From now on all BTF frees must go through
+        * call_rcu() (i.e. free by calling btf_put()).
+        */
+
+       ret = __btf_new_fd(btf);
+       if (ret < 0)
                btf_put(btf);
 
-       return fd;
+       return ret;
 }
 
 struct btf *btf_get_by_fd(int fd)
@@ -2042,7 +2104,7 @@ struct btf *btf_get_by_fd(int fd)
        }
 
        btf = f.file->private_data;
-       btf_get(btf);
+       refcount_inc(&btf->refcnt);
        fdput(f);
 
        return btf;
@@ -2052,13 +2114,55 @@ int btf_get_info_by_fd(const struct btf *btf,
                       const union bpf_attr *attr,
                       union bpf_attr __user *uattr)
 {
-       void __user *udata = u64_to_user_ptr(attr->info.info);
-       u32 copy_len = min_t(u32, btf->data_size,
-                            attr->info.info_len);
+       struct bpf_btf_info __user *uinfo;
+       struct bpf_btf_info info = {};
+       u32 info_copy, btf_copy;
+       void __user *ubtf;
+       u32 uinfo_len;
+
+       uinfo = u64_to_user_ptr(attr->info.info);
+       uinfo_len = attr->info.info_len;
 
-       if (copy_to_user(udata, btf->data, copy_len) ||
-           put_user(btf->data_size, &uattr->info.info_len))
+       info_copy = min_t(u32, uinfo_len, sizeof(info));
+       if (copy_from_user(&info, uinfo, info_copy))
+               return -EFAULT;
+
+       info.id = btf->id;
+       ubtf = u64_to_user_ptr(info.btf);
+       btf_copy = min_t(u32, btf->data_size, info.btf_size);
+       if (copy_to_user(ubtf, btf->data, btf_copy))
+               return -EFAULT;
+       info.btf_size = btf->data_size;
+
+       if (copy_to_user(uinfo, &info, info_copy) ||
+           put_user(info_copy, &uattr->info.info_len))
                return -EFAULT;
 
        return 0;
 }
+
+int btf_get_fd_by_id(u32 id)
+{
+       struct btf *btf;
+       int fd;
+
+       rcu_read_lock();
+       btf = idr_find(&btf_idr, id);
+       if (!btf || !refcount_inc_not_zero(&btf->refcnt))
+               btf = ERR_PTR(-ENOENT);
+       rcu_read_unlock();
+
+       if (IS_ERR(btf))
+               return PTR_ERR(btf);
+
+       fd = __btf_new_fd(btf);
+       if (fd < 0)
+               btf_put(btf);
+
+       return fd;
+}
+
+u32 btf_id(const struct btf *btf)
+{
+       return btf->id;
+}
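btf_get_fd_by_id() is the kernel half of the lookup-by-id plumbing; from userspace the call would look roughly like this (a sketch assuming the BPF_BTF_GET_FD_BY_ID command and the btf_id attribute wired up elsewhere in this series):

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int btf_fd_by_id(__u32 id)
{
        union bpf_attr attr = {};

        attr.btf_id = id;
        return syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
}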
index d315b393abdd0f7dfa67abd706e0f973c3ef2c5a..b574dddc05b82be34c1212cf0a839fe48bae60b0 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
+#include <linux/perf_event.h>
 
 #include <asm/unaligned.h>
 
@@ -218,47 +219,84 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
        return 0;
 }
 
-static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
+                               u32 curr, const bool probe_pass)
 {
+       const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+       s64 imm = insn->imm;
+
+       if (curr < pos && curr + imm + 1 > pos)
+               imm += delta;
+       else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
+               imm -= delta;
+       if (imm < imm_min || imm > imm_max)
+               return -ERANGE;
+       if (!probe_pass)
+               insn->imm = imm;
+       return 0;
+}
+
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
+                               u32 curr, const bool probe_pass)
+{
+       const s32 off_min = S16_MIN, off_max = S16_MAX;
+       s32 off = insn->off;
+
+       if (curr < pos && curr + off + 1 > pos)
+               off += delta;
+       else if (curr > pos + delta && curr + off + 1 <= pos + delta)
+               off -= delta;
+       if (off < off_min || off > off_max)
+               return -ERANGE;
+       if (!probe_pass)
+               insn->off = off;
+       return 0;
+}
+
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
+                           const bool probe_pass)
+{
+       u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
        struct bpf_insn *insn = prog->insnsi;
-       u32 i, insn_cnt = prog->len;
-       bool pseudo_call;
-       u8 code;
-       int off;
+       int ret = 0;
 
        for (i = 0; i < insn_cnt; i++, insn++) {
+               u8 code;
+
+               /* In the probing pass we still operate on the original,
+                * unpatched image in order to check overflows before we
+                * do any other adjustments. Therefore skip the patchlet.
+                */
+               if (probe_pass && i == pos) {
+                       i += delta + 1;
+                       insn++;
+               }
                code = insn->code;
-               if (BPF_CLASS(code) != BPF_JMP)
-                       continue;
-               if (BPF_OP(code) == BPF_EXIT)
+               if (BPF_CLASS(code) != BPF_JMP ||
+                   BPF_OP(code) == BPF_EXIT)
                        continue;
+               /* Adjust offset of jmps if we cross patch boundaries. */
                if (BPF_OP(code) == BPF_CALL) {
-                       if (insn->src_reg == BPF_PSEUDO_CALL)
-                               pseudo_call = true;
-                       else
+                       if (insn->src_reg != BPF_PSEUDO_CALL)
                                continue;
+                       ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
+                                                  probe_pass);
                } else {
-                       pseudo_call = false;
+                       ret = bpf_adj_delta_to_off(insn, pos, delta, i,
+                                                  probe_pass);
                }
-               off = pseudo_call ? insn->imm : insn->off;
-
-               /* Adjust offset of jmps if we cross boundaries. */
-               if (i < pos && i + off + 1 > pos)
-                       off += delta;
-               else if (i > pos + delta && i + off + 1 <= pos + delta)
-                       off -= delta;
-
-               if (pseudo_call)
-                       insn->imm = off;
-               else
-                       insn->off = off;
+               if (ret)
+                       break;
        }
+
+       return ret;
 }
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                       const struct bpf_insn *patch, u32 len)
 {
        u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
+       const u32 cnt_max = S16_MAX;
        struct bpf_prog *prog_adj;
 
        /* Since our patchlet doesn't expand the image, we're done. */
@@ -269,6 +307,15 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 
        insn_adj_cnt = prog->len + insn_delta;
 
+       /* Reject anything that would potentially let the insn->off
+        * target overflow when we have excessive program expansions.
+        * We need to probe here before any reallocation, because
+        * past that point we can no longer fail.
+        */
+       if (insn_adj_cnt > cnt_max &&
+           bpf_adj_branches(prog, off, insn_delta, true))
+               return NULL;
+
        /* Several new instructions need to be inserted. Make room
         * for them. Likely, there's no need for a new allocation as
         * last page could have large enough tailroom.
@@ -294,7 +341,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                sizeof(*patch) * insn_rest);
        memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
 
-       bpf_adj_branches(prog_adj, off, insn_delta);
+       /* We are guaranteed not to fail at this point: the ship has
+        * sailed and there is no way back to the original state. An
+        * overflow can no longer happen here.
+        */
+       BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
 
        return prog_adj;
 }
@@ -633,23 +684,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
                *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
                break;
 
-       case BPF_LD | BPF_ABS | BPF_W:
-       case BPF_LD | BPF_ABS | BPF_H:
-       case BPF_LD | BPF_ABS | BPF_B:
-               *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-               *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-               *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
-               break;
-
-       case BPF_LD | BPF_IND | BPF_W:
-       case BPF_LD | BPF_IND | BPF_H:
-       case BPF_LD | BPF_IND | BPF_B:
-               *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-               *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-               *to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
-               *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
-               break;
-
        case BPF_LD | BPF_IMM | BPF_DW:
                *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
                *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
@@ -890,14 +924,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
        INSN_3(LDX, MEM, W),                    \
        INSN_3(LDX, MEM, DW),                   \
        /*   Immediate based. */                \
-       INSN_3(LD, IMM, DW),                    \
-       /*   Misc (old cBPF carry-over). */     \
-       INSN_3(LD, ABS, B),                     \
-       INSN_3(LD, ABS, H),                     \
-       INSN_3(LD, ABS, W),                     \
-       INSN_3(LD, IND, B),                     \
-       INSN_3(LD, IND, H),                     \
-       INSN_3(LD, IND, W)
+       INSN_3(LD, IMM, DW)
 
 bool bpf_opcode_in_insntable(u8 code)
 {
@@ -907,6 +934,13 @@ bool bpf_opcode_in_insntable(u8 code)
                [0 ... 255] = false,
                /* Now overwrite non-defaults ... */
                BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
+               /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
+               [BPF_LD | BPF_ABS | BPF_B] = true,
+               [BPF_LD | BPF_ABS | BPF_H] = true,
+               [BPF_LD | BPF_ABS | BPF_W] = true,
+               [BPF_LD | BPF_IND | BPF_B] = true,
+               [BPF_LD | BPF_IND | BPF_H] = true,
+               [BPF_LD | BPF_IND | BPF_W] = true,
        };
 #undef BPF_INSN_3_TBL
 #undef BPF_INSN_2_TBL
@@ -937,8 +971,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 #undef BPF_INSN_3_LBL
 #undef BPF_INSN_2_LBL
        u32 tail_call_cnt = 0;
-       void *ptr;
-       int off;
 
 #define CONT    ({ insn++; goto select_insn; })
 #define CONT_JMP ({ insn++; goto select_insn; })
@@ -1265,67 +1297,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
                atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
                             (DST + insn->off));
                CONT;
-       LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
-               off = IMM;
-load_word:
-               /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
-                * appearing in the programs where ctx == skb
-                * (see may_access_skb() in the verifier). All programs
-                * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
-                * bpf_convert_filter() saves it in BPF_R6, internal BPF
-                * verifier will check that BPF_R6 == ctx.
-                *
-                * BPF_ABS and BPF_IND are wrappers of function calls,
-                * so they scratch BPF_R1-BPF_R5 registers, preserve
-                * BPF_R6-BPF_R9, and store return value into BPF_R0.
-                *
-                * Implicit input:
-                *   ctx == skb == BPF_R6 == CTX
-                *
-                * Explicit input:
-                *   SRC == any register
-                *   IMM == 32-bit immediate
-                *
-                * Output:
-                *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
-                */
-
-               ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
-               if (likely(ptr != NULL)) {
-                       BPF_R0 = get_unaligned_be32(ptr);
-                       CONT;
-               }
-
-               return 0;
-       LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
-               off = IMM;
-load_half:
-               ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
-               if (likely(ptr != NULL)) {
-                       BPF_R0 = get_unaligned_be16(ptr);
-                       CONT;
-               }
-
-               return 0;
-       LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
-               off = IMM;
-load_byte:
-               ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
-               if (likely(ptr != NULL)) {
-                       BPF_R0 = *(u8 *)ptr;
-                       CONT;
-               }
-
-               return 0;
-       LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
-               off = IMM + SRC;
-               goto load_word;
-       LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
-               off = IMM + SRC;
-               goto load_half;
-       LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
-               off = IMM + SRC;
-               goto load_byte;
 
        default_label:
                /* If we ever reach this, we have a bug somewhere. Die hard here
@@ -1572,13 +1543,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
        return cnt;
 }
 
+static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
+                                    u32 *prog_ids,
+                                    u32 request_cnt)
+{
+       int i = 0;
+
+       for (; *prog; prog++) {
+               if (*prog == &dummy_bpf_prog.prog)
+                       continue;
+               prog_ids[i] = (*prog)->aux->id;
+               if (++i == request_cnt) {
+                       prog++;
+                       break;
+               }
+       }
+
+       return !!(*prog);
+}
+
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
                                __u32 __user *prog_ids, u32 cnt)
 {
        struct bpf_prog **prog;
        unsigned long err = 0;
-       u32 i = 0, *ids;
        bool nospc;
+       u32 *ids;
 
        /* users of this function are doing:
         * cnt = bpf_prog_array_length();
@@ -1595,16 +1585,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
                return -ENOMEM;
        rcu_read_lock();
        prog = rcu_dereference(progs)->progs;
-       for (; *prog; prog++) {
-               if (*prog == &dummy_bpf_prog.prog)
-                       continue;
-               ids[i] = (*prog)->aux->id;
-               if (++i == cnt) {
-                       prog++;
-                       break;
-               }
-       }
-       nospc = !!(*prog);
+       nospc = bpf_prog_array_copy_core(prog, ids, cnt);
        rcu_read_unlock();
        err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
        kfree(ids);
@@ -1683,22 +1664,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 }
 
 int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
-                            __u32 __user *prog_ids, u32 request_cnt,
-                            __u32 __user *prog_cnt)
+                            u32 *prog_ids, u32 request_cnt,
+                            u32 *prog_cnt)
 {
+       struct bpf_prog **prog;
        u32 cnt = 0;
 
        if (array)
                cnt = bpf_prog_array_length(array);
 
-       if (copy_to_user(prog_cnt, &cnt, sizeof(cnt)))
-               return -EFAULT;
+       *prog_cnt = cnt;
 
        /* return early if user requested only program count or nothing to copy */
        if (!request_cnt || !cnt)
                return 0;
 
-       return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt);
+       /* this function is called under trace/bpf_trace.c: bpf_event_mutex */
+       prog = rcu_dereference_check(array, 1)->progs;
+       return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
+                                                                    : 0;
 }
 
 static void bpf_prog_free_deferred(struct work_struct *work)
@@ -1709,6 +1693,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
        aux = container_of(work, struct bpf_prog_aux, work);
        if (bpf_prog_is_dev_bound(aux))
                bpf_prog_offload_destroy(aux->prog);
+#ifdef CONFIG_PERF_EVENTS
+       if (aux->prog->has_callchain_buf)
+               put_callchain_buffers();
+#endif
        for (i = 0; i < aux->func_cnt; i++)
                bpf_jit_free(aux->func[i]);
        if (aux->func_cnt) {
@@ -1769,6 +1757,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_sock_map_update_proto __weak;
+const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
@@ -1781,6 +1770,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 {
        return -ENOTSUPP;
 }
+EXPORT_SYMBOL_GPL(bpf_event_output);
 
 /* Always built-in helper functions. */
 const struct bpf_func_proto bpf_tail_call_proto = {
@@ -1827,9 +1817,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 #include <linux/bpf_trace.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
-
-/* These are only used within the BPF_SYSCALL code */
-#ifdef CONFIG_BPF_SYSCALL
-EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
-#endif
index a41343009ccce430b05e3df4430260fcb5dcb264..ed13645bd80c48ea3e16e999cbd598182e934e63 100644 (file)
@@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
        ret = bpf_obj_do_pin(pname, raw, type);
        if (ret != 0)
                bpf_any_put(raw, type);
-       if ((trace_bpf_obj_pin_prog_enabled() ||
-            trace_bpf_obj_pin_map_enabled()) && !ret) {
-               if (type == BPF_TYPE_PROG)
-                       trace_bpf_obj_pin_prog(raw, ufd, pname);
-               if (type == BPF_TYPE_MAP)
-                       trace_bpf_obj_pin_map(raw, ufd, pname);
-       }
 out:
        putname(pname);
        return ret;
@@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
        else
                goto out;
 
-       if (ret < 0) {
+       if (ret < 0)
                bpf_any_put(raw, type);
-       } else if (trace_bpf_obj_get_prog_enabled() ||
-                  trace_bpf_obj_get_map_enabled()) {
-               if (type == BPF_TYPE_PROG)
-                       trace_bpf_obj_get_prog(raw, ret, pname);
-               if (type == BPF_TYPE_MAP)
-                       trace_bpf_obj_get_map(raw, ret, pname);
-       }
 out:
        putname(pname);
        return ret;
index c9401075b58c842d7a6e239d51a25283d1f53b3f..ac747d5cf7c68b196c3e37a1c58a9e23bc5fd786 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
        struct bpf_prog_offload *offload;
        bool ret;
 
-       if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
+       if (!bpf_prog_is_dev_bound(prog->aux))
                return false;
+       if (!bpf_map_is_dev_bound(map))
+               return bpf_map_offload_neutral(map);
 
        down_read(&bpf_devs_lock);
        offload = prog->aux->offload;
index 8dd9210d7db7851da0eb79b950803fedff9d89f3..cd832250a47844e10041542518093f147da60189 100644 (file)
 #include <net/tcp.h>
 #include <linux/ptr_ring.h>
 #include <net/inet_common.h>
+#include <linux/sched/signal.h>
 
 #define SOCK_CREATE_FLAG_MASK \
        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
-struct bpf_stab {
-       struct bpf_map map;
-       struct sock **sock_map;
+struct bpf_sock_progs {
        struct bpf_prog *bpf_tx_msg;
        struct bpf_prog *bpf_parse;
        struct bpf_prog *bpf_verdict;
 };
 
+struct bpf_stab {
+       struct bpf_map map;
+       struct sock **sock_map;
+       struct bpf_sock_progs progs;
+};
+
+struct bucket {
+       struct hlist_head head;
+       raw_spinlock_t lock;
+};
+
+struct bpf_htab {
+       struct bpf_map map;
+       struct bucket *buckets;
+       atomic_t count;
+       u32 n_buckets;
+       u32 elem_size;
+       struct bpf_sock_progs progs;
+};
+
+struct htab_elem {
+       struct rcu_head rcu;
+       struct hlist_node hash_node;
+       u32 hash;
+       struct sock *sk;
+       char key[0];
+};
+
 enum smap_psock_state {
        SMAP_TX_RUNNING,
 };
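These structures back the new sockhash map type: bpf_sock_progs is factored out of bpf_stab so the array-based and hash-based sockmaps can share the same three attached programs. A rough BPF-C sketch of the intended use (SEC(), struct bpf_map_def and the helper prototype are assumed to come from the selftests' bpf_helpers.h, and the key choice is illustrative):

#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") sock_hash = {
        .type        = BPF_MAP_TYPE_SOCKHASH,
        .key_size    = sizeof(__u32),
        .value_size  = sizeof(__u32),
        .max_entries = 1024,
};

SEC("sockops")
int add_established(struct bpf_sock_ops *skops)
{
        __u32 key = skops->local_port;

        if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
            skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
                bpf_sock_hash_update(skops, &sock_hash, &key, BPF_ANY);
        return 0;
}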
@@ -62,6 +89,8 @@ enum smap_psock_state {
 struct smap_psock_map_entry {
        struct list_head list;
        struct sock **entry;
+       struct htab_elem *hash_link;
+       struct bpf_htab *htab;
 };
 
 struct smap_psock {
@@ -190,6 +219,12 @@ static void bpf_tcp_release(struct sock *sk)
        rcu_read_unlock();
 }
 
+static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
+{
+       atomic_dec(&htab->count);
+       kfree_rcu(l, rcu);
+}
+
 static void bpf_tcp_close(struct sock *sk, long timeout)
 {
        void (*close_fun)(struct sock *sk, long timeout);
@@ -226,10 +261,16 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
        }
 
        list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-               osk = cmpxchg(e->entry, sk, NULL);
-               if (osk == sk) {
-                       list_del(&e->list);
-                       smap_release_sock(psock, sk);
+               if (e->entry) {
+                       osk = cmpxchg(e->entry, sk, NULL);
+                       if (osk == sk) {
+                               list_del(&e->list);
+                               smap_release_sock(psock, sk);
+                       }
+               } else {
+                       hlist_del_rcu(&e->hash_link->hash_node);
+                       smap_release_sock(psock, e->hash_link->sk);
+                       free_htab_elem(e->htab, e->hash_link);
                }
        }
        write_unlock_bh(&sk->sk_callback_lock);
@@ -325,6 +366,9 @@ static int bpf_tcp_push(struct sock *sk, int apply_bytes,
                        if (ret > 0) {
                                if (apply)
                                        apply_bytes -= ret;
+
+                               sg->offset += ret;
+                               sg->length -= ret;
                                size -= ret;
                                offset += ret;
                                if (uncharge)
@@ -332,8 +376,6 @@ static int bpf_tcp_push(struct sock *sk, int apply_bytes,
                                goto retry;
                        }
 
-                       sg->length = size;
-                       sg->offset = offset;
                        return ret;
                }
 
@@ -391,7 +433,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
        } while (i != md->sg_end);
 }
 
-static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+static void free_bytes_sg(struct sock *sk, int bytes,
+                         struct sk_msg_buff *md, bool charge)
 {
        struct scatterlist *sg = md->sg_data;
        int i = md->sg_start, free;
@@ -401,11 +444,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
                if (bytes < free) {
                        sg[i].length -= bytes;
                        sg[i].offset += bytes;
-                       sk_mem_uncharge(sk, bytes);
+                       if (charge)
+                               sk_mem_uncharge(sk, bytes);
                        break;
                }
 
-               sk_mem_uncharge(sk, sg[i].length);
+               if (charge)
+                       sk_mem_uncharge(sk, sg[i].length);
                put_page(sg_page(&sg[i]));
                bytes -= sg[i].length;
                sg[i].length = 0;
@@ -416,6 +461,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
                if (i == MAX_SKB_FRAGS)
                        i = 0;
        }
+       md->sg_start = i;
 }
 
 static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
@@ -455,7 +501,7 @@ static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
 static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
 {
        return ((_rc == SK_PASS) ?
-              (md->map ? __SK_REDIRECT : __SK_PASS) :
+              (md->sk_redir ? __SK_REDIRECT : __SK_PASS) :
               __SK_DROP);
 }
 
@@ -523,8 +569,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
        i = md->sg_start;
 
        do {
-               r->sg_data[i] = md->sg_data[i];
-
                size = (apply && apply_bytes < md->sg_data[i].length) ?
                        apply_bytes : md->sg_data[i].length;
 
@@ -535,6 +579,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
                }
 
                sk_mem_charge(sk, size);
+               r->sg_data[i] = md->sg_data[i];
                r->sg_data[i].length = size;
                md->sg_data[i].length -= size;
                md->sg_data[i].offset += size;
@@ -575,10 +620,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
                                       struct sk_msg_buff *md,
                                       int flags)
 {
+       bool ingress = !!(md->flags & BPF_F_INGRESS);
        struct smap_psock *psock;
        struct scatterlist *sg;
-       int i, err, free = 0;
-       bool ingress = !!(md->flags & BPF_F_INGRESS);
+       int err = 0;
 
        sg = md->sg_data;
 
@@ -606,16 +651,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 out_rcu:
        rcu_read_unlock();
 out:
-       i = md->sg_start;
-       while (sg[i].length) {
-               free += sg[i].length;
-               put_page(sg_page(&sg[i]));
-               sg[i].length = 0;
-               i++;
-               if (i == MAX_SKB_FRAGS)
-                       i = 0;
-       }
-       return free;
+       free_bytes_sg(NULL, send, md, false);
+       return err;
 }
 
 static inline void bpf_md_init(struct smap_psock *psock)
@@ -700,19 +737,26 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
                err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
                lock_sock(sk);
 
+               if (unlikely(err < 0)) {
+                       free_start_sg(sk, m);
+                       psock->sg_size = 0;
+                       if (!cork)
+                               *copied -= send;
+               } else {
+                       psock->sg_size -= send;
+               }
+
                if (cork) {
                        free_start_sg(sk, m);
+                       psock->sg_size = 0;
                        kfree(m);
                        m = NULL;
+                       err = 0;
                }
-               if (unlikely(err))
-                       *copied -= err;
-               else
-                       psock->sg_size -= send;
                break;
        case __SK_DROP:
        default:
-               free_bytes_sg(sk, send, m);
+               free_bytes_sg(sk, send, m, true);
                apply_bytes_dec(psock, send);
                *copied -= send;
                psock->sg_size -= send;
@@ -732,6 +776,26 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
        return err;
 }
 
+static int bpf_wait_data(struct sock *sk,
+                        struct smap_psock *psk, int flags,
+                        long timeo, int *err)
+{
+       int rc;
+
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+       add_wait_queue(sk_sleep(sk), &wait);
+       sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+       rc = sk_wait_event(sk, &timeo,
+                          !list_empty(&psk->ingress) ||
+                          !skb_queue_empty(&sk->sk_receive_queue),
+                          &wait);
+       sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+       remove_wait_queue(sk_sleep(sk), &wait);
+
+       return rc;
+}
+
 static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                           int nonblock, int flags, int *addr_len)
 {
@@ -755,6 +819,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 
        lock_sock(sk);
+bytes_ready:
        while (copied != len) {
                struct scatterlist *sg;
                struct sk_msg_buff *md;
@@ -809,6 +874,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                }
        }
 
+       if (!copied) {
+               long timeo;
+               int data;
+               int err = 0;
+
+               timeo = sock_rcvtimeo(sk, nonblock);
+               data = bpf_wait_data(sk, psock, flags, timeo, &err);
+
+               if (data) {
+                       if (!skb_queue_empty(&sk->sk_receive_queue)) {
+                               release_sock(sk);
+                               smap_release_sock(psock, sk);
+                               copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+                               return copied;
+                       }
+                       goto bytes_ready;
+               }
+
+               if (err)
+                       copied = err;
+       }
+
        release_sock(sk);
        smap_release_sock(psock, sk);
        return copied;
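
A hedged user-space sketch of the resulting blocking-read behavior (fd and buffer are hypothetical; assumes the socket was added to a sockmap with an ingress-redirect verdict program):

        #include <sys/socket.h>
        #include <sys/time.h>

        static ssize_t recv_with_timeout(int fd, void *buf, size_t len)
        {
                struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };

                /* sock_rcvtimeo() picks this up as 'timeo' in bpf_tcp_recvmsg() */
                if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
                        return -1;

                /* sleeps in bpf_wait_data() until the psock ingress list or
                 * the regular sk_receive_queue has data, or the timeout ends
                 */
                return recv(fd, buf, len, 0);
        }
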
@@ -1045,7 +1132,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
         * when we orphan the skb so that we don't have the possibility
         * to reference a stale map.
         */
-       TCP_SKB_CB(skb)->bpf.map = NULL;
+       TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
        skb->sk = psock->sock;
        bpf_compute_data_pointers(skb);
        preempt_disable();
@@ -1055,7 +1142,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 
        /* Moving return codes from UAPI namespace into internal namespace */
        return rc == SK_PASS ?
-               (TCP_SKB_CB(skb)->bpf.map ? __SK_REDIRECT : __SK_PASS) :
+               (TCP_SKB_CB(skb)->bpf.sk_redir ? __SK_REDIRECT : __SK_PASS) :
                __SK_DROP;
 }
 
@@ -1325,7 +1412,6 @@ static int smap_init_sock(struct smap_psock *psock,
 }
 
 static void smap_init_progs(struct smap_psock *psock,
-                           struct bpf_stab *stab,
                            struct bpf_prog *verdict,
                            struct bpf_prog *parse)
 {
@@ -1403,14 +1489,13 @@ static void smap_gc_work(struct work_struct *w)
        kfree(psock);
 }
 
-static struct smap_psock *smap_init_psock(struct sock *sock,
-                                         struct bpf_stab *stab)
+static struct smap_psock *smap_init_psock(struct sock *sock, int node)
 {
        struct smap_psock *psock;
 
        psock = kzalloc_node(sizeof(struct smap_psock),
                             GFP_ATOMIC | __GFP_NOWARN,
-                            stab->map.numa_node);
+                            node);
        if (!psock)
                return ERR_PTR(-ENOMEM);
 
@@ -1442,9 +1527,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
            attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
                return ERR_PTR(-EINVAL);
 
-       if (attr->value_size > KMALLOC_MAX_SIZE)
-               return ERR_PTR(-E2BIG);
-
        err = bpf_tcp_ulp_register();
        if (err && err != -EEXIST)
                return ERR_PTR(err);
@@ -1481,12 +1563,14 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
        return ERR_PTR(err);
 }
 
-static void smap_list_remove(struct smap_psock *psock, struct sock **entry)
+static void smap_list_remove(struct smap_psock *psock,
+                            struct sock **entry,
+                            struct htab_elem *hash_link)
 {
        struct smap_psock_map_entry *e, *tmp;
 
        list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-               if (e->entry == entry) {
+               if (e->entry == entry || e->hash_link == hash_link) {
                        list_del(&e->list);
                        break;
                }
@@ -1524,7 +1608,7 @@ static void sock_map_free(struct bpf_map *map)
                 * to be null and queued for garbage collection.
                 */
                if (likely(psock)) {
-                       smap_list_remove(psock, &stab->sock_map[i]);
+                       smap_list_remove(psock, &stab->sock_map[i], NULL);
                        smap_release_sock(psock, sock);
                }
                write_unlock_bh(&sock->sk_callback_lock);
@@ -1583,7 +1667,7 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 
        if (psock->bpf_parse)
                smap_stop_sock(psock, sock);
-       smap_list_remove(psock, &stab->sock_map[k]);
+       smap_list_remove(psock, &stab->sock_map[k], NULL);
        smap_release_sock(psock, sock);
 out:
        write_unlock_bh(&sock->sk_callback_lock);
@@ -1618,40 +1702,26 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
  *  - sock_map must use READ_ONCE and (cmp)xchg operations
  *  - BPF verdict/parse programs must use READ_ONCE and xchg operations
  */
-static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
-                                   struct bpf_map *map,
-                                   void *key, u64 flags)
+
+static int __sock_map_ctx_update_elem(struct bpf_map *map,
+                                     struct bpf_sock_progs *progs,
+                                     struct sock *sock,
+                                     struct sock **map_link,
+                                     void *key)
 {
-       struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-       struct smap_psock_map_entry *e = NULL;
        struct bpf_prog *verdict, *parse, *tx_msg;
-       struct sock *osock, *sock;
+       struct smap_psock_map_entry *e = NULL;
        struct smap_psock *psock;
-       u32 i = *(u32 *)key;
        bool new = false;
        int err;
 
-       if (unlikely(flags > BPF_EXIST))
-               return -EINVAL;
-
-       if (unlikely(i >= stab->map.max_entries))
-               return -E2BIG;
-
-       sock = READ_ONCE(stab->sock_map[i]);
-       if (flags == BPF_EXIST && !sock)
-               return -ENOENT;
-       else if (flags == BPF_NOEXIST && sock)
-               return -EEXIST;
-
-       sock = skops->sk;
-
        /* 1. If sock map has BPF programs those will be inherited by the
         * sock being added. If the sock is already attached to BPF programs
         * this results in an error.
         */
-       verdict = READ_ONCE(stab->bpf_verdict);
-       parse = READ_ONCE(stab->bpf_parse);
-       tx_msg = READ_ONCE(stab->bpf_tx_msg);
+       verdict = READ_ONCE(progs->bpf_verdict);
+       parse = READ_ONCE(progs->bpf_parse);
+       tx_msg = READ_ONCE(progs->bpf_tx_msg);
 
        if (parse && verdict) {
                /* bpf prog refcnt may be zero if a concurrent attach operation
@@ -1659,11 +1729,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                 * we increment the refcnt. If this is the case abort with an
                 * error.
                 */
-               verdict = bpf_prog_inc_not_zero(stab->bpf_verdict);
+               verdict = bpf_prog_inc_not_zero(verdict);
                if (IS_ERR(verdict))
                        return PTR_ERR(verdict);
 
-               parse = bpf_prog_inc_not_zero(stab->bpf_parse);
+               parse = bpf_prog_inc_not_zero(parse);
                if (IS_ERR(parse)) {
                        bpf_prog_put(verdict);
                        return PTR_ERR(parse);
@@ -1671,12 +1741,12 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
        }
 
        if (tx_msg) {
-               tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+               tx_msg = bpf_prog_inc_not_zero(tx_msg);
                if (IS_ERR(tx_msg)) {
-                       if (verdict)
-                               bpf_prog_put(verdict);
-                       if (parse)
+                       if (parse && verdict) {
                                bpf_prog_put(parse);
+                               bpf_prog_put(verdict);
+                       }
                        return PTR_ERR(tx_msg);
                }
        }
@@ -1704,7 +1774,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                        goto out_progs;
                }
        } else {
-               psock = smap_init_psock(sock, stab);
+               psock = smap_init_psock(sock, map->numa_node);
                if (IS_ERR(psock)) {
                        err = PTR_ERR(psock);
                        goto out_progs;
@@ -1714,12 +1784,13 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                new = true;
        }
 
-       e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
-       if (!e) {
-               err = -ENOMEM;
-               goto out_progs;
+       if (map_link) {
+               e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
+               if (!e) {
+                       err = -ENOMEM;
+                       goto out_progs;
+               }
        }
-       e->entry = &stab->sock_map[i];
 
        /* 3. At this point we have a reference to a valid psock that is
         * running. Attach any BPF programs needed.
@@ -1736,7 +1807,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                err = smap_init_sock(psock, sock);
                if (err)
                        goto out_free;
-               smap_init_progs(psock, stab, verdict, parse);
+               smap_init_progs(psock, verdict, parse);
                smap_start_sock(psock, sock);
        }
 
@@ -1745,50 +1816,94 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
         * it with. Because we can only have a single set of programs if
         * old_sock has a strp we can stop it.
         */
-       list_add_tail(&e->list, &psock->maps);
+       if (map_link) {
+               e->entry = map_link;
+               list_add_tail(&e->list, &psock->maps);
+       }
+       write_unlock_bh(&sock->sk_callback_lock);
+       return err;
+out_free:
+       kfree(e);
+       smap_release_sock(psock, sock);
+out_progs:
+       if (parse && verdict) {
+               bpf_prog_put(parse);
+               bpf_prog_put(verdict);
+       }
+       if (tx_msg)
+               bpf_prog_put(tx_msg);
        write_unlock_bh(&sock->sk_callback_lock);
+       kfree(e);
+       return err;
+}
+
+static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
+                                   struct bpf_map *map,
+                                   void *key, u64 flags)
+{
+       struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+       struct bpf_sock_progs *progs = &stab->progs;
+       struct sock *osock, *sock;
+       u32 i = *(u32 *)key;
+       int err;
+
+       if (unlikely(flags > BPF_EXIST))
+               return -EINVAL;
+
+       if (unlikely(i >= stab->map.max_entries))
+               return -E2BIG;
+
+       sock = READ_ONCE(stab->sock_map[i]);
+       if (flags == BPF_EXIST && !sock)
+               return -ENOENT;
+       else if (flags == BPF_NOEXIST && sock)
+               return -EEXIST;
+
+       sock = skops->sk;
+       err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
+                                        key);
+       if (err)
+               goto out;
 
        osock = xchg(&stab->sock_map[i], sock);
        if (osock) {
                struct smap_psock *opsock = smap_psock_sk(osock);
 
                write_lock_bh(&osock->sk_callback_lock);
-               smap_list_remove(opsock, &stab->sock_map[i]);
+               smap_list_remove(opsock, &stab->sock_map[i], NULL);
                smap_release_sock(opsock, osock);
                write_unlock_bh(&osock->sk_callback_lock);
        }
-       return 0;
-out_free:
-       smap_release_sock(psock, sock);
-out_progs:
-       if (verdict)
-               bpf_prog_put(verdict);
-       if (parse)
-               bpf_prog_put(parse);
-       if (tx_msg)
-               bpf_prog_put(tx_msg);
-       write_unlock_bh(&sock->sk_callback_lock);
-       kfree(e);
+out:
        return err;
 }
 
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
 {
-       struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+       struct bpf_sock_progs *progs;
        struct bpf_prog *orig;
 
-       if (unlikely(map->map_type != BPF_MAP_TYPE_SOCKMAP))
+       if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
+               struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+               progs = &stab->progs;
+       } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH) {
+               struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+               progs = &htab->progs;
+       } else {
                return -EINVAL;
+       }
 
        switch (type) {
        case BPF_SK_MSG_VERDICT:
-               orig = xchg(&stab->bpf_tx_msg, prog);
+               orig = xchg(&progs->bpf_tx_msg, prog);
                break;
        case BPF_SK_SKB_STREAM_PARSER:
-               orig = xchg(&stab->bpf_parse, prog);
+               orig = xchg(&progs->bpf_parse, prog);
                break;
        case BPF_SK_SKB_STREAM_VERDICT:
-               orig = xchg(&stab->bpf_verdict, prog);
+               orig = xchg(&progs->bpf_verdict, prog);
                break;
        default:
                return -EOPNOTSUPP;
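
As a usage sketch (hypothetical fds; assumes libbpf's bpf_prog_attach()), the same attach call now covers both map types:

        /* prog_fd: a loaded BPF_PROG_TYPE_SK_SKB verdict program
         * map_fd:  a BPF_MAP_TYPE_SOCKMAP or BPF_MAP_TYPE_SOCKHASH map
         */
        static int attach_verdict(int prog_fd, int map_fd)
        {
                /* routed through sock_map_prog() for either map type */
                return bpf_prog_attach(prog_fd, map_fd,
                                       BPF_SK_SKB_STREAM_VERDICT, 0);
        }
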
@@ -1834,23 +1949,423 @@ static int sock_map_update_elem(struct bpf_map *map,
        return err;
 }
 
-static void sock_map_release(struct bpf_map *map, struct file *map_file)
+static void sock_map_release(struct bpf_map *map)
 {
-       struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+       struct bpf_sock_progs *progs;
        struct bpf_prog *orig;
 
-       orig = xchg(&stab->bpf_parse, NULL);
+       if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
+               struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+               progs = &stab->progs;
+       } else {
+               struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+               progs = &htab->progs;
+       }
+
+       orig = xchg(&progs->bpf_parse, NULL);
        if (orig)
                bpf_prog_put(orig);
-       orig = xchg(&stab->bpf_verdict, NULL);
+       orig = xchg(&progs->bpf_verdict, NULL);
        if (orig)
                bpf_prog_put(orig);
 
-       orig = xchg(&stab->bpf_tx_msg, NULL);
+       orig = xchg(&progs->bpf_tx_msg, NULL);
        if (orig)
                bpf_prog_put(orig);
 }
 
+static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
+{
+       struct bpf_htab *htab;
+       int i, err;
+       u64 cost;
+
+       if (!capable(CAP_NET_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 || attr->value_size != 4 ||
+           attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+               return ERR_PTR(-EINVAL);
+
+       if (attr->key_size > MAX_BPF_STACK)
+               /* eBPF programs initialize keys on stack, so they cannot be
+                * larger than max stack size
+                */
+               return ERR_PTR(-E2BIG);
+
+       err = bpf_tcp_ulp_register();
+       if (err && err != -EEXIST)
+               return ERR_PTR(err);
+
+       htab = kzalloc(sizeof(*htab), GFP_USER);
+       if (!htab)
+               return ERR_PTR(-ENOMEM);
+
+       bpf_map_init_from_attr(&htab->map, attr);
+
+       htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
+       htab->elem_size = sizeof(struct htab_elem) +
+                         round_up(htab->map.key_size, 8);
+       err = -EINVAL;
+       if (htab->n_buckets == 0 ||
+           htab->n_buckets > U32_MAX / sizeof(struct bucket))
+               goto free_htab;
+
+       cost = (u64) htab->n_buckets * sizeof(struct bucket) +
+              (u64) htab->elem_size * htab->map.max_entries;
+
+       if (cost >= U32_MAX - PAGE_SIZE)
+               goto free_htab;
+
+       htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+       err = bpf_map_precharge_memlock(htab->map.pages);
+       if (err)
+               goto free_htab;
+
+       err = -ENOMEM;
+       htab->buckets = bpf_map_area_alloc(
+                               htab->n_buckets * sizeof(struct bucket),
+                               htab->map.numa_node);
+       if (!htab->buckets)
+               goto free_htab;
+
+       for (i = 0; i < htab->n_buckets; i++) {
+               INIT_HLIST_HEAD(&htab->buckets[i].head);
+               raw_spin_lock_init(&htab->buckets[i].lock);
+       }
+
+       return &htab->map;
+free_htab:
+       kfree(htab);
+       return ERR_PTR(err);
+}
+
+static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+{
+       return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+       return &__select_bucket(htab, hash)->head;
+}
+
+static void sock_hash_free(struct bpf_map *map)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       int i;
+
+       synchronize_rcu();
+
+       /* At this point no update, lookup or delete operations can happen.
+        * However, be aware we can still get socket state event updates,
+        * and data ready callbacks that reference the psock from sk_user_data.
+        * Also psock worker threads are still in-flight. So smap_release_sock
+        * will only free the psock after cancel_sync on the worker threads
+        * and a grace period expires to ensure the psock is really safe to remove.
+        */
+       rcu_read_lock();
+       for (i = 0; i < htab->n_buckets; i++) {
+               struct hlist_head *head = select_bucket(htab, i);
+               struct hlist_node *n;
+               struct htab_elem *l;
+
+               hlist_for_each_entry_safe(l, n, head, hash_node) {
+                       struct sock *sock = l->sk;
+                       struct smap_psock *psock;
+
+                       hlist_del_rcu(&l->hash_node);
+                       write_lock_bh(&sock->sk_callback_lock);
+                       psock = smap_psock_sk(sock);
+                       /* This check handles a racing sock event that can get
+                        * the sk_callback_lock before this case but after xchg happens,
+                        * causing the refcnt to hit zero and sock user data
+                        * (psock) to be null and queued for garbage collection.
+                        */
+                       if (likely(psock)) {
+                               smap_list_remove(psock, NULL, l);
+                               smap_release_sock(psock, sock);
+                       }
+                       write_unlock_bh(&sock->sk_callback_lock);
+                       kfree(l);
+               }
+       }
+       rcu_read_unlock();
+       bpf_map_area_free(htab->buckets);
+       kfree(htab);
+}
+
+static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
+                                             void *key, u32 key_size, u32 hash,
+                                             struct sock *sk,
+                                             struct htab_elem *old_elem)
+{
+       struct htab_elem *l_new;
+
+       if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
+               if (!old_elem) {
+                       atomic_dec(&htab->count);
+                       return ERR_PTR(-E2BIG);
+               }
+       }
+       l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
+                            htab->map.numa_node);
+       if (!l_new)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(l_new->key, key, key_size);
+       l_new->sk = sk;
+       l_new->hash = hash;
+       return l_new;
+}
+
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
+                                        u32 hash, void *key, u32 key_size)
+{
+       struct htab_elem *l;
+
+       hlist_for_each_entry_rcu(l, head, hash_node) {
+               if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                       return l;
+       }
+
+       return NULL;
+}
+
+static inline u32 htab_map_hash(const void *key, u32 key_len)
+{
+       return jhash(key, key_len, 0);
+}
+
+static int sock_hash_get_next_key(struct bpf_map *map,
+                                 void *key, void *next_key)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct htab_elem *l, *next_l;
+       struct hlist_head *h;
+       u32 hash, key_size;
+       int i = 0;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       key_size = map->key_size;
+       if (!key)
+               goto find_first_elem;
+       hash = htab_map_hash(key, key_size);
+       h = select_bucket(htab, hash);
+
+       l = lookup_elem_raw(h, hash, key, key_size);
+       if (!l)
+               goto find_first_elem;
+       next_l = hlist_entry_safe(
+                    rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+                    struct htab_elem, hash_node);
+       if (next_l) {
+               memcpy(next_key, next_l->key, key_size);
+               return 0;
+       }
+
+       /* no more elements in this hash list, go to the next bucket */
+       i = hash & (htab->n_buckets - 1);
+       i++;
+
+find_first_elem:
+       /* iterate over buckets */
+       for (; i < htab->n_buckets; i++) {
+               h = select_bucket(htab, i);
+
+               /* pick first element in the bucket */
+               next_l = hlist_entry_safe(
+                               rcu_dereference_raw(hlist_first_rcu(h)),
+                               struct htab_elem, hash_node);
+               if (next_l) {
+                       /* if it's not empty, just return it */
+                       memcpy(next_key, next_l->key, key_size);
+                       return 0;
+               }
+       }
+
+       /* iterated over all buckets and all elements */
+       return -ENOENT;
+}
+
+static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
+                                    struct bpf_map *map,
+                                    void *key, u64 map_flags)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct bpf_sock_progs *progs = &htab->progs;
+       struct htab_elem *l_new = NULL, *l_old;
+       struct smap_psock_map_entry *e = NULL;
+       struct hlist_head *head;
+       struct smap_psock *psock;
+       u32 key_size, hash;
+       struct sock *sock;
+       struct bucket *b;
+       int err;
+
+       sock = skops->sk;
+
+       if (sock->sk_type != SOCK_STREAM ||
+           sock->sk_protocol != IPPROTO_TCP)
+               return -EOPNOTSUPP;
+
+       if (unlikely(map_flags > BPF_EXIST))
+               return -EINVAL;
+
+       e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
+       if (!e)
+               return -ENOMEM;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       key_size = map->key_size;
+       hash = htab_map_hash(key, key_size);
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
+       if (err)
+               goto err;
+
+       /* bpf_map_update_elem() can be called in_irq() */
+       raw_spin_lock_bh(&b->lock);
+       l_old = lookup_elem_raw(head, hash, key, key_size);
+       if (l_old && map_flags == BPF_NOEXIST) {
+               err = -EEXIST;
+               goto bucket_err;
+       }
+       if (!l_old && map_flags == BPF_EXIST) {
+               err = -ENOENT;
+               goto bucket_err;
+       }
+
+       l_new = alloc_sock_hash_elem(htab, key, key_size, hash, sock, l_old);
+       if (IS_ERR(l_new)) {
+               err = PTR_ERR(l_new);
+               goto bucket_err;
+       }
+
+       psock = smap_psock_sk(sock);
+       if (unlikely(!psock)) {
+               err = -EINVAL;
+               goto bucket_err;
+       }
+
+       e->hash_link = l_new;
+       e->htab = container_of(map, struct bpf_htab, map);
+       list_add_tail(&e->list, &psock->maps);
+
+       /* add new element to the head of the list, so that
+        * concurrent search will find it before old elem
+        */
+       hlist_add_head_rcu(&l_new->hash_node, head);
+       if (l_old) {
+               psock = smap_psock_sk(l_old->sk);
+
+               hlist_del_rcu(&l_old->hash_node);
+               smap_list_remove(psock, NULL, l_old);
+               smap_release_sock(psock, l_old->sk);
+               free_htab_elem(htab, l_old);
+       }
+       raw_spin_unlock_bh(&b->lock);
+       return 0;
+bucket_err:
+       raw_spin_unlock_bh(&b->lock);
+err:
+       kfree(e);
+       psock = smap_psock_sk(sock);
+       if (psock)
+               smap_release_sock(psock, sock);
+       return err;
+}
+
+static int sock_hash_update_elem(struct bpf_map *map,
+                               void *key, void *value, u64 flags)
+{
+       struct bpf_sock_ops_kern skops;
+       u32 fd = *(u32 *)value;
+       struct socket *socket;
+       int err;
+
+       socket = sockfd_lookup(fd, &err);
+       if (!socket)
+               return err;
+
+       skops.sk = socket->sk;
+       if (!skops.sk) {
+               fput(socket->file);
+               return -EINVAL;
+       }
+
+       err = sock_hash_ctx_update_elem(&skops, map, key, flags);
+       fput(socket->file);
+       return err;
+}
+
+static int sock_hash_delete_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct hlist_head *head;
+       struct bucket *b;
+       struct htab_elem *l;
+       u32 hash, key_size;
+       int ret = -ENOENT;
+
+       key_size = map->key_size;
+       hash = htab_map_hash(key, key_size);
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       raw_spin_lock_bh(&b->lock);
+       l = lookup_elem_raw(head, hash, key, key_size);
+       if (l) {
+               struct sock *sock = l->sk;
+               struct smap_psock *psock;
+
+               hlist_del_rcu(&l->hash_node);
+               write_lock_bh(&sock->sk_callback_lock);
+               psock = smap_psock_sk(sock);
+               /* This check handles a racing sock event that can get the
+                * sk_callback_lock before this case but after xchg happens
+                * causing the refcnt to hit zero and sock user data (psock)
+                * to be null and queued for garbage collection.
+                */
+               if (likely(psock)) {
+                       smap_list_remove(psock, NULL, l);
+                       smap_release_sock(psock, sock);
+               }
+               write_unlock_bh(&sock->sk_callback_lock);
+               free_htab_elem(htab, l);
+               ret = 0;
+       }
+       raw_spin_unlock_bh(&b->lock);
+       return ret;
+}
+
+struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct hlist_head *head;
+       struct htab_elem *l;
+       u32 key_size, hash;
+       struct bucket *b;
+       struct sock *sk;
+
+       key_size = map->key_size;
+       hash = htab_map_hash(key, key_size);
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       raw_spin_lock_bh(&b->lock);
+       l = lookup_elem_raw(head, hash, key, key_size);
+       sk = l ? l->sk : NULL;
+       raw_spin_unlock_bh(&b->lock);
+       return sk;
+}
+
 const struct bpf_map_ops sock_map_ops = {
        .map_alloc = sock_map_alloc,
        .map_free = sock_map_free,
@@ -1858,7 +2373,16 @@ const struct bpf_map_ops sock_map_ops = {
        .map_get_next_key = sock_map_get_next_key,
        .map_update_elem = sock_map_update_elem,
        .map_delete_elem = sock_map_delete_elem,
-       .map_release = sock_map_release,
+       .map_release_uref = sock_map_release,
+};
+
+const struct bpf_map_ops sock_hash_ops = {
+       .map_alloc = sock_hash_alloc,
+       .map_free = sock_hash_free,
+       .map_lookup_elem = sock_map_lookup,
+       .map_get_next_key = sock_hash_get_next_key,
+       .map_update_elem = sock_hash_update_elem,
+       .map_delete_elem = sock_hash_delete_elem,
 };
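
A minimal user-space sketch of populating a sockhash (key layout hypothetical; assumes libbpf's bpf_map_update_elem()); the value is a socket fd that sock_hash_update_elem() resolves via sockfd_lookup():

        #include <linux/types.h>

        struct sock_key {               /* hypothetical key layout */
                __u32 remote_ip4;
                __u32 local_port;
        };

        static int add_sock(int map_fd, int sock_fd, struct sock_key *key)
        {
                __u32 fd = sock_fd;     /* value_size must be 4 */

                /* ends up in sock_hash_ctx_update_elem() above */
                return bpf_map_update_elem(map_fd, key, &fd, BPF_NOEXIST);
        }
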
 
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
@@ -1878,3 +2402,21 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
        .arg3_type      = ARG_PTR_TO_MAP_KEY,
        .arg4_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
+          struct bpf_map *, map, void *, key, u64, flags)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
+}
+
+const struct bpf_func_proto bpf_sock_hash_update_proto = {
+       .func           = bpf_sock_hash_update,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_PTR_TO_MAP_KEY,
+       .arg4_type      = ARG_ANYTHING,
+};
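
For the BPF side, a hedged sketch of the intended pairing (map layout and keying are hypothetical; assumes the usual helper declarations from bpf_helpers.h): a sockops program inserts established sockets with bpf_sock_hash_update(), and an sk_msg program steers traffic with the sockhash counterpart of bpf_msg_redirect_map():

        struct bpf_map_def SEC("maps") sock_hash = {
                .type           = BPF_MAP_TYPE_SOCKHASH,
                .key_size       = sizeof(__u32),        /* hypothetical key */
                .value_size     = sizeof(__u32),        /* must be 4 */
                .max_entries    = 1024,
        };

        SEC("sockops")
        int add_established(struct bpf_sock_ops *skops)
        {
                __u32 key = skops->local_port;          /* hypothetical keying */

                if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
                        bpf_sock_hash_update(skops, &sock_hash, &key, BPF_ANY);
                return 0;
        }

        SEC("sk_msg")
        int redirect_msg(struct sk_msg_md *msg)
        {
                __u32 key = msg->local_port;

                return bpf_msg_redirect_hash(msg, &sock_hash, &key, BPF_F_INGRESS);
        }
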
index 57eeb1234b67e7dabd555e9562b0a0b59cd57abb..b59ace0f0f0912724a2b826a77a306f31fd7fb83 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/perf_event.h>
 #include <linux/elf.h>
 #include <linux/pagemap.h>
+#include <linux/irq_work.h>
 #include "percpu_freelist.h"
 
 #define STACK_CREATE_FLAG_MASK                                 \
@@ -32,6 +33,23 @@ struct bpf_stack_map {
        struct stack_map_bucket *buckets[];
 };
 
+/* irq_work to run up_read() for build_id lookup in nmi context */
+struct stack_map_irq_work {
+       struct irq_work irq_work;
+       struct rw_semaphore *sem;
+};
+
+static void do_up_read(struct irq_work *entry)
+{
+       struct stack_map_irq_work *work;
+
+       work = container_of(entry, struct stack_map_irq_work, irq_work);
+       up_read(work->sem);
+       work->sem = NULL;
+}
+
+static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
+
 static inline bool stack_map_use_build_id(struct bpf_map *map)
 {
        return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -262,27 +280,32 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
        return ret;
 }
 
-static void stack_map_get_build_id_offset(struct bpf_map *map,
-                                         struct stack_map_bucket *bucket,
+static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
                                          u64 *ips, u32 trace_nr, bool user)
 {
        int i;
        struct vm_area_struct *vma;
-       struct bpf_stack_build_id *id_offs;
-
-       bucket->nr = trace_nr;
-       id_offs = (struct bpf_stack_build_id *)bucket->data;
+       bool in_nmi_ctx = in_nmi();
+       bool irq_work_busy = false;
+       struct stack_map_irq_work *work;
+
+       if (in_nmi_ctx) {
+               work = this_cpu_ptr(&up_read_work);
+               if (work->irq_work.flags & IRQ_WORK_BUSY)
+                       /* cannot queue more up_read, fall back */
+                       irq_work_busy = true;
+       }
 
        /*
-        * We cannot do up_read() in nmi context, so build_id lookup is
-        * only supported for non-nmi events. If at some point, it is
-        * possible to run find_vma() without taking the semaphore, we
-        * would like to allow build_id lookup in nmi context.
+        * We cannot do up_read() in nmi context. To do build_id lookup
+        * in nmi context, we need to run up_read() in irq_work. We use
+        * a percpu variable to do the irq_work. If the irq_work is
+        * already used by another lookup, we fall back to reporting ips.
         *
         * Same fallback is used for kernel stack (!user) on a stackmap
         * with build_id.
         */
-       if (!user || !current || !current->mm || in_nmi() ||
+       if (!user || !current || !current->mm || irq_work_busy ||
            down_read_trylock(&current->mm->mmap_sem) == 0) {
                /* cannot access current->mm, fall back to ips */
                for (i = 0; i < trace_nr; i++) {
@@ -304,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_map *map,
                        - vma->vm_start;
                id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
        }
-       up_read(&current->mm->mmap_sem);
+
+       if (!in_nmi_ctx) {
+               up_read(&current->mm->mmap_sem);
+       } else {
+               work->sem = &current->mm->mmap_sem;
+               irq_work_queue(&work->irq_work);
+       }
 }
 
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -361,8 +390,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
                        pcpu_freelist_pop(&smap->freelist);
                if (unlikely(!new_bucket))
                        return -ENOMEM;
-               stack_map_get_build_id_offset(map, new_bucket, ips,
-                                             trace_nr, user);
+               new_bucket->nr = trace_nr;
+               stack_map_get_build_id_offset(
+                       (struct bpf_stack_build_id *)new_bucket->data,
+                       ips, trace_nr, user);
                trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
                if (hash_matches && bucket->nr == trace_nr &&
                    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@@ -405,6 +436,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
        .arg3_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+          u64, flags)
+{
+       u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+       bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+       u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+       bool user = flags & BPF_F_USER_STACK;
+       struct perf_callchain_entry *trace;
+       bool kernel = !user;
+       int err = -EINVAL;
+       u64 *ips;
+
+       if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+                              BPF_F_USER_BUILD_ID)))
+               goto clear;
+       if (kernel && user_build_id)
+               goto clear;
+
+       elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
+                                           : sizeof(u64);
+       if (unlikely(size % elem_size))
+               goto clear;
+
+       num_elem = size / elem_size;
+       if (sysctl_perf_event_max_stack < num_elem)
+               init_nr = 0;
+       else
+               init_nr = sysctl_perf_event_max_stack - num_elem;
+       trace = get_perf_callchain(regs, init_nr, kernel, user,
+                                  sysctl_perf_event_max_stack, false, false);
+       if (unlikely(!trace))
+               goto err_fault;
+
+       trace_nr = trace->nr - init_nr;
+       if (trace_nr < skip)
+               goto err_fault;
+
+       trace_nr -= skip;
+       trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
+       copy_len = trace_nr * elem_size;
+       ips = trace->ip + skip + init_nr;
+       if (user && user_build_id)
+               stack_map_get_build_id_offset(buf, ips, trace_nr, user);
+       else
+               memcpy(buf, ips, copy_len);
+
+       if (size > copy_len)
+               memset(buf + copy_len, 0, size - copy_len);
+       return copy_len;
+
+err_fault:
+       err = -EFAULT;
+clear:
+       memset(buf, 0, size);
+       return err;
+}
+
+const struct bpf_func_proto bpf_get_stack_proto = {
+       .func           = bpf_get_stack,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg4_type      = ARG_ANYTHING,
+};
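
A hedged BPF-side sketch of the new helper (attach point and sizes hypothetical): copy up to 64 user-space return addresses into a plain buffer; with BPF_F_USER_BUILD_ID the buffer would instead hold struct bpf_stack_build_id records:

        #define MAX_FRAMES 64

        SEC("kprobe/some_func")                 /* hypothetical attach point */
        int dump_stack(struct pt_regs *ctx)
        {
                __u64 ips[MAX_FRAMES];
                long ret;

                /* returns bytes copied, or a negative error */
                ret = bpf_get_stack(ctx, ips, sizeof(ips), BPF_F_USER_STACK);
                if (ret < 0)
                        return 0;
                /* ... ship ips[0 .. ret / 8) to user space ... */
                return 0;
        }
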
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
@@ -511,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
        .map_update_elem = stack_map_update_elem,
        .map_delete_elem = stack_map_delete_elem,
 };
+
+static int __init stack_map_init(void)
+{
+       int cpu;
+       struct stack_map_irq_work *work;
+
+       for_each_possible_cpu(cpu) {
+               work = per_cpu_ptr(&up_read_work, cpu);
+               init_irq_work(&work->irq_work, do_up_read);
+       }
+       return 0;
+}
+subsys_initcall(stack_map_init);
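
For completeness, a hedged user-space sketch of a build-id stackmap that exercises the path above (sizes hypothetical; assumes libbpf's bpf_create_map()):

        #define MAX_DEPTH 127

        static int create_build_id_stackmap(void)
        {
                /* each slot holds MAX_DEPTH bpf_stack_build_id records */
                return bpf_create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(__u32),
                                      sizeof(struct bpf_stack_build_id) * MAX_DEPTH,
                                      10000, BPF_F_STACK_BUILD_ID);
        }
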
index fe23dc5a3ec48be4215c39ef742c50a2bf3203bb..bfcde949c7f8f9a54753f0132bfe12582ffb6fba 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/timekeeping.h>
 #include <linux/ctype.h>
 #include <linux/btf.h>
+#include <linux/nospec.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
                           (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -104,12 +105,14 @@ const struct bpf_map_ops bpf_map_offload_ops = {
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 {
        const struct bpf_map_ops *ops;
+       u32 type = attr->map_type;
        struct bpf_map *map;
        int err;
 
-       if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
+       if (type >= ARRAY_SIZE(bpf_map_types))
                return ERR_PTR(-EINVAL);
-       ops = bpf_map_types[attr->map_type];
+       type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
+       ops = bpf_map_types[type];
        if (!ops)
                return ERR_PTR(-EINVAL);
 
@@ -124,7 +127,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
        if (IS_ERR(map))
                return map;
        map->ops = ops;
-       map->map_type = attr->map_type;
+       map->map_type = type;
        return map;
 }
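
The mitigation pattern, distilled as a hedged sketch (lookup_ops() is hypothetical): bound the attacker-controlled index, then clamp it with array_index_nospec() so a mispredicted bounds check cannot index out of range under speculation:

        static const struct bpf_map_ops *lookup_ops(u32 type)
        {
                if (type >= ARRAY_SIZE(bpf_map_types))
                        return NULL;
                /* architecturally a no-op; under speculation it forces
                 * 'type' into 0 .. size-1 so the load below cannot leak
                 * out-of-bounds memory
                 */
                type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
                return bpf_map_types[type];
        }
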
 
@@ -252,7 +255,6 @@ static void bpf_map_free_deferred(struct work_struct *work)
 
        bpf_map_uncharge_memlock(map);
        security_bpf_map_free(map);
-       btf_put(map->btf);
        /* implementation dependent freeing */
        map->ops->map_free(map);
 }
@@ -260,8 +262,8 @@ static void bpf_map_free_deferred(struct work_struct *work)
 static void bpf_map_put_uref(struct bpf_map *map)
 {
        if (atomic_dec_and_test(&map->usercnt)) {
-               if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
-                       bpf_fd_array_map_clear(map);
+               if (map->ops->map_release_uref)
+                       map->ops->map_release_uref(map);
        }
 }
 
@@ -273,6 +275,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
        if (atomic_dec_and_test(&map->refcnt)) {
                /* bpf_map_free_id() must be called first */
                bpf_map_free_id(map, do_idr_lock);
+               btf_put(map->btf);
                INIT_WORK(&map->work, bpf_map_free_deferred);
                schedule_work(&map->work);
        }
@@ -282,6 +285,7 @@ void bpf_map_put(struct bpf_map *map)
 {
        __bpf_map_put(map, true);
 }
+EXPORT_SYMBOL_GPL(bpf_map_put);
 
 void bpf_map_put_with_uref(struct bpf_map *map)
 {
@@ -503,7 +507,6 @@ static int map_create(union bpf_attr *attr)
                return err;
        }
 
-       trace_bpf_map_create(map, err);
        return err;
 
 free_map:
@@ -544,6 +547,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
                atomic_inc(&map->usercnt);
        return map;
 }
+EXPORT_SYMBOL_GPL(bpf_map_inc);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 {
@@ -663,7 +667,6 @@ static int map_lookup_elem(union bpf_attr *attr)
        if (copy_to_user(uvalue, value, value_size) != 0)
                goto free_value;
 
-       trace_bpf_map_lookup_elem(map, ufd, key, value);
        err = 0;
 
 free_value:
@@ -760,8 +763,6 @@ static int map_update_elem(union bpf_attr *attr)
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();
 out:
-       if (!err)
-               trace_bpf_map_update_elem(map, ufd, key, value);
 free_value:
        kfree(value);
 free_key:
@@ -814,8 +815,6 @@ static int map_delete_elem(union bpf_attr *attr)
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();
 out:
-       if (!err)
-               trace_bpf_map_delete_elem(map, ufd, key);
        kfree(key);
 err_put:
        fdput(f);
@@ -879,7 +878,6 @@ static int map_get_next_key(union bpf_attr *attr)
        if (copy_to_user(unext_key, next_key, map->key_size) != 0)
                goto free_next_key;
 
-       trace_bpf_map_next_key(map, ufd, key, next_key);
        err = 0;
 
 free_next_key:
@@ -902,11 +900,17 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = {
 
 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 {
-       if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
+       const struct bpf_prog_ops *ops;
+
+       if (type >= ARRAY_SIZE(bpf_prog_types))
+               return -EINVAL;
+       type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
+       ops = bpf_prog_types[type];
+       if (!ops)
                return -EINVAL;
 
        if (!bpf_prog_is_dev_bound(prog->aux))
-               prog->aux->ops = bpf_prog_types[type];
+               prog->aux->ops = ops;
        else
                prog->aux->ops = &bpf_offload_prog_ops;
        prog->type = type;
@@ -1027,7 +1031,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
        if (atomic_dec_and_test(&prog->aux->refcnt)) {
                int i;
 
-               trace_bpf_prog_put_rcu(prog);
                /* bpf_prog_free_id() must be called first */
                bpf_prog_free_id(prog, do_idr_lock);
 
@@ -1194,11 +1197,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
                                       bool attach_drv)
 {
-       struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);
-
-       if (!IS_ERR(prog))
-               trace_bpf_prog_get_type(prog);
-       return prog;
+       return __bpf_prog_get(ufd, &type, attach_drv);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
 
@@ -1373,7 +1372,6 @@ static int bpf_prog_load(union bpf_attr *attr)
        }
 
        bpf_prog_kallsyms_add(prog);
-       trace_bpf_prog_load(prog, err);
        return err;
 
 free_used_maps:
@@ -1914,6 +1912,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
        info.load_time = prog->aux->load_time;
        info.created_by_uid = from_kuid_munged(current_user_ns(),
                                               prog->aux->user->uid);
+       info.gpl_compatible = prog->gpl_compatible;
 
        memcpy(info.tag, prog->tag, sizeof(prog->tag));
        memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
@@ -2012,6 +2011,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
        info.map_flags = map->map_flags;
        memcpy(info.name, map->name, sizeof(map->name));
 
+       if (map->btf) {
+               info.btf_id = btf_id(map->btf);
+               info.btf_key_id = map->btf_key_id;
+               info.btf_value_id = map->btf_value_id;
+       }
+
        if (bpf_map_is_dev_bound(map)) {
                err = bpf_map_offload_info_fill(&info, map);
                if (err)
@@ -2025,6 +2030,21 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
        return 0;
 }
 
+static int bpf_btf_get_info_by_fd(struct btf *btf,
+                                 const union bpf_attr *attr,
+                                 union bpf_attr __user *uattr)
+{
+       struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+       u32 info_len = attr->info.info_len;
+       int err;
+
+       err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
+       if (err)
+               return err;
+
+       return btf_get_info_by_fd(btf, attr, uattr);
+}
+
 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
 
 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
@@ -2048,7 +2068,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
                err = bpf_map_get_info_by_fd(f.file->private_data, attr,
                                             uattr);
        else if (f.file->f_op == &btf_fops)
-               err = btf_get_info_by_fd(f.file->private_data, attr, uattr);
+               err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
        else
                err = -EINVAL;
 
@@ -2069,6 +2089,19 @@ static int bpf_btf_load(const union bpf_attr *attr)
        return btf_new_fd(attr);
 }
 
+#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
+
+static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
+{
+       if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
+               return -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return btf_get_fd_by_id(attr->btf_id);
+}
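
A user-space sketch of the new command via the raw syscall (no libbpf wrapper is assumed here):

        #include <linux/bpf.h>
        #include <string.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static int btf_fd_by_id(__u32 id)
        {
                union bpf_attr attr;

                memset(&attr, 0, sizeof(attr));
                attr.btf_id = id;

                /* requires CAP_SYS_ADMIN, cf. bpf_btf_get_fd_by_id() above */
                return syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID,
                               &attr, sizeof(attr));
        }
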
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -2152,6 +2185,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_BTF_LOAD:
                err = bpf_btf_load(&attr);
                break;
+       case BPF_BTF_GET_FD_BY_ID:
+               err = bpf_btf_get_fd_by_id(&attr);
+               break;
        default:
                err = -EINVAL;
                break;
index 1f4bf68c12dbbb88118d32c939616b43cebcf070..938d41211be70cde6e18446ec956907f07309d39 100644 (file)
@@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
        return TNUM(a.value >> shift, a.mask >> shift);
 }
 
+struct tnum tnum_arshift(struct tnum a, u8 min_shift)
+{
+       /* if a.value is negative, arithmetic shifting by minimum shift
+        * will have larger negative offset compared to more shifting.
+        * If a.value is nonnegative, arithmetic shifting by minimum shift
+        * will have larger positive offset compared to more shifting.
+        */
+       return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
+}
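
A worked example of why the s64 casts matter (values illustrative):

        /* a = TNUM(0x8000000000000000, 0x0), a known negative constant:
         *   tnum_arshift(a, 4).value = (s64)0x8000000000000000 >> 4
         *                            = 0xf800000000000000  (sign-extended)
         *   tnum_arshift(a, 4).mask  = 0x0                  (still exact)
         * The logical tnum_rshift() would give 0x0800000000000000 instead,
         * losing the sign information that BPF_ARSH preserves.
         */
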
+
 struct tnum tnum_add(struct tnum a, struct tnum b)
 {
        u64 sm, sv, sigma, chi, mu;
index 5dd1dcb902bf445ba50df106ee4aeb129164a4ac..a9e4b1372da6c1635e708e1ace953a19e4030030 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/stringify.h>
 #include <linux/bsearch.h>
 #include <linux/sort.h>
+#include <linux/perf_event.h>
 
 #include "disasm.h"
 
@@ -164,6 +165,8 @@ struct bpf_call_arg_meta {
        bool pkt_access;
        int regno;
        int access_size;
+       s64 msize_smax_value;
+       u64 msize_umax_value;
 };
 
 static DEFINE_MUTEX(bpf_verifier_lock);
@@ -738,18 +741,19 @@ enum reg_arg_type {
 
 static int cmp_subprogs(const void *a, const void *b)
 {
-       return *(int *)a - *(int *)b;
+       return ((struct bpf_subprog_info *)a)->start -
+              ((struct bpf_subprog_info *)b)->start;
 }
 
 static int find_subprog(struct bpf_verifier_env *env, int off)
 {
-       u32 *p;
+       struct bpf_subprog_info *p;
 
-       p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
-                   sizeof(env->subprog_starts[0]), cmp_subprogs);
+       p = bsearch(&off, env->subprog_info, env->subprog_cnt,
+                   sizeof(env->subprog_info[0]), cmp_subprogs);
        if (!p)
                return -ENOENT;
-       return p - env->subprog_starts;
+       return p - env->subprog_info;
 
 }
 
@@ -769,18 +773,24 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
                verbose(env, "too many subprograms\n");
                return -E2BIG;
        }
-       env->subprog_starts[env->subprog_cnt++] = off;
-       sort(env->subprog_starts, env->subprog_cnt,
-            sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
+       env->subprog_info[env->subprog_cnt++].start = off;
+       sort(env->subprog_info, env->subprog_cnt,
+            sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
        return 0;
 }
 
 static int check_subprogs(struct bpf_verifier_env *env)
 {
        int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
+       struct bpf_subprog_info *subprog = env->subprog_info;
        struct bpf_insn *insn = env->prog->insnsi;
        int insn_cnt = env->prog->len;
 
+       /* Add entry function. */
+       ret = add_subprog(env, 0);
+       if (ret < 0)
+               return ret;
+
        /* determine subprog starts. The end is one before the next starts */
        for (i = 0; i < insn_cnt; i++) {
                if (insn[i].code != (BPF_JMP | BPF_CALL))
@@ -800,16 +810,18 @@ static int check_subprogs(struct bpf_verifier_env *env)
                        return ret;
        }
 
+       /* Add a fake 'exit' subprog which could simplify subprog iteration
+        * logic. 'subprog_cnt' should not be increased.
+        */
+       subprog[env->subprog_cnt].start = insn_cnt;
+
        if (env->log.level > 1)
                for (i = 0; i < env->subprog_cnt; i++)
-                       verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
+                       verbose(env, "func#%d @%d\n", i, subprog[i].start);
 
        /* now check that all jumps are within the same subprog */
-       subprog_start = 0;
-       if (env->subprog_cnt == cur_subprog)
-               subprog_end = insn_cnt;
-       else
-               subprog_end = env->subprog_starts[cur_subprog++];
+       subprog_start = subprog[cur_subprog].start;
+       subprog_end = subprog[cur_subprog + 1].start;
        for (i = 0; i < insn_cnt; i++) {
                u8 code = insn[i].code;
 
@@ -834,10 +846,9 @@ static int check_subprogs(struct bpf_verifier_env *env)
                                return -EINVAL;
                        }
                        subprog_start = subprog_end;
-                       if (env->subprog_cnt == cur_subprog)
-                               subprog_end = insn_cnt;
-                       else
-                               subprog_end = env->subprog_starts[cur_subprog++];
+                       cur_subprog++;
+                       if (cur_subprog < env->subprog_cnt)
+                               subprog_end = subprog[cur_subprog + 1].start;
                }
        }
        return 0;
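
With the entry function registered as subprog 0 and the fake 'exit' record capping the array, boundary iteration reduces to the shape below (hedged sketch over the structures above):

        /* subprog i spans insns [subprog[i].start, subprog[i + 1].start) */
        for (i = 0; i < env->subprog_cnt; i++) {
                int start = subprog[i].start;
                int end = subprog[i + 1].start; /* fake 'exit' caps the last */

                /* ... walk insns in [start, end) ... */
        }
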
@@ -1470,13 +1481,13 @@ static int update_stack_depth(struct bpf_verifier_env *env,
                              const struct bpf_func_state *func,
                              int off)
 {
-       u16 stack = env->subprog_stack_depth[func->subprogno];
+       u16 stack = env->subprog_info[func->subprogno].stack_depth;
 
        if (stack >= -off)
                return 0;
 
        /* update known max for given subprogram */
-       env->subprog_stack_depth[func->subprogno] = -off;
+       env->subprog_info[func->subprogno].stack_depth = -off;
        return 0;
 }
 
@@ -1488,9 +1499,9 @@ static int update_stack_depth(struct bpf_verifier_env *env,
  */
 static int check_max_stack_depth(struct bpf_verifier_env *env)
 {
-       int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
+       int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
+       struct bpf_subprog_info *subprog = env->subprog_info;
        struct bpf_insn *insn = env->prog->insnsi;
-       int insn_cnt = env->prog->len;
        int ret_insn[MAX_CALL_FRAMES];
        int ret_prog[MAX_CALL_FRAMES];
 
@@ -1498,17 +1509,14 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
        /* round up to 32-bytes, since this is granularity
         * of interpreter stack size
         */
-       depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
+       depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
        if (depth > MAX_BPF_STACK) {
                verbose(env, "combined stack size of %d calls is %d. Too large\n",
                        frame + 1, depth);
                return -EACCES;
        }
 continue_func:
-       if (env->subprog_cnt == subprog)
-               subprog_end = insn_cnt;
-       else
-               subprog_end = env->subprog_starts[subprog];
+       subprog_end = subprog[idx + 1].start;
        for (; i < subprog_end; i++) {
                if (insn[i].code != (BPF_JMP | BPF_CALL))
                        continue;
@@ -1516,17 +1524,16 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
                        continue;
                /* remember insn and function to return to */
                ret_insn[frame] = i + 1;
-               ret_prog[frame] = subprog;
+               ret_prog[frame] = idx;
 
                /* find the callee */
                i = i + insn[i].imm + 1;
-               subprog = find_subprog(env, i);
-               if (subprog < 0) {
+               idx = find_subprog(env, i);
+               if (idx < 0) {
                        WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
                                  i);
                        return -EFAULT;
                }
-               subprog++;
                frame++;
                if (frame >= MAX_CALL_FRAMES) {
                        WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
@@ -1539,10 +1546,10 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
         */
        if (frame == 0)
                return 0;
-       depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
+       depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
        frame--;
        i = ret_insn[frame];
-       subprog = ret_prog[frame];
+       idx = ret_prog[frame];
        goto continue_func;
 }
 
@@ -1558,8 +1565,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
                          start);
                return -EFAULT;
        }
-       subprog++;
-       return env->subprog_stack_depth[subprog];
+       return env->subprog_info[subprog].stack_depth;
 }
 #endif
 
@@ -1914,7 +1920,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
        if (arg_type == ARG_PTR_TO_MAP_KEY ||
            arg_type == ARG_PTR_TO_MAP_VALUE) {
                expected_type = PTR_TO_STACK;
-               if (!type_is_pkt_pointer(type) &&
+               if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
                    type != expected_type)
                        goto err_type;
        } else if (arg_type == ARG_CONST_SIZE ||
@@ -1966,14 +1972,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                        verbose(env, "invalid map_ptr to access map->key\n");
                        return -EACCES;
                }
-               if (type_is_pkt_pointer(type))
-                       err = check_packet_access(env, regno, reg->off,
-                                                 meta->map_ptr->key_size,
-                                                 false);
-               else
-                       err = check_stack_boundary(env, regno,
-                                                  meta->map_ptr->key_size,
-                                                  false, NULL);
+               err = check_helper_mem_access(env, regno,
+                                             meta->map_ptr->key_size, false,
+                                             NULL);
        } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
                /* bpf_map_xxx(..., map_ptr, ..., value) call:
                 * check [value, value + map->value_size) validity
@@ -1983,17 +1984,18 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                        verbose(env, "invalid map_ptr to access map->value\n");
                        return -EACCES;
                }
-               if (type_is_pkt_pointer(type))
-                       err = check_packet_access(env, regno, reg->off,
-                                                 meta->map_ptr->value_size,
-                                                 false);
-               else
-                       err = check_stack_boundary(env, regno,
-                                                  meta->map_ptr->value_size,
-                                                  false, NULL);
+               err = check_helper_mem_access(env, regno,
+                                             meta->map_ptr->value_size, false,
+                                             NULL);
        } else if (arg_type_is_mem_size(arg_type)) {
                bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
+               /* remember the mem_size which may be used later
+                * to refine return values.
+                */
+               meta->msize_smax_value = reg->smax_value;
+               meta->msize_umax_value = reg->umax_value;
+
                /* The register is SCALAR_VALUE; the access check
                 * happens using its boundaries.
                 */
@@ -2071,8 +2073,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                if (func_id != BPF_FUNC_redirect_map)
                        goto error;
                break;
-       /* Restrict bpf side of cpumap, open when use-cases appear */
+       /* Restrict bpf side of cpumap and xskmap, open when use-cases
+        * appear.
+        */
        case BPF_MAP_TYPE_CPUMAP:
+       case BPF_MAP_TYPE_XSKMAP:
                if (func_id != BPF_FUNC_redirect_map)
                        goto error;
                break;
@@ -2088,6 +2093,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                    func_id != BPF_FUNC_msg_redirect_map)
                        goto error;
                break;
+       case BPF_MAP_TYPE_SOCKHASH:
+               if (func_id != BPF_FUNC_sk_redirect_hash &&
+                   func_id != BPF_FUNC_sock_hash_update &&
+                   func_id != BPF_FUNC_map_delete_elem &&
+                   func_id != BPF_FUNC_msg_redirect_hash)
+                       goto error;
+               break;
        default:
                break;
        }
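For context, a hedged sketch of a program that the new BPF_MAP_TYPE_SOCKHASH compatibility rules above would accept; the map definition, section name, and key are illustrative, and bpf_sk_redirect_hash() takes its key by pointer:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"    /* illustrative libbpf-style helper macros */

    struct bpf_map_def SEC("maps") sock_hash = {
            .type = BPF_MAP_TYPE_SOCKHASH,
            .key_size = sizeof(__u32),
            .value_size = sizeof(__u32),
            .max_entries = 1024,
    };

    SEC("sk_skb/stream_verdict")
    int prog_verdict(struct __sk_buff *skb)
    {
            __u32 key = 0;      /* illustrative key */

            return bpf_sk_redirect_hash(skb, &sock_hash, &key, 0);
    }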
@@ -2097,7 +2109,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
        case BPF_FUNC_tail_call:
                if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
                        goto error;
-               if (env->subprog_cnt) {
+               if (env->subprog_cnt > 1) {
                        verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
                        return -EINVAL;
                }
@@ -2119,16 +2131,20 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                break;
        case BPF_FUNC_redirect_map:
                if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
-                   map->map_type != BPF_MAP_TYPE_CPUMAP)
+                   map->map_type != BPF_MAP_TYPE_CPUMAP &&
+                   map->map_type != BPF_MAP_TYPE_XSKMAP)
                        goto error;
                break;
        case BPF_FUNC_sk_redirect_map:
        case BPF_FUNC_msg_redirect_map:
+       case BPF_FUNC_sock_map_update:
                if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
                        goto error;
                break;
-       case BPF_FUNC_sock_map_update:
-               if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
+       case BPF_FUNC_sk_redirect_hash:
+       case BPF_FUNC_msg_redirect_hash:
+       case BPF_FUNC_sock_hash_update:
+               if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
                        goto error;
                break;
        default:
@@ -2269,7 +2285,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                        /* remember the callsite, it will be used by bpf_exit */
                        *insn_idx /* callsite */,
                        state->curframe + 1 /* frameno within this callchain */,
-                       subprog + 1 /* subprog number within this prog */);
+                       subprog /* subprog number within this prog */);
 
        /* copy r1 - r5 args that callee can access */
        for (i = BPF_REG_1; i <= BPF_REG_5; i++)
@@ -2333,6 +2349,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
        return 0;
 }
 
+static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
+                                  int func_id,
+                                  struct bpf_call_arg_meta *meta)
+{
+       struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
+
+       if (ret_type != RET_INTEGER ||
+           (func_id != BPF_FUNC_get_stack &&
+            func_id != BPF_FUNC_probe_read_str))
+               return;
+
+       ret_reg->smax_value = meta->msize_smax_value;
+       ret_reg->umax_value = meta->msize_umax_value;
+       __reg_deduce_bounds(ret_reg);
+       __reg_bound_offset(ret_reg);
+}
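Seen from the program side, the refinement above lets R0 of bpf_get_stack() and bpf_probe_read_str() inherit the bounds of the size argument, so the returned length can serve directly as an access bound. A hedged sketch (consume_data() is a hypothetical consumer):

    char buf[64];
    long len;

    len = bpf_get_stack(ctx, buf, sizeof(buf), 0);
    if (len > 0)
            /* the verifier can now prove len <= sizeof(buf) */
            consume_data(buf, len);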
+
 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 {
        const struct bpf_func_proto *fn = NULL;
@@ -2456,10 +2489,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                return -EINVAL;
        }
 
+       do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
+
        err = check_map_func_compatibility(env, meta.map_ptr, func_id);
        if (err)
                return err;
 
+       if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
+               const char *err_str;
+
+#ifdef CONFIG_PERF_EVENTS
+               err = get_callchain_buffers(sysctl_perf_event_max_stack);
+               err_str = "cannot get callchain buffer for func %s#%d\n";
+#else
+               err = -ENOTSUPP;
+               err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
+#endif
+               if (err) {
+                       verbose(env, err_str, func_id_name(func_id), func_id);
+                       return err;
+               }
+
+               env->prog->has_callchain_buf = true;
+       }
+
        if (changes_data)
                clear_all_pkt_pointers(env);
        return 0;
@@ -2904,10 +2957,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                        dst_reg->umin_value <<= umin_val;
                        dst_reg->umax_value <<= umax_val;
                }
-               if (src_known)
-                       dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
-               else
-                       dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
+               dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
                /* We may learn something more from the var_off */
                __update_reg_bounds(dst_reg);
                break;
@@ -2935,16 +2985,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                 */
                dst_reg->smin_value = S64_MIN;
                dst_reg->smax_value = S64_MAX;
-               if (src_known)
-                       dst_reg->var_off = tnum_rshift(dst_reg->var_off,
-                                                      umin_val);
-               else
-                       dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
+               dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
                dst_reg->umin_value >>= umax_val;
                dst_reg->umax_value >>= umin_val;
                /* We may learn something more from the var_off */
                __update_reg_bounds(dst_reg);
                break;
+       case BPF_ARSH:
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
+                        */
+                       mark_reg_unknown(env, regs, insn->dst_reg);
+                       break;
+               }
+
+               /* Upon reaching here, src_known is true and
+                * umax_val is equal to umin_val.
+                */
+               dst_reg->smin_value >>= umin_val;
+               dst_reg->smax_value >>= umin_val;
+               dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
+
+               /* blow away the dst_reg umin_value/umax_value and rely on
+                * dst_reg var_off to refine the result.
+                */
+               dst_reg->umin_value = 0;
+               dst_reg->umax_value = U64_MAX;
+               __update_reg_bounds(dst_reg);
+               break;
        default:
                mark_reg_unknown(env, regs, insn->dst_reg);
                break;
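A short worked example for the BPF_ARSH case above: an arithmetic right shift by a known amount is monotonic, so the signed bounds shift directly, while the unsigned bounds are deliberately widened and then recomputed from var_off. With smin = -8, smax = 5 and a shift of 1 (assuming the usual two's-complement arithmetic shift):

    long long smin = -8, smax = 5;
    unsigned int k = 1;

    smin >>= k;         /* -4: the arithmetic shift preserves the sign */
    smax >>= k;         /*  2: so [-8, 5] >> 1 becomes [-4, 2] */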
@@ -3828,7 +3897,12 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
                return -EINVAL;
        }
 
-       if (env->subprog_cnt) {
+       if (!env->ops->gen_ld_abs) {
+               verbose(env, "bpf verifier is misconfigured\n");
+               return -EINVAL;
+       }
+
+       if (env->subprog_cnt > 1) {
                /* when a program has LD_ABS insns, JITs and the interpreter assume
                 * that r1 == ctx == skb which is not the case for callees
                 * that can have arbitrary arguments. It's problematic
@@ -4859,15 +4933,15 @@ static int do_check(struct bpf_verifier_env *env)
 
        verbose(env, "processed %d insns (limit %d), stack depth ",
                insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
-       for (i = 0; i < env->subprog_cnt + 1; i++) {
-               u32 depth = env->subprog_stack_depth[i];
+       for (i = 0; i < env->subprog_cnt; i++) {
+               u32 depth = env->subprog_info[i].stack_depth;
 
                verbose(env, "%d", depth);
-               if (i + 1 < env->subprog_cnt + 1)
+               if (i + 1 < env->subprog_cnt)
                        verbose(env, "+");
        }
        verbose(env, "\n");
-       env->prog->aux->stack_depth = env->subprog_stack_depth[0];
+       env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
        return 0;
 }
 
@@ -4991,7 +5065,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
                        /* hold the map. If the program is rejected by verifier,
                         * the map will be released by release_maps() or it
                         * will be used by the valid program until it's unloaded
-                        * and all maps are released in free_bpf_prog_info()
+                        * and all maps are released in free_used_maps()
                         */
                        map = bpf_map_inc(map, false);
                        if (IS_ERR(map)) {
@@ -5073,10 +5147,11 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
 
        if (len == 1)
                return;
-       for (i = 0; i < env->subprog_cnt; i++) {
-               if (env->subprog_starts[i] < off)
+       /* NOTE: fake 'exit' subprog should be updated as well. */
+       for (i = 0; i <= env->subprog_cnt; i++) {
+               if (env->subprog_info[i].start < off)
                        continue;
-               env->subprog_starts[i] += len - 1;
+               env->subprog_info[i].start += len - 1;
        }
 }
 
@@ -5150,7 +5225,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                }
        }
 
-       if (!ops->convert_ctx_access)
+       if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
                return 0;
 
        insn = env->prog->insnsi + delta;
@@ -5240,7 +5315,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
        void *old_bpf_func;
        int err = -ENOMEM;
 
-       if (env->subprog_cnt == 0)
+       if (env->subprog_cnt <= 1)
                return 0;
 
        for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
@@ -5256,7 +5331,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                /* temporarily remember subprog id inside insn instead of
                 * aux_data, since next loop will split up all insns into funcs
                 */
-               insn->off = subprog + 1;
+               insn->off = subprog;
                /* remember original imm in case JIT fails and fallback
                 * to interpreter will be needed
                 */
@@ -5265,16 +5340,13 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                insn->imm = 1;
        }
 
-       func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
+       func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL);
        if (!func)
                return -ENOMEM;
 
-       for (i = 0; i <= env->subprog_cnt; i++) {
+       for (i = 0; i < env->subprog_cnt; i++) {
                subprog_start = subprog_end;
-               if (env->subprog_cnt == i)
-                       subprog_end = prog->len;
-               else
-                       subprog_end = env->subprog_starts[i];
+               subprog_end = env->subprog_info[i + 1].start;
 
                len = subprog_end - subprog_start;
                func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
@@ -5291,7 +5363,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                 * Long term would need debug info to populate names
                 */
                func[i]->aux->name[0] = 'F';
-               func[i]->aux->stack_depth = env->subprog_stack_depth[i];
+               func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
                func[i]->jit_requested = 1;
                func[i] = bpf_int_jit_compile(func[i]);
                if (!func[i]->jited) {
@@ -5304,7 +5376,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
         * now populate all bpf_calls with correct addresses and
         * run last pass of JIT
         */
-       for (i = 0; i <= env->subprog_cnt; i++) {
+       for (i = 0; i < env->subprog_cnt; i++) {
                insn = func[i]->insnsi;
                for (j = 0; j < func[i]->len; j++, insn++) {
                        if (insn->code != (BPF_JMP | BPF_CALL) ||
@@ -5317,7 +5389,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                                __bpf_call_base;
                }
        }
-       for (i = 0; i <= env->subprog_cnt; i++) {
+       for (i = 0; i < env->subprog_cnt; i++) {
                old_bpf_func = func[i]->bpf_func;
                tmp = bpf_int_jit_compile(func[i]);
                if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
@@ -5331,7 +5403,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
        /* finally lock prog and jit images for all functions and
         * populate kallsyms
         */
-       for (i = 0; i <= env->subprog_cnt; i++) {
+       for (i = 0; i < env->subprog_cnt; i++) {
                bpf_prog_lock_ro(func[i]);
                bpf_prog_kallsyms_add(func[i]);
        }
@@ -5348,7 +5420,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                        continue;
                insn->off = env->insn_aux_data[i].call_imm;
                subprog = find_subprog(env, i + insn->off + 1);
-               addr  = (unsigned long)func[subprog + 1]->bpf_func;
+               addr  = (unsigned long)func[subprog]->bpf_func;
                addr &= PAGE_MASK;
                insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
                            addr - __bpf_call_base;
@@ -5357,10 +5429,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
        prog->jited = 1;
        prog->bpf_func = func[0]->bpf_func;
        prog->aux->func = func;
-       prog->aux->func_cnt = env->subprog_cnt + 1;
+       prog->aux->func_cnt = env->subprog_cnt;
        return 0;
 out_free:
-       for (i = 0; i <= env->subprog_cnt; i++)
+       for (i = 0; i < env->subprog_cnt; i++)
                if (func[i])
                        bpf_jit_free(func[i]);
        kfree(func);
@@ -5463,6 +5535,25 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                        continue;
                }
 
+               if (BPF_CLASS(insn->code) == BPF_LD &&
+                   (BPF_MODE(insn->code) == BPF_ABS ||
+                    BPF_MODE(insn->code) == BPF_IND)) {
+                       cnt = env->ops->gen_ld_abs(insn, insn_buf);
+                       if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+                               verbose(env, "bpf verifier is misconfigured\n");
+                               return -EINVAL;
+                       }
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta    += cnt - 1;
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       continue;
+               }
+
                if (insn->code != (BPF_JMP | BPF_CALL))
                        continue;
                if (insn->src_reg == BPF_PSEUDO_CALL)
@@ -5660,16 +5751,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
                env->strict_alignment = true;
 
+       ret = replace_map_fd_with_map_ptr(env);
+       if (ret < 0)
+               goto skip_full_check;
+
        if (bpf_prog_is_dev_bound(env->prog->aux)) {
                ret = bpf_prog_offload_verifier_prep(env);
                if (ret)
-                       goto err_unlock;
+                       goto skip_full_check;
        }
 
-       ret = replace_map_fd_with_map_ptr(env);
-       if (ret < 0)
-               goto skip_full_check;
-
        env->explored_states = kcalloc(env->prog->len,
                                       sizeof(struct bpf_verifier_state_list *),
                                       GFP_USER);
@@ -5740,7 +5831,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 err_release_maps:
        if (!env->prog->aux->used_maps)
                /* if we didn't copy map pointers into bpf_prog_info, release
-                * them now. Otherwise free_bpf_prog_info() will release them.
+                * them now. Otherwise free_used_maps() will release them.
                 */
                release_maps(env);
        *prog = env->prog;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
new file mode 100644 (file)
index 0000000..cb3a121
--- /dev/null
+++ b/kernel/bpf/xskmap.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XSKMAP used for AF_XDP sockets
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/capability.h>
+#include <net/xdp_sock.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+struct xsk_map {
+       struct bpf_map map;
+       struct xdp_sock **xsk_map;
+       struct list_head __percpu *flush_list;
+};
+
+static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
+{
+       int cpu, err = -EINVAL;
+       struct xsk_map *m;
+       u64 cost;
+
+       if (!capable(CAP_NET_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       if (attr->max_entries == 0 || attr->key_size != 4 ||
+           attr->value_size != 4 ||
+           attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
+               return ERR_PTR(-EINVAL);
+
+       m = kzalloc(sizeof(*m), GFP_USER);
+       if (!m)
+               return ERR_PTR(-ENOMEM);
+
+       bpf_map_init_from_attr(&m->map, attr);
+
+       cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
+       cost += sizeof(struct list_head) * num_possible_cpus();
+       if (cost >= U32_MAX - PAGE_SIZE)
+               goto free_m;
+
+       m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+       /* Note that this returns -EPERM if the map size is larger than the memlock limit */
+       err = bpf_map_precharge_memlock(m->map.pages);
+       if (err)
+               goto free_m;
+
+       err = -ENOMEM;
+
+       m->flush_list = alloc_percpu(struct list_head);
+       if (!m->flush_list)
+               goto free_m;
+
+       for_each_possible_cpu(cpu)
+               INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
+
+       m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
+                                       sizeof(struct xdp_sock *),
+                                       m->map.numa_node);
+       if (!m->xsk_map)
+               goto free_percpu;
+       return &m->map;
+
+free_percpu:
+       free_percpu(m->flush_list);
+free_m:
+       kfree(m);
+       return ERR_PTR(err);
+}
+
+static void xsk_map_free(struct bpf_map *map)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       int i;
+
+       synchronize_net();
+
+       for (i = 0; i < map->max_entries; i++) {
+               struct xdp_sock *xs;
+
+               xs = m->xsk_map[i];
+               if (!xs)
+                       continue;
+
+               sock_put((struct sock *)xs);
+       }
+
+       free_percpu(m->flush_list);
+       bpf_map_area_free(m->xsk_map);
+       kfree(m);
+}
+
+static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       u32 index = key ? *(u32 *)key : U32_MAX;
+       u32 *next = next_key;
+
+       if (index >= m->map.max_entries) {
+               *next = 0;
+               return 0;
+       }
+
+       if (index == m->map.max_entries - 1)
+               return -ENOENT;
+       *next = index + 1;
+       return 0;
+}
+
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct xdp_sock *xs;
+
+       if (key >= map->max_entries)
+               return NULL;
+
+       xs = READ_ONCE(m->xsk_map[key]);
+       return xs;
+}
+
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+                      struct xdp_sock *xs)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+       int err;
+
+       err = xsk_rcv(xs, xdp);
+       if (err)
+               return err;
+
+       if (!xs->flush_node.prev)
+               list_add(&xs->flush_node, flush_list);
+
+       return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+       struct xdp_sock *xs, *tmp;
+
+       list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+               xsk_flush(xs);
+               __list_del(xs->flush_node.prev, xs->flush_node.next);
+               xs->flush_node.prev = NULL;
+       }
+}
+
+static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       return NULL;
+}
+
+static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
+                              u64 map_flags)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       u32 i = *(u32 *)key, fd = *(u32 *)value;
+       struct xdp_sock *xs, *old_xs;
+       struct socket *sock;
+       int err;
+
+       if (unlikely(map_flags > BPF_EXIST))
+               return -EINVAL;
+       if (unlikely(i >= m->map.max_entries))
+               return -E2BIG;
+       if (unlikely(map_flags == BPF_NOEXIST))
+               return -EEXIST;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               return err;
+
+       if (sock->sk->sk_family != PF_XDP) {
+               sockfd_put(sock);
+               return -EOPNOTSUPP;
+       }
+
+       xs = (struct xdp_sock *)sock->sk;
+
+       if (!xsk_is_setup_for_bpf_map(xs)) {
+               sockfd_put(sock);
+               return -EOPNOTSUPP;
+       }
+
+       sock_hold(sock->sk);
+
+       old_xs = xchg(&m->xsk_map[i], xs);
+       if (old_xs) {
+               /* Make sure we've flushed everything. */
+               synchronize_net();
+               sock_put((struct sock *)old_xs);
+       }
+
+       sockfd_put(sock);
+       return 0;
+}
+
+static int xsk_map_delete_elem(struct bpf_map *map, void *key)
+{
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct xdp_sock *old_xs;
+       int k = *(u32 *)key;
+
+       if (k >= map->max_entries)
+               return -EINVAL;
+
+       old_xs = xchg(&m->xsk_map[k], NULL);
+       if (old_xs) {
+               /* Make sure we've flushed everything. */
+               synchronize_net();
+               sock_put((struct sock *)old_xs);
+       }
+
+       return 0;
+}
+
+const struct bpf_map_ops xsk_map_ops = {
+       .map_alloc = xsk_map_alloc,
+       .map_free = xsk_map_free,
+       .map_get_next_key = xsk_map_get_next_key,
+       .map_lookup_elem = xsk_map_lookup_elem,
+       .map_update_elem = xsk_map_update_elem,
+       .map_delete_elem = xsk_map_delete_elem,
+};
+
+
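For orientation, a hedged sketch of the intended consumer of this new map type: an XDP program redirecting each packet to the AF_XDP socket stored at its RX queue index. The map and section names are illustrative; at this point the flags argument of bpf_redirect_map() must be 0, and a lookup miss yields XDP_ABORTED:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"    /* illustrative libbpf-style helper macros */

    struct bpf_map_def SEC("maps") xsks_map = {
            .type = BPF_MAP_TYPE_XSKMAP,
            .key_size = sizeof(int),
            .value_size = sizeof(int),
            .max_entries = 4,
    };

    SEC("xdp")
    int xdp_sock_prog(struct xdp_md *ctx)
    {
            return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
    }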
index 6d21894806b46f6a4a2aa7e2d2a9f43c324ca1d8..92d8c98c0f57ac92aacd888d9342debcdec46cdb 100644 (file)
@@ -34,6 +34,7 @@ int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp)
 {
        struct compat_timex tx32;
 
+       memset(txc, 0, sizeof(struct timex));
        if (copy_from_user(&tx32, utp, sizeof(struct compat_timex)))
                return -EFAULT;
 
index 772a43fea825c498156c4972c66ee81686abecbe..c187aa3df3c8b789c23cceaf5214bc033fa3e2c9 100644 (file)
@@ -119,23 +119,20 @@ int get_callchain_buffers(int event_max_stack)
                goto exit;
        }
 
-       if (count > 1) {
-               /* If the allocation failed, give up */
-               if (!callchain_cpus_entries)
-                       err = -ENOMEM;
-               /*
-                * If requesting per event more than the global cap,
-                * return a different error to help userspace figure
-                * this out.
-                *
-                * And also do it here so that we have &callchain_mutex held.
-                */
-               if (event_max_stack > sysctl_perf_event_max_stack)
-                       err = -EOVERFLOW;
+       /*
+        * If requesting per event more than the global cap,
+        * return a different error to help userspace figure
+        * this out.
+        *
+        * And also do it here so that we have &callchain_mutex held.
+        */
+       if (event_max_stack > sysctl_perf_event_max_stack) {
+               err = -EOVERFLOW;
                goto exit;
        }
 
-       err = alloc_callchain_buffers();
+       if (count == 1)
+               err = alloc_callchain_buffers();
 exit:
        if (err)
                atomic_dec(&nr_callchain_events);
index 2d5fe26551f8775f2e537c667a12345c8c8a8e0d..67612ce359adc45efe1a721ced1d7c4a78aff75d 100644 (file)
@@ -7587,6 +7587,10 @@ static void perf_event_switch(struct task_struct *task,
                },
        };
 
+       if (!sched_in && task->state == TASK_RUNNING)
+               switch_event.event_id.header.misc |=
+                               PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
+
        perf_iterate_sb(perf_event_switch_output,
                       &switch_event,
                       NULL);
@@ -10205,9 +10209,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
                 * __u16 sample size limit.
                 */
                if (attr->sample_stack_user >= USHRT_MAX)
-                       ret = -EINVAL;
+                       return -EINVAL;
                else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
-                       ret = -EINVAL;
+                       return -EINVAL;
        }
 
        if (!attr->sample_max_stack)
index 6c6b3c48db7159c57035964d5a3403be475b6d83..1d8ca9ea997975e99af8aa5c73fd831dca0a22b4 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/circ_buf.h>
 #include <linux/poll.h>
+#include <linux/nospec.h>
 
 #include "internal.h"
 
@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
                        return NULL;
 
                /* AUX space */
-               if (pgoff >= rb->aux_pgoff)
-                       return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+               if (pgoff >= rb->aux_pgoff) {
+                       int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
+                       return virt_to_page(rb->aux_pages[aux_pgoff]);
+               }
        }
 
        return __perf_mmap_to_page(rb, pgoff);
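The hunk above is the usual Spectre-v1 hardening: an index derived from user-controlled state can be used speculatively even after a failed bounds check, so it is clamped with array_index_nospec() before indexing. The generic pattern, as a hedged sketch:

    #include <linux/nospec.h>

    /* idx was already range-checked against nr_entries */
    idx = array_index_nospec(idx, nr_entries);
    val = array[idx];   /* safe even under branch misspeculation */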
index ce6848e46e9451ad1a1310e45a5116b488ef07f0..1725b902983fcd5b561fdc3b30d843b931f4cffd 100644 (file)
@@ -491,7 +491,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
        if (!uprobe)
                return NULL;
 
-       uprobe->inode = igrab(inode);
+       uprobe->inode = inode;
        uprobe->offset = offset;
        init_rwsem(&uprobe->register_rwsem);
        init_rwsem(&uprobe->consumer_rwsem);
@@ -502,7 +502,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
        if (cur_uprobe) {
                kfree(uprobe);
                uprobe = cur_uprobe;
-               iput(inode);
        }
 
        return uprobe;
@@ -701,7 +700,6 @@ static void delete_uprobe(struct uprobe *uprobe)
        rb_erase(&uprobe->rb_node, &uprobes_tree);
        spin_unlock(&uprobes_treelock);
        RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
-       iput(uprobe->inode);
        put_uprobe(uprobe);
 }
 
@@ -873,7 +871,8 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
  * tuple).  Creation refcount stops uprobe_unregister from freeing the
  * @uprobe even before the register operation is complete. Creation
  * refcount is released when the last @uc for the @uprobe
- * unregisters.
+ * unregisters. Caller of uprobe_register() is required to keep @inode
+ * (and the containing mount) referenced.
  *
  * Return errno if it cannot successfully install probes
  * else return 0 (success)
index 242c8c93d285d08a50b484b8e5e78d43dce5bbd4..a5d21c42acfc8ca2d0e14a0969c9cdc9222fb42d 100644 (file)
@@ -216,10 +216,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
                if (!s)
                        continue;
 
-#ifdef CONFIG_DEBUG_KMEMLEAK
                /* Clear stale pointers from reused stack. */
                memset(s->addr, 0, THREAD_SIZE);
-#endif
+
                tsk->stack_vm_area = s;
                return s->addr;
        }
index 102160ff5c661e475e773888bfc3267d832bd4d6..ea619021d9011dca88f0aa10e3263f7dc2177394 100644 (file)
@@ -2428,7 +2428,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
        struct kprobe_blacklist_entry *ent =
                list_entry(v, struct kprobe_blacklist_entry, list);
 
-       seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
+       seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
                   (void *)ent->end_addr, (void *)ent->start_addr);
        return 0;
 }
index cd50e99202b011dfdb847dd2772f14e818d268bb..2017a39ab4904e8e2fffd648718aa7d05ecb8932 100644 (file)
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
        KTHREAD_IS_PER_CPU = 0,
        KTHREAD_SHOULD_STOP,
        KTHREAD_SHOULD_PARK,
-       KTHREAD_IS_PARKED,
 };
 
 static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
 
 static void __kthread_parkme(struct kthread *self)
 {
-       __set_current_state(TASK_PARKED);
-       while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
-               if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
-                       complete(&self->parked);
+       for (;;) {
+               set_current_state(TASK_PARKED);
+               if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
+                       break;
                schedule();
-               __set_current_state(TASK_PARKED);
        }
-       clear_bit(KTHREAD_IS_PARKED, &self->flags);
        __set_current_state(TASK_RUNNING);
 }
 
@@ -194,6 +191,11 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
+void kthread_park_complete(struct task_struct *k)
+{
+       complete(&to_kthread(k)->parked);
+}
+
 static int kthread(void *_create)
 {
        /* Copy data: it's on kthread's stack */
@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
 {
        struct kthread *kthread = to_kthread(k);
 
-       clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
        /*
-        * We clear the IS_PARKED bit here as we don't wait
-        * until the task has left the park code. So if we'd
-        * park before that happens we'd see the IS_PARKED bit
-        * which might be about to be cleared.
+        * Newly created kthread was parked when the CPU was offline.
+        * The binding was lost and we need to set it again.
         */
-       if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-               /*
-                * Newly created kthread was parked when the CPU was offline.
-                * The binding was lost and we need to set it again.
-                */
-               if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-                       __kthread_bind(k, kthread->cpu, TASK_PARKED);
-               wake_up_state(k, TASK_PARKED);
-       }
+       if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+               __kthread_bind(k, kthread->cpu, TASK_PARKED);
+
+       clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+       wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
        if (WARN_ON(k->flags & PF_EXITING))
                return -ENOSYS;
 
-       if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-               set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-               if (k != current) {
-                       wake_up_process(k);
-                       wait_for_completion(&kthread->parked);
-               }
+       if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
+               return -EBUSY;
+
+       set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+       if (k != current) {
+               wake_up_process(k);
+               wait_for_completion(&kthread->parked);
        }
 
        return 0;
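With the rework above, KTHREAD_IS_PARKED is gone: the parked completion now fires from the scheduler, via kthread_park_complete() in finish_task_switch(), once the thread has genuinely switched out in TASK_PARKED, and parking an already-parking thread warns and returns -EBUSY. A hedged caller-side sketch:

    struct task_struct *worker; /* some per-CPU kthread, illustrative */

    if (kthread_park(worker))   /* wakes the task, waits for ->parked */
            pr_warn("worker did not park\n");

    /* ... CPU offline/online work runs with the thread parked ... */

    kthread_unpark(worker);     /* rebinds if per-CPU, wakes TASK_PARKED */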
index e795908f36070dd33ed94630bb63b188065f21ca..a903367793758f3e1cc52ab34c18f1bfa78f38e3 100644 (file)
@@ -352,16 +352,15 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
        struct task_struct *owner;
        bool ret = true;
 
+       BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
+
        if (need_resched())
                return false;
 
        rcu_read_lock();
        owner = READ_ONCE(sem->owner);
-       if (!rwsem_owner_is_writer(owner)) {
-               /*
-                * Don't spin if the rwsem is readers owned.
-                */
-               ret = !rwsem_owner_is_reader(owner);
+       if (!owner || !is_rwsem_owner_spinnable(owner)) {
+               ret = !owner;   /* !owner is spinnable */
                goto done;
        }
 
@@ -382,11 +381,11 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
 {
        struct task_struct *owner = READ_ONCE(sem->owner);
 
-       if (!rwsem_owner_is_writer(owner))
-               goto out;
+       if (!is_rwsem_owner_spinnable(owner))
+               return false;
 
        rcu_read_lock();
-       while (sem->owner == owner) {
+       while (owner && (READ_ONCE(sem->owner) == owner)) {
                /*
                 * Ensure we emit the owner->on_cpu, dereference _after_
                 * checking sem->owner still matches owner, if that fails,
@@ -408,12 +407,12 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
                cpu_relax();
        }
        rcu_read_unlock();
-out:
+
        /*
         * If there is a new owner or the owner is not set, we continue
         * spinning.
         */
-       return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
+       return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
index 30465a2f2b6cf8e072ee797d1b58219f0799cb66..bc1e507be9ff7aea311261e78002d53375f9a6d7 100644 (file)
@@ -221,5 +221,3 @@ void up_read_non_owner(struct rw_semaphore *sem)
 EXPORT_SYMBOL(up_read_non_owner);
 
 #endif
-
-
index a17cba8d94bb10b4e3d6d038ea90259349e8a193..b9d0e72aa80f4064542a53854feb55da12a5f960 100644 (file)
@@ -1,20 +1,24 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * The owner field of the rw_semaphore structure will be set to
- * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
+ * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
  * the owner field when it unlocks. A reader, on the other hand, will
  * not touch the owner field when it unlocks.
  *
- * In essence, the owner field now has the following 3 states:
+ * In essence, the owner field now has the following 4 states:
  *  1) 0
  *     - lock is free or the owner hasn't set the field yet
  *  2) RWSEM_READER_OWNED
  *     - lock is currently or previously owned by readers (lock is free
  *       or not set by owner yet)
- *  3) Other non-zero value
- *     - a writer owns the lock
+ *  3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
+ *     - lock is owned by an anonymous writer, so spinning on the lock
+ *       owner should be disabled.
+ *  4) Other non-zero value
+ *     - a writer owns the lock and other writers can spin on the lock owner.
  */
-#define RWSEM_READER_OWNED     ((struct task_struct *)1UL)
+#define RWSEM_ANONYMOUSLY_OWNED        (1UL << 0)
+#define RWSEM_READER_OWNED     ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
 
 #ifdef CONFIG_DEBUG_RWSEMS
 # define DEBUG_RWSEMS_WARN_ON(c)       DEBUG_LOCKS_WARN_ON(c)
@@ -51,14 +55,22 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
                WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
 }
 
-static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+/*
+ * Return true if a rwsem waiter can spin on the rwsem's owner
+ * and steal the lock, i.e. the lock is not anonymously owned.
+ * N.B. !owner is considered spinnable.
+ */
+static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
 {
-       return owner && owner != RWSEM_READER_OWNED;
+       return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
 }
 
-static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+/*
+ * Return true if rwsem is owned by an anonymous writer or readers.
+ */
+static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
 {
-       return owner == RWSEM_READER_OWNED;
+       return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
 }
 #else
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
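The encoding above relies on task_struct pointers being at least word aligned, so bit 0 is free to serve as RWSEM_ANONYMOUSLY_OWNED; RWSEM_READER_OWNED is just that flag with no task bits set. A hedged sketch of the resulting tests:

    unsigned long v = (unsigned long)READ_ONCE(sem->owner);
    bool anonymous = v & RWSEM_ANONYMOUSLY_OWNED;  /* readers, or a writer not to spin on */
    bool reader    = (v == (unsigned long)RWSEM_READER_OWNED);
    bool spinnable = !anonymous;                   /* includes v == 0, i.e. lock free */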
index a6e43a5806a11391b6606b42cc972b13f9eb3828..c9bea7f2b43e3b2568f2adeaa299a6545df41df8 100644 (file)
@@ -1472,7 +1472,8 @@ static ssize_t module_sect_show(struct module_attribute *mattr,
 {
        struct module_sect_attr *sattr =
                container_of(mattr, struct module_sect_attr, mattr);
-       return sprintf(buf, "0x%pK\n", (void *)sattr->address);
+       return sprintf(buf, "0x%px\n", kptr_restrict < 2 ?
+                      (void *)sattr->address : NULL);
 }
 
 static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -3516,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
         * walking this with preempt disabled.  In all the failure paths, we
         * call synchronize_sched(), but we don't want to slow down the success
         * path, so use actual RCU here.
+        * Note that module_alloc() on most architectures creates W+X page
+        * mappings which won't be cleaned up until do_free_init() runs.  Any
+        * code such as mark_rodata_ro() which depends on those mappings to
+        * be cleaned up needs to sync with the queued work, i.e. via
+        * rcu_barrier_sched().
         */
        call_rcu_sched(&freeinit->rcu, do_free_init);
        mutex_unlock(&module_mutex);
index 6be6c575b6cd1d7c20ea72900849b95404066641..2d4ff5353ded618a9b08e1d3b1c9da0fa7d148af 100644 (file)
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
        static unsigned long next = INITIAL_JIFFIES;
        struct autogroup *ag;
        unsigned long shares;
-       int err;
+       int err, idx;
 
        if (nice < MIN_NICE || nice > MAX_NICE)
                return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
        next = HZ / 10 + jiffies;
        ag = autogroup_task_get(p);
-       shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+       idx = array_index_nospec(nice + 20, 40);
+       shares = scale_load(sched_prio_to_weight[idx]);
 
        down_write(&ag->lock);
        err = sched_group_set_shares(ag->tg, shares);
index 5e10aaeebfcc55af83a4ee2e65ff9bbc3b6c0e5d..092f7c4de9036667981ffff80e4e150e113a53c0 100644 (file)
@@ -7,6 +7,9 @@
  */
 #include "sched.h"
 
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                membarrier_mm_sync_core_before_usermode(mm);
                mmdrop(mm);
        }
-       if (unlikely(prev_state == TASK_DEAD)) {
-               if (prev->sched_class->task_dead)
-                       prev->sched_class->task_dead(prev);
+       if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+               switch (prev_state) {
+               case TASK_DEAD:
+                       if (prev->sched_class->task_dead)
+                               prev->sched_class->task_dead(prev);
 
-               /*
-                * Remove function-return probe instances associated with this
-                * task and put them back on the free list.
-                */
-               kprobe_flush_task(prev);
+                       /*
+                        * Remove function-return probe instances associated with this
+                        * task and put them back on the free list.
+                        */
+                       kprobe_flush_task(prev);
+
+                       /* Task is done with its stack. */
+                       put_task_stack(prev);
 
-               /* Task is done with its stack. */
-               put_task_stack(prev);
+                       put_task_struct(prev);
+                       break;
 
-               put_task_struct(prev);
+               case TASK_PARKED:
+                       kthread_park_complete(prev);
+                       break;
+               }
        }
 
        tick_nohz_task_switch();
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
 
 void __noreturn do_task_dead(void)
 {
-       /*
-        * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-        * when the following two conditions become true.
-        *   - There is race condition of mmap_sem (It is acquired by
-        *     exit_mm()), and
-        *   - SMI occurs before setting TASK_RUNINNG.
-        *     (or hypervisor of virtual machine switches to other guest)
-        *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-        *
-        * To avoid it, we have to wait for releasing tsk->pi_lock which
-        * is held by try_to_wake_up()
-        */
-       raw_spin_lock_irq(&current->pi_lock);
-       raw_spin_unlock_irq(&current->pi_lock);
-
        /* Causes final put_task_struct in finish_task_switch(): */
-       __set_current_state(TASK_DEAD);
+       set_special_state(TASK_DEAD);
 
        /* Tell freezer to ignore us: */
        current->flags |= PF_NOFREEZE;
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
                                     struct cftype *cft, s64 nice)
 {
        unsigned long weight;
+       int idx;
 
        if (nice < MIN_NICE || nice > MAX_NICE)
                return -ERANGE;
 
-       weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+       idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+       idx = array_index_nospec(idx, 40);
+       weight = sched_prio_to_weight[idx];
+
        return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
index d2c6083304b484352855bdc64262a33e952be9a9..e13df951aca7151225d316da25874b28c6e57d72 100644 (file)
@@ -305,7 +305,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
         * Do not reduce the frequency if the CPU has not been idle
         * recently, as the reduction is likely to be premature then.
         */
-       if (busy && next_f < sg_policy->next_freq) {
+       if (busy && next_f < sg_policy->next_freq &&
+           sg_policy->next_freq != UINT_MAX) {
                next_f = sg_policy->next_freq;
 
                /* Reset cached freq as next_freq has changed */
@@ -396,19 +397,6 @@ static void sugov_irq_work(struct irq_work *irq_work)
 
        sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 
-       /*
-        * For RT tasks, the schedutil governor shoots the frequency to maximum.
-        * Special care must be taken to ensure that this kthread doesn't result
-        * in the same behavior.
-        *
-        * This is (mostly) guaranteed by the work_in_progress flag. The flag is
-        * updated only at the end of the sugov_work() function and before that
-        * the schedutil governor rejects all other frequency scaling requests.
-        *
-        * There is a very rare case though, where the RT thread yields right
-        * after the work_in_progress flag is cleared. The effects of that are
-        * neglected for now.
-        */
        kthread_queue_work(&sg_policy->worker, &sg_policy->work);
 }
 
index e7b3008b85bb022076efec9a14cc6bc139da424a..1356afd1eeb6d6468522c6e6dd50fb6b36dc5b7d 100644 (file)
@@ -1117,7 +1117,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
  * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
  * So, overflow is not an issue here.
  */
-u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
+static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
 {
        u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
        u64 u_act;
@@ -2731,8 +2731,6 @@ bool dl_cpu_busy(unsigned int cpu)
 #endif
 
 #ifdef CONFIG_SCHED_DEBUG
-extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
-
 void print_dl_stats(struct seq_file *m, int cpu)
 {
        print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
index 54dc31e7ab9b2b56bc24932740a18a49a9666065..79f574dba09657316468174e478ea9dcda7b1230 100644 (file)
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
        unsigned long interval = HZ;
-       unsigned long numa_migrate_retry;
 
        /* This task has no NUMA fault statistics yet */
        if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
        /* Periodically retry migrating the task to the preferred node */
        interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-       numa_migrate_retry = jiffies + interval;
-
-       /*
-        * Check that the new retry threshold is after the current one. If
-        * the retry is in the future, it implies that wake_affine has
-        * temporarily asked NUMA balancing to backoff from placement.
-        */
-       if (numa_migrate_retry > p->numa_migrate_retry)
-               return;
-
-       /* Safe to try placing the task on the preferred node */
-       p->numa_migrate_retry = numa_migrate_retry;
+       p->numa_migrate_retry = jiffies + interval;
 
        /* Success if task is already running on preferred CPU */
        if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
        return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-       unsigned long interval;
-
-       if (!static_branch_likely(&sched_numa_balancing))
-               return;
-
-       /* If balancing has no preference then continue gathering data */
-       if (p->numa_preferred_nid == -1)
-               return;
-
-       /*
-        * If the wakeup is not affecting locality then it is neutral from
-        * the perspective of NUMA balacing so continue gathering data.
-        */
-       if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-               return;
-
-       /*
-        * Temporarily prevent NUMA balancing trying to place waker/wakee after
-        * wakee has been moved by wake_affine. This will potentially allow
-        * related tasks to converge and update their data placement. The
-        * 4 * numa_scan_period is to allow the two-pass filter to migrate
-        * hot data to the wakers node.
-        */
-       interval = max(sysctl_numa_balancing_scan_delay,
-                        p->numa_scan_period << 2);
-       p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-       interval = max(sysctl_numa_balancing_scan_delay,
-                        current->numa_scan_period << 2);
-       current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
                       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
        if (target == nr_cpumask_bits)
                return prev_cpu;
 
-       update_wa_numa_placement(p, prev_cpu, target);
        schedstat_inc(sd->ttwu_move_affine);
        schedstat_inc(p->se.statistics.nr_wakeups_affine);
        return target;
@@ -9847,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
        if (curr_cost > this_rq->max_idle_balance_cost)
                this_rq->max_idle_balance_cost = curr_cost;
 
+out:
        /*
         * While browsing the domains, we released the rq lock, a task could
         * have been enqueued in the meantime. Since we're not going idle,
@@ -9855,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
        if (this_rq->cfs.h_nr_running && !pulled_task)
                pulled_task = 1;
 
-out:
        /* Move the next balance forward */
        if (time_after(this_rq->next_balance, next_balance))
                this_rq->next_balance = next_balance;
index 7aef6b4e885a5e058ce75bdbc4f5bf756a2783b2..ef3c4e6f53457ba52151fe243c5d62c160ecc115 100644 (file)
@@ -2701,8 +2701,6 @@ int sched_rr_handler(struct ctl_table *table, int write,
 }
 
 #ifdef CONFIG_SCHED_DEBUG
-extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
-
 void print_rt_stats(struct seq_file *m, int cpu)
 {
        rt_rq_iter_t iter;
index 15750c222ca2ad4c7e7e20806de1dd143a099f87..1f0a4bc6a39d2063f618f12e8532362172eb951c 100644 (file)
@@ -2025,8 +2025,9 @@ extern bool sched_debug_enabled;
 extern void print_cfs_stats(struct seq_file *m, int cpu);
 extern void print_rt_stats(struct seq_file *m, int cpu);
 extern void print_dl_stats(struct seq_file *m, int cpu);
-extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
 #ifdef CONFIG_NUMA_BALANCING
 extern void
 show_numa_stats(struct task_struct *p, struct seq_file *m);
index d4ccea599692397e7526212480d73df3494d3554..9c33163a6165ad847d3a7d3baa71a7f358307503 100644 (file)
@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
                        return;
        }
 
+       set_special_state(TASK_TRACED);
+
        /*
         * We're committing to trapping.  TRACED should be visible before
         * TRAPPING is cleared; otherwise, the tracer might fail do_wait().
         * Also, transition to TRACED and updates to ->jobctl should be
         * atomic with respect to siglock and should be done after the arch
         * hook as siglock is released and regrabbed across it.
+        *
+        *     TRACER                               TRACEE
+        *
+        *     ptrace_attach()
+        * [L]   wait_on_bit(JOBCTL_TRAPPING)   [S] set_special_state(TRACED)
+        *     do_wait()
+        *       set_current_state()                smp_wmb();
+        *       ptrace_do_wait()
+        *         wait_task_stopped()
+        *           task_stopped_code()
+        * [L]         task_is_traced()         [S] task_clear_jobctl_trapping();
         */
-       set_current_state(TASK_TRACED);
+       smp_wmb();
 
        current->last_siginfo = info;
        current->exit_code = exit_code;
@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr)
                if (task_participate_group_stop(current))
                        notify = CLD_STOPPED;
 
-               __set_current_state(TASK_STOPPED);
+               set_special_state(TASK_STOPPED);
                spin_unlock_irq(&current->sighand->siglock);
 
                /*
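Both hunks in this file replace plain state stores with set_special_state(): a TASK_TRACED or TASK_STOPPED transition must not be overwritten by a concurrent try_to_wake_up() setting the task back to TASK_RUNNING. As a hedged sketch of the mechanism (not the exact macro), the special write is serialized against wakeups by the task's pi_lock:

    raw_spin_lock_irqsave(&current->pi_lock, flags);
    current->state = TASK_STOPPED;  /* cannot race with try_to_wake_up() */
    raw_spin_unlock_irqrestore(&current->pi_lock, flags);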
index b7591261652d3ea88811f9d2af2a3ce188fb3320..64c0291b579c9a1ce240e5eb941f2f6fcbff5f29 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
 #include <linux/nmi.h>
+#include <linux/sched/wake_q.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
 }
 
 static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
-                                       struct cpu_stop_work *work)
+                                       struct cpu_stop_work *work,
+                                       struct wake_q_head *wakeq)
 {
        list_add_tail(&work->list, &stopper->works);
-       wake_up_process(stopper->thread);
+       wake_q_add(wakeq, stopper->thread);
 }
 
 /* queue @work to @stopper.  if offline, @work is completed immediately */
 static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+       DEFINE_WAKE_Q(wakeq);
        unsigned long flags;
        bool enabled;
 
        spin_lock_irqsave(&stopper->lock, flags);
        enabled = stopper->enabled;
        if (enabled)
-               __cpu_stop_queue_work(stopper, work);
+               __cpu_stop_queue_work(stopper, work, &wakeq);
        else if (work->done)
                cpu_stop_signal_done(work->done);
        spin_unlock_irqrestore(&stopper->lock, flags);
 
+       wake_up_q(&wakeq);
+
        return enabled;
 }
 
@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 {
        struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
        struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+       DEFINE_WAKE_Q(wakeq);
        int err;
 retry:
        spin_lock_irq(&stopper1->lock);
@@ -252,8 +258,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                        goto unlock;
 
        err = 0;
-       __cpu_stop_queue_work(stopper1, work1);
-       __cpu_stop_queue_work(stopper2, work2);
+       __cpu_stop_queue_work(stopper1, work1, &wakeq);
+       __cpu_stop_queue_work(stopper2, work2, &wakeq);
 unlock:
        spin_unlock(&stopper2->lock);
        spin_unlock_irq(&stopper1->lock);
@@ -263,6 +269,9 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                        cpu_relax();
                goto retry;
        }
+
+       wake_up_q(&wakeq);
+
        return err;
 }
 /**
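For reference, the deferred-wakeup pattern these hunks introduce, in isolation: wakeups are queued while the stopper lock is held and issued only after it is dropped, so the scheduler is not entered with the lock held. A minimal kernel-context sketch (my_lock and the queueing step are placeholders):

#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

static void queue_and_wake(struct task_struct *worker)
{
        DEFINE_WAKE_Q(wakeq);
        unsigned long flags;

        spin_lock_irqsave(&my_lock, flags);
        /* ... queue the work item while holding the lock ... */
        wake_q_add(&wakeq, worker);             /* defer the wakeup */
        spin_unlock_irqrestore(&my_lock, flags);

        wake_up_q(&wakeq);                      /* wake with the lock dropped */
}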
index e8c0dab4fd653a84d316c458a1db767995b02b47..07148b4974516cec45b934e5ed5881e1554e7af0 100644 (file)
@@ -704,24 +704,6 @@ static const struct bin_table bin_net_netfilter_table[] = {
        {}
 };
 
-static const struct bin_table bin_net_irda_table[] = {
-       { CTL_INT,      NET_IRDA_DISCOVERY,             "discovery" },
-       { CTL_STR,      NET_IRDA_DEVNAME,               "devname" },
-       { CTL_INT,      NET_IRDA_DEBUG,                 "debug" },
-       { CTL_INT,      NET_IRDA_FAST_POLL,             "fast_poll_increase" },
-       { CTL_INT,      NET_IRDA_DISCOVERY_SLOTS,       "discovery_slots" },
-       { CTL_INT,      NET_IRDA_DISCOVERY_TIMEOUT,     "discovery_timeout" },
-       { CTL_INT,      NET_IRDA_SLOT_TIMEOUT,          "slot_timeout" },
-       { CTL_INT,      NET_IRDA_MAX_BAUD_RATE,         "max_baud_rate" },
-       { CTL_INT,      NET_IRDA_MIN_TX_TURN_TIME,      "min_tx_turn_time" },
-       { CTL_INT,      NET_IRDA_MAX_TX_DATA_SIZE,      "max_tx_data_size" },
-       { CTL_INT,      NET_IRDA_MAX_TX_WINDOW,         "max_tx_window" },
-       { CTL_INT,      NET_IRDA_MAX_NOREPLY_TIME,      "max_noreply_time" },
-       { CTL_INT,      NET_IRDA_WARN_NOREPLY_TIME,     "warn_noreply_time" },
-       { CTL_INT,      NET_IRDA_LAP_KEEPALIVE_TIME,    "lap_keepalive_time" },
-       {}
-};
-
 static const struct bin_table bin_net_table[] = {
        { CTL_DIR,      NET_CORE,               "core",         bin_net_core_table },
        /* NET_ETHER not used */
@@ -743,7 +725,7 @@ static const struct bin_table bin_net_table[] = {
        { CTL_DIR,      NET_LLC,                "llc",          bin_net_llc_table },
        { CTL_DIR,      NET_NETFILTER,          "netfilter",    bin_net_netfilter_table },
        /* NET_DCCP "dccp" no longer used */
-       { CTL_DIR,      NET_IRDA,               "irda",         bin_net_irda_table },
+       /* NET_IRDA "irda" no longer used */
        { CTL_INT,      2089,                   "nf_conntrack_max" },
        {}
 };
index 0e974cface0b170761896e320cbb3be908e577dc..84f37420fcf544c09e12f730f05a8a85411b36f1 100644 (file)
@@ -119,6 +119,16 @@ static DEFINE_SPINLOCK(watchdog_lock);
 static int watchdog_running;
 static atomic_t watchdog_reset_pending;
 
+static inline void clocksource_watchdog_lock(unsigned long *flags)
+{
+       spin_lock_irqsave(&watchdog_lock, *flags);
+}
+
+static inline void clocksource_watchdog_unlock(unsigned long *flags)
+{
+       spin_unlock_irqrestore(&watchdog_lock, *flags);
+}
+
 static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
 
@@ -142,9 +152,19 @@ static void __clocksource_unstable(struct clocksource *cs)
        cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
        cs->flags |= CLOCK_SOURCE_UNSTABLE;
 
+       /*
+        * If the clocksource is registered, clocksource_watchdog_kthread() will
+        * re-rate and re-select.
+        */
+       if (list_empty(&cs->list)) {
+               cs->rating = 0;
+               return;
+       }
+
        if (cs->mark_unstable)
                cs->mark_unstable(cs);
 
+       /* kick clocksource_watchdog_kthread() */
        if (finished_booting)
                schedule_work(&watchdog_work);
 }
@@ -153,10 +173,8 @@ static void __clocksource_unstable(struct clocksource *cs)
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs:                clocksource to be marked unstable
  *
- * This function is called instead of clocksource_change_rating from
- * cpu hotplug code to avoid a deadlock between the clocksource mutex
- * and the cpu hotplug mutex. It defers the update of the clocksource
- * to the watchdog thread.
+ * This function is called by the x86 TSC code to mark clocksources as unstable;
+ * it defers demotion and re-selection to a kthread.
  */
 void clocksource_mark_unstable(struct clocksource *cs)
 {
@@ -164,7 +182,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 
        spin_lock_irqsave(&watchdog_lock, flags);
        if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
-               if (list_empty(&cs->wd_list))
+               if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
                        list_add(&cs->wd_list, &watchdog_list);
                __clocksource_unstable(cs);
        }
@@ -319,9 +337,8 @@ static void clocksource_resume_watchdog(void)
 
 static void clocksource_enqueue_watchdog(struct clocksource *cs)
 {
-       unsigned long flags;
+       INIT_LIST_HEAD(&cs->wd_list);
 
-       spin_lock_irqsave(&watchdog_lock, flags);
        if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
                /* cs is a clocksource to be watched. */
                list_add(&cs->wd_list, &watchdog_list);
@@ -331,7 +348,6 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
                if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
        }
-       spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
 static void clocksource_select_watchdog(bool fallback)
@@ -373,9 +389,6 @@ static void clocksource_select_watchdog(bool fallback)
 
 static void clocksource_dequeue_watchdog(struct clocksource *cs)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&watchdog_lock, flags);
        if (cs != watchdog) {
                if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
                        /* cs is a watched clocksource. */
@@ -384,21 +397,19 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
                        clocksource_stop_watchdog();
                }
        }
-       spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
 static int __clocksource_watchdog_kthread(void)
 {
        struct clocksource *cs, *tmp;
        unsigned long flags;
-       LIST_HEAD(unstable);
        int select = 0;
 
        spin_lock_irqsave(&watchdog_lock, flags);
        list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                        list_del_init(&cs->wd_list);
-                       list_add(&cs->wd_list, &unstable);
+                       __clocksource_change_rating(cs, 0);
                        select = 1;
                }
                if (cs->flags & CLOCK_SOURCE_RESELECT) {
@@ -410,11 +421,6 @@ static int __clocksource_watchdog_kthread(void)
        clocksource_stop_watchdog();
        spin_unlock_irqrestore(&watchdog_lock, flags);
 
-       /* Needs to be done outside of watchdog lock */
-       list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
-               list_del_init(&cs->wd_list);
-               __clocksource_change_rating(cs, 0);
-       }
        return select;
 }
 
@@ -447,6 +453,9 @@ static inline int __clocksource_watchdog_kthread(void) { return 0; }
 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
 void clocksource_mark_unstable(struct clocksource *cs) { }
 
+static inline void clocksource_watchdog_lock(unsigned long *flags) { }
+static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
+
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
 /**
@@ -779,14 +788,19 @@ EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
  */
 int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
+       unsigned long flags;
 
        /* Initialize mult/shift and max_idle_ns */
        __clocksource_update_freq_scale(cs, scale, freq);
 
        /* Add clocksource to the clocksource list */
        mutex_lock(&clocksource_mutex);
+
+       clocksource_watchdog_lock(&flags);
        clocksource_enqueue(cs);
        clocksource_enqueue_watchdog(cs);
+       clocksource_watchdog_unlock(&flags);
+
        clocksource_select();
        clocksource_select_watchdog(false);
        mutex_unlock(&clocksource_mutex);
@@ -808,8 +822,13 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
  */
 void clocksource_change_rating(struct clocksource *cs, int rating)
 {
+       unsigned long flags;
+
        mutex_lock(&clocksource_mutex);
+       clocksource_watchdog_lock(&flags);
        __clocksource_change_rating(cs, rating);
+       clocksource_watchdog_unlock(&flags);
+
        clocksource_select();
        clocksource_select_watchdog(false);
        mutex_unlock(&clocksource_mutex);
@@ -821,6 +840,8 @@ EXPORT_SYMBOL(clocksource_change_rating);
  */
 static int clocksource_unbind(struct clocksource *cs)
 {
+       unsigned long flags;
+
        if (clocksource_is_watchdog(cs)) {
                /* Select and try to install a replacement watchdog. */
                clocksource_select_watchdog(true);
@@ -834,8 +855,12 @@ static int clocksource_unbind(struct clocksource *cs)
                if (curr_clocksource == cs)
                        return -EBUSY;
        }
+
+       clocksource_watchdog_lock(&flags);
        clocksource_dequeue_watchdog(cs);
        list_del_init(&cs->list);
+       clocksource_watchdog_unlock(&flags);
+
        return 0;
 }
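With the watchdog lock now taken by the callers, registration, re-rating and unbinding all serialize their list manipulation against the watchdog. A sketch of the registration path under the new scheme, using the standard clocksource API (my_counter_read() and the 1 MHz rate are illustrative placeholders):

#include <linux/clocksource.h>
#include <linux/init.h>

static u64 my_cs_read(struct clocksource *cs)
{
        return my_counter_read();       /* placeholder hardware read */
}

static struct clocksource my_cs = {
        .name   = "my_counter",
        .rating = 250,
        .read   = my_cs_read,
        .mask   = CLOCKSOURCE_MASK(64),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init my_cs_init(void)
{
        /* Ends up in __clocksource_register_scale(), which now wraps
         * both enqueue operations in clocksource_watchdog_lock(). */
        return clocksource_register_hz(&my_cs, 1000000);
}
device_initcall(my_cs_init);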
 
index eda1210ce50f88dd88a53ab9fafbfeef5da021ca..14e858753d7689d96392402ebce61e0116574efa 100644 (file)
@@ -90,6 +90,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
                        .clockid = CLOCK_REALTIME,
                        .get_time = &ktime_get_real,
                },
+               {
+                       .index = HRTIMER_BASE_BOOTTIME,
+                       .clockid = CLOCK_BOOTTIME,
+                       .get_time = &ktime_get_boottime,
+               },
                {
                        .index = HRTIMER_BASE_TAI,
                        .clockid = CLOCK_TAI,
@@ -105,6 +110,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
                        .clockid = CLOCK_REALTIME,
                        .get_time = &ktime_get_real,
                },
+               {
+                       .index = HRTIMER_BASE_BOOTTIME_SOFT,
+                       .clockid = CLOCK_BOOTTIME,
+                       .get_time = &ktime_get_boottime,
+               },
                {
                        .index = HRTIMER_BASE_TAI_SOFT,
                        .clockid = CLOCK_TAI,
@@ -119,7 +129,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 
        [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
        [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
-       [CLOCK_BOOTTIME]        = HRTIMER_BASE_MONOTONIC,
+       [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
        [CLOCK_TAI]             = HRTIMER_BASE_TAI,
 };
 
@@ -571,12 +581,14 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
        ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
+       ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
        ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
        ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
-                                                  offs_real, offs_tai);
+                                           offs_real, offs_boot, offs_tai);
 
        base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
+       base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
        base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
 
        return now;
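These hunks give CLOCK_BOOTTIME its own HRTIMER_BASE_BOOTTIME (and _SOFT) base instead of aliasing the monotonic base, so boottime hrtimers advance across suspend. A runnable userspace check via timerfd, which accepts CLOCK_BOOTTIME:

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/timerfd.h>

int main(void)
{
        struct itimerspec its = {
                .it_value = { .tv_sec = 5 },
        };
        uint64_t expirations;
        int fd = timerfd_create(CLOCK_BOOTTIME, 0);

        if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0)
                return 1;
        /* A suspend/resume cycle inside the 5s window now counts against
         * the timer, since the boottime base advances across suspend. */
        if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
                printf("expired %llu time(s)\n",
                       (unsigned long long)expirations);
        return 0;
}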
index 2541bd89f20eb95f96d435d21f4eb7c4d1a0cda6..5a6251ac6f7acd183c35a51d9d55fb680fda64dd 100644 (file)
@@ -1205,10 +1205,12 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
                           u64 *newval, u64 *oldval)
 {
        u64 now;
+       int ret;
 
        WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
+       ret = cpu_timer_sample_group(clock_idx, tsk, &now);
 
-       if (oldval && cpu_timer_sample_group(clock_idx, tsk, &now) != -EINVAL) {
+       if (oldval && ret != -EINVAL) {
                /*
                 * We are setting itimer. The *oldval is absolute and we update
                 * it to be relative, *newval argument is relative and we update
index e0dbae98db9d9b11cbcfa6695c850ad8f0c27b87..69a937c3cd81260e8a83942ea0d21750c86fa5be 100644 (file)
@@ -83,8 +83,6 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
        case CLOCK_BOOTTIME:
                get_monotonic_boottime64(tp);
                break;
-       case CLOCK_MONOTONIC_ACTIVE:
-               ktime_get_active_ts64(tp);
        default:
                return -EINVAL;
        }
index b6899b5060bd8450a20ab8b90b626ddf8327bec7..10b7186d063830b9e45a84146ed243a9f0e80b07 100644 (file)
@@ -252,16 +252,15 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
        return 0;
 }
 
-static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
+static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)
 {
-       timekeeping_clocktai64(tp);
+       get_monotonic_boottime64(tp);
        return 0;
 }
 
-static int posix_get_monotonic_active(clockid_t which_clock,
-                                     struct timespec64 *tp)
+static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
 {
-       ktime_get_active_ts64(tp);
+       timekeeping_clocktai64(tp);
        return 0;
 }
 
@@ -1317,9 +1316,19 @@ static const struct k_clock clock_tai = {
        .timer_arm              = common_hrtimer_arm,
 };
 
-static const struct k_clock clock_monotonic_active = {
+static const struct k_clock clock_boottime = {
        .clock_getres           = posix_get_hrtimer_res,
-       .clock_get              = posix_get_monotonic_active,
+       .clock_get              = posix_get_boottime,
+       .nsleep                 = common_nsleep,
+       .timer_create           = common_timer_create,
+       .timer_set              = common_timer_set,
+       .timer_get              = common_timer_get,
+       .timer_del              = common_timer_del,
+       .timer_rearm            = common_hrtimer_rearm,
+       .timer_forward          = common_hrtimer_forward,
+       .timer_remaining        = common_hrtimer_remaining,
+       .timer_try_to_cancel    = common_hrtimer_try_to_cancel,
+       .timer_arm              = common_hrtimer_arm,
 };
 
 static const struct k_clock * const posix_clocks[] = {
@@ -1330,11 +1339,10 @@ static const struct k_clock * const posix_clocks[] = {
        [CLOCK_MONOTONIC_RAW]           = &clock_monotonic_raw,
        [CLOCK_REALTIME_COARSE]         = &clock_realtime_coarse,
        [CLOCK_MONOTONIC_COARSE]        = &clock_monotonic_coarse,
-       [CLOCK_BOOTTIME]                = &clock_monotonic,
+       [CLOCK_BOOTTIME]                = &clock_boottime,
        [CLOCK_REALTIME_ALARM]          = &alarm_clock,
        [CLOCK_BOOTTIME_ALARM]          = &alarm_clock,
        [CLOCK_TAI]                     = &clock_tai,
-       [CLOCK_MONOTONIC_ACTIVE]        = &clock_monotonic_active,
 };
 
 static const struct k_clock *clockid_to_kclock(const clockid_t id)
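With clock_boottime now a full k_clock rather than an alias of clock_monotonic, POSIX timers can be armed on CLOCK_BOOTTIME directly. A minimal runnable example (link with -lrt on older glibc):

#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static void on_alarm(int sig) { (void)sig; }

int main(void)
{
        struct sigevent sev = {
                .sigev_notify = SIGEV_SIGNAL,
                .sigev_signo  = SIGALRM,
        };
        struct itimerspec its = { .it_value = { .tv_sec = 60 } };
        timer_t timer;

        signal(SIGALRM, on_alarm);
        if (timer_create(CLOCK_BOOTTIME, &sev, &timer) != 0) {
                perror("timer_create");
                return 1;
        }
        timer_settime(timer, 0, &its, NULL);
        pause();        /* wakes after 60s, time in suspend included */
        return 0;
}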
index b398c2ea69b290cdaec1769b7d11cbc501646652..aa2094d5dd275372f999d051887aec22bfab9f19 100644 (file)
@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
+               /*
+                * Required for !SMP because for_each_cpu() reports
+                * unconditionally CPU0 as set on UP kernels.
+                */
+               if (!IS_ENABLED(CONFIG_SMP) &&
+                   cpumask_empty(tick_broadcast_oneshot_mask))
+                       break;
+
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event <= now) {
                        cpumask_set_cpu(cpu, tmpmask);
index 099572ca4a8f239a54d88f37ce4ad1418c8aa483..49edc1c4f3e645894f839c40489ab81594a02398 100644 (file)
@@ -419,19 +419,6 @@ void tick_suspend_local(void)
        clockevents_shutdown(td->evtdev);
 }
 
-static void tick_forward_next_period(void)
-{
-       ktime_t delta, now = ktime_get();
-       u64 n;
-
-       delta = ktime_sub(now, tick_next_period);
-       n = ktime_divns(delta, tick_period);
-       tick_next_period += n * tick_period;
-       if (tick_next_period < now)
-               tick_next_period += tick_period;
-       tick_sched_forward_next_period();
-}
-
 /**
  * tick_resume_local - Resume the local tick device
  *
@@ -444,8 +431,6 @@ void tick_resume_local(void)
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        bool broadcast = tick_resume_check_broadcast();
 
-       tick_forward_next_period();
-
        clockevents_tick_resume(td->evtdev);
        if (!broadcast) {
                if (td->mode == TICKDEV_MODE_PERIODIC)
index 21efab7485ca517bf6fd675233d1dd268e3ce252..e277284c2831c9c1dae2219c1a135e1f5dc8945d 100644 (file)
@@ -141,12 +141,6 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
 #endif /* !(BROADCAST && ONESHOT) */
 
-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
-extern void tick_sched_forward_next_period(void);
-#else
-static inline void tick_sched_forward_next_period(void) { }
-#endif
-
 /* NO_HZ_FULL internal */
 #ifdef CONFIG_NO_HZ_FULL
 extern void tick_nohz_init(void);
index c1f518e7aa808a281b5dab69f3961a02b2c14315..6fe615d57ebbbe3b52185ccebb4c041cec0f62b9 100644 (file)
@@ -82,16 +82,15 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
                    !tick_device_is_functional(dev)) {
 
-               printk(KERN_INFO "Clockevents: "
-                      "could not switch to one-shot mode:");
+               pr_info("Clockevents: could not switch to one-shot mode:");
                if (!dev) {
-                       printk(" no tick device\n");
+                       pr_cont(" no tick device\n");
                } else {
                        if (!tick_device_is_functional(dev))
-                               printk(" %s is not functional.\n", dev->name);
+                               pr_cont(" %s is not functional.\n", dev->name);
                        else
-                               printk(" %s does not support one-shot mode.\n",
-                                      dev->name);
+                               pr_cont(" %s does not support one-shot mode.\n",
+                                       dev->name);
                }
                return -EINVAL;
        }
index 646645e981f942480d49857fc84ef2d5bcd14791..da9455a6b42ba1f03cbfaf75f427e92f1241d2bb 100644 (file)
@@ -51,15 +51,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)
  */
 static ktime_t last_jiffies_update;
 
-/*
- * Called after resume. Make sure that jiffies are not fast forwarded due to
- * clock monotonic being forwarded by the suspended time.
- */
-void tick_sched_forward_next_period(void)
-{
-       last_jiffies_update = tick_next_period;
-}
-
 /*
  * Must be called with interrupts disabled !
  */
@@ -804,12 +795,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
                return;
        }
 
-       hrtimer_set_expires(&ts->sched_timer, tick);
-
-       if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-               hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
-       else
+       if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+               hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+       } else {
+               hrtimer_set_expires(&ts->sched_timer, tick);
                tick_program_event(tick, 1);
+       }
 }
 
 static void tick_nohz_retain_tick(struct tick_sched *ts)
index ca90219a1e73783d1ac991515453138233d82607..49cbceef5debc7c06aab4f4f5a2479b291855ee7 100644 (file)
@@ -138,12 +138,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
 
 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 {
-       /* Update both bases so mono and raw stay coupled. */
-       tk->tkr_mono.base += delta;
-       tk->tkr_raw.base += delta;
-
-       /* Accumulate time spent in suspend */
-       tk->time_suspended += delta;
+       tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
 /*
@@ -473,6 +468,36 @@ u64 ktime_get_raw_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe, given that it is accessed from tracing, we do not use
+ * a separate timekeeper with updates to the monotonic clock and boot offset
+ * protected by seqlocks. This has the following minor side effects:
+ *
+ * (1) It's possible that a timestamp is taken after the boot offset is
+ * updated but before the timekeeper is updated. If this happens, the new
+ * boot offset is added to the old timekeeping value, making the clock
+ * appear to update slightly
+ * earlier:
+ *    CPU 0                                        CPU 1
+ *    timekeeping_inject_sleeptime64()
+ *    __timekeeping_inject_sleeptime(tk, delta);
+ *                                                 timestamp();
+ *    timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * observed only partially updated. Since updates to tk->offs_boot are rare
+ * events, such a torn read should be rare as well, and postprocessing should
+ * be able to handle it.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+       struct timekeeper *tk = &tk_core.timekeeper;
+
+       return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
 /*
  * See comment for __ktime_get_fast_ns() vs. timestamp ordering
  */
@@ -764,6 +789,7 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
 
 static ktime_t *offsets[TK_OFFS_MAX] = {
        [TK_OFFS_REAL]  = &tk_core.timekeeper.offs_real,
+       [TK_OFFS_BOOT]  = &tk_core.timekeeper.offs_boot,
        [TK_OFFS_TAI]   = &tk_core.timekeeper.offs_tai,
 };
 
@@ -860,39 +886,6 @@ void ktime_get_ts64(struct timespec64 *ts)
 }
 EXPORT_SYMBOL_GPL(ktime_get_ts64);
 
-/**
- * ktime_get_active_ts64 - Get the active non-suspended monotonic clock
- * @ts:                pointer to timespec variable
- *
- * The function calculates the monotonic clock from the realtime clock and
- * the wall_to_monotonic offset, subtracts the accumulated suspend time and
- * stores the result in normalized timespec64 format in the variable
- * pointed to by @ts.
- */
-void ktime_get_active_ts64(struct timespec64 *ts)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 tomono, tsusp;
-       u64 nsec, nssusp;
-       unsigned int seq;
-
-       WARN_ON(timekeeping_suspended);
-
-       do {
-               seq = read_seqcount_begin(&tk_core.seq);
-               ts->tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(&tk->tkr_mono);
-               tomono = tk->wall_to_monotonic;
-               nssusp = tk->time_suspended;
-       } while (read_seqcount_retry(&tk_core.seq, seq));
-
-       ts->tv_sec += tomono.tv_sec;
-       ts->tv_nsec = 0;
-       timespec64_add_ns(ts, nsec + tomono.tv_nsec);
-       tsusp = ns_to_timespec64(nssusp);
-       *ts = timespec64_sub(*ts, tsusp);
-}
-
 /**
  * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
  *
@@ -1593,6 +1586,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
                return;
        }
        tk_xtime_add(tk, delta);
+       tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
        tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
        tk_debug_account_sleep_time(delta);
 }
@@ -2125,7 +2119,7 @@ void update_wall_time(void)
 void getboottime64(struct timespec64 *ts)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended);
+       ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
 
        *ts = ktime_to_timespec64(t);
 }
@@ -2139,13 +2133,6 @@ unsigned long get_seconds(void)
 }
 EXPORT_SYMBOL(get_seconds);
 
-struct timespec __current_kernel_time(void)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-
-       return timespec64_to_timespec(tk_xtime(tk));
-}
-
 struct timespec64 current_kernel_time64(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
@@ -2195,6 +2182,7 @@ void do_timer(unsigned long ticks)
  * ktime_get_update_offsets_now - hrtimer helper
  * @cwsseq:    pointer to check and store the clock was set sequence number
  * @offs_real: pointer to storage for monotonic -> realtime offset
+ * @offs_boot: pointer to storage for monotonic -> boottime offset
  * @offs_tai:  pointer to storage for monotonic -> clock tai offset
  *
  * Returns current monotonic time and updates the offsets if the
@@ -2204,7 +2192,7 @@ void do_timer(unsigned long ticks)
  * Called from hrtimer_interrupt() or retrigger_next_event()
  */
 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
-                                    ktime_t *offs_tai)
+                                    ktime_t *offs_boot, ktime_t *offs_tai)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
@@ -2221,6 +2209,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
                if (*cwsseq != tk->clock_was_set_seq) {
                        *cwsseq = tk->clock_was_set_seq;
                        *offs_real = tk->offs_real;
+                       *offs_boot = tk->offs_boot;
                        *offs_tai = tk->offs_tai;
                }
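With tk->offs_boot as the single boot offset, the difference between CLOCK_BOOTTIME and CLOCK_MONOTONIC is exactly the accumulated suspend time. A runnable probe (the two reads are not atomic, so expect nanosecond-level skew):

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec mono, boot;
        double delta;

        clock_gettime(CLOCK_MONOTONIC, &mono);
        clock_gettime(CLOCK_BOOTTIME, &boot);

        delta = (boot.tv_sec - mono.tv_sec) +
                (boot.tv_nsec - mono.tv_nsec) / 1e9;
        printf("time spent suspended since boot: ~%.3f s\n", delta);
        return 0;
}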
 
index 79b67f5e0343caf35cf15c3cd99b905d29a31a10..7a9b4eb7a1d5bde85e7a7b1c7747602cdd605975 100644 (file)
@@ -6,6 +6,7 @@
  */
 extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
                                            ktime_t *offs_real,
+                                           ktime_t *offs_boot,
                                            ktime_t *offs_tai);
 
 extern int timekeeping_valid_for_hres(void);
index d88e96d4e12c013d46d5aaae20d99759895fe187..ce2cbbff27e45cb7a3cbd52470faa2bb78c18ed9 100644 (file)
@@ -20,6 +20,7 @@
 #include "trace.h"
 
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 /**
  * trace_call_bpf - invoke BPF program
@@ -474,8 +475,6 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct cgroup *cgrp;
 
-       if (unlikely(in_interrupt()))
-               return -EINVAL;
        if (unlikely(idx >= array->map.max_entries))
                return -E2BIG;
 
@@ -577,6 +576,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_perf_event_output_proto;
        case BPF_FUNC_get_stackid:
                return &bpf_get_stackid_proto;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto;
        case BPF_FUNC_perf_event_read_value:
                return &bpf_perf_event_read_value_proto;
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
@@ -664,6 +665,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
        .arg3_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
+          u64, flags)
+{
+       struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
+       return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+                            (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_tp = {
+       .func           = bpf_get_stack_tp,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg4_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -672,6 +692,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_perf_event_output_proto_tp;
        case BPF_FUNC_get_stackid:
                return &bpf_get_stackid_proto_tp;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto_tp;
        default:
                return tracing_func_proto(func_id, prog);
        }
@@ -734,6 +756,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_perf_event_output_proto_tp;
        case BPF_FUNC_get_stackid:
                return &bpf_get_stackid_proto_tp;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto_tp;
        case BPF_FUNC_perf_prog_read_value:
                return &bpf_perf_prog_read_value_proto;
        default:
@@ -744,7 +768,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 /*
  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
  * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output and/or bpf_get_stack_id
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
  */
 static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
@@ -787,6 +811,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
        .arg3_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
+          void *, buf, u32, size, u64, flags)
+{
+       struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+       perf_fetch_caller_regs(regs);
+       return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+                            (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
+       .func           = bpf_get_stack_raw_tp,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg4_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -795,6 +839,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_perf_event_output_proto_raw_tp;
        case BPF_FUNC_get_stackid:
                return &bpf_get_stackid_proto_raw_tp;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto_raw_tp;
        default:
                return tracing_func_proto(func_id, prog);
        }
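The new helper is now available to kprobe, tracepoint, raw tracepoint and perf event programs. A BPF-side sketch of its use (bpf_helpers.h as shipped with the kernel's BPF selftests is assumed; do_sys_open is an arbitrary attach point):

#include <linux/ptrace.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"

char _license[] SEC("license") = "GPL";  /* bpf_get_stack() is GPL-only */

SEC("kprobe/do_sys_open")
int dump_stack(struct pt_regs *ctx)
{
        __u64 buf[32];
        long len;

        /* Fills buf with up to sizeof(buf) bytes of return addresses;
         * flags = 0 requests the kernel stack of the current task. */
        len = bpf_get_stack(ctx, buf, sizeof(buf), 0);
        if (len < 0)
                return 0;
        /* ... forward via bpf_perf_event_output() or a map ... */
        return 0;
}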
@@ -977,6 +1023,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
 {
        struct perf_event_query_bpf __user *uquery = info;
        struct perf_event_query_bpf query = {};
+       u32 *ids, prog_cnt, ids_len;
        int ret;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -985,16 +1032,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
                return -EINVAL;
        if (copy_from_user(&query, uquery, sizeof(query)))
                return -EFAULT;
-       if (query.ids_len > BPF_TRACE_MAX_PROGS)
+
+       ids_len = query.ids_len;
+       if (ids_len > BPF_TRACE_MAX_PROGS)
                return -E2BIG;
+       ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
+       if (!ids)
+               return -ENOMEM;
+       /*
+        * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
+        * is required when the user only wants to check uquery->prog_cnt.
+        * There is no need to check for it explicitly since that case is
+        * handled gracefully in bpf_prog_array_copy_info.
+        */
 
        mutex_lock(&bpf_event_mutex);
        ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
-                                      uquery->ids,
-                                      query.ids_len,
-                                      &uquery->prog_cnt);
+                                      ids,
+                                      ids_len,
+                                      &prog_cnt);
        mutex_unlock(&bpf_event_mutex);
 
+       if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
+           copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
+               ret = -EFAULT;
+
+       kfree(ids);
        return ret;
 }
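Userspace pairs with this through the PERF_EVENT_IOC_QUERY_BPF ioctl; the rework above copies ids[] and prog_cnt back only after bpf_event_mutex is dropped. A sketch, assuming perf_fd is an existing perf event file descriptor:

#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int query_progs(int perf_fd)
{
        __u32 want = 16;
        struct perf_event_query_bpf *q;

        q = calloc(1, sizeof(*q) + want * sizeof(__u32));
        if (!q)
                return -1;
        q->ids_len = want;

        if (ioctl(perf_fd, PERF_EVENT_IOC_QUERY_BPF, q) == 0) {
                for (__u32 i = 0; i < q->prog_cnt && i < want; i++)
                        printf("attached BPF prog id %u\n", q->ids[i]);
        }
        free(q);
        return 0;
}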
 
index 16bbf062018fa79af48db2339db8ad1c094b3e85..8d83bcf9ef69fa894706b2005e5d5cf9fba97003 100644 (file)
@@ -5514,10 +5514,10 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
        ftrace_create_filter_files(&global_ops, d_tracer);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       trace_create_file("set_graph_function", 0444, d_tracer,
+       trace_create_file("set_graph_function", 0644, d_tracer,
                                    NULL,
                                    &ftrace_graph_fops);
-       trace_create_file("set_graph_notrace", 0444, d_tracer,
+       trace_create_file("set_graph_notrace", 0644, d_tracer,
                                    NULL,
                                    &ftrace_graph_notrace_fops);
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
index dfbcf9ee1447645594b06736e2ca9616e6cd93eb..414d7210b2eca7ccb78ac1748a31aefb99414007 100644 (file)
@@ -1165,7 +1165,7 @@ static struct {
        { trace_clock,                  "perf",         1 },
        { ktime_get_mono_fast_ns,       "mono",         1 },
        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
-       { ktime_get_mono_fast_ns,       "boot",         1 },
+       { ktime_get_boot_fast_ns,       "boot",         1 },
        ARCH_TRACE_CLOCKS
 };
 
index e954ae3d82c0f78e2ac5646e84563c6237e23bce..e3a658bac10feef4e42740f9c5b556687162bd32 100644 (file)
@@ -356,7 +356,7 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
                __field(        unsigned int,           seqnum          )
        ),
 
-       F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n",
+       F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tnmi-ts:%llu\tnmi-count:%u\n",
                 __entry->seqnum,
                 __entry->tv_sec,
                 __entry->tv_nsec,
index 9b4716bb8bb02fb93d9fd449e0969eaeb8c2fca8..7d306b74230fac88385e9e64648bcdb4a340faf5 100644 (file)
@@ -762,6 +762,9 @@ static int regex_match_full(char *str, struct regex *r, int len)
 
 static int regex_match_front(char *str, struct regex *r, int len)
 {
+       if (len < r->len)
+               return 0;
+
        if (strncmp(str, r->pattern, r->len) == 0)
                return 1;
        return 0;
@@ -1499,14 +1502,14 @@ static int process_preds(struct trace_event_call *call,
                return ret;
        }
 
-       if (!nr_preds) {
-               prog = NULL;
-       } else {
-               prog = predicate_parse(filter_string, nr_parens, nr_preds,
+       if (!nr_preds)
+               return -EINVAL;
+
+       prog = predicate_parse(filter_string, nr_parens, nr_preds,
                               parse_pred, call, pe);
-               if (IS_ERR(prog))
-                       return PTR_ERR(prog);
-       }
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
        rcu_assign_pointer(filter->prog, prog);
        return 0;
 }
index 0d7b3ffbecc2a03a7604509e2f19cdd3df5e4973..b9061ed59bbd9faabc7da7a65ae959a89e4b689f 100644 (file)
@@ -2466,6 +2466,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
                else if (strcmp(modifier, "usecs") == 0)
                        *flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
                else {
+                       hist_err("Invalid field modifier: ", modifier);
                        field = ERR_PTR(-EINVAL);
                        goto out;
                }
@@ -2481,6 +2482,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
        else {
                field = trace_find_event_field(file->event_call, field_name);
                if (!field || !field->size) {
+                       hist_err("Couldn't find field: ", field_name);
                        field = ERR_PTR(-EINVAL);
                        goto out;
                }
@@ -4913,6 +4915,16 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
                seq_printf(m, "%s", field_name);
        } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
                seq_puts(m, "common_timestamp");
+
+       if (hist_field->flags) {
+               if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) &&
+                   !(hist_field->flags & HIST_FIELD_FL_EXPR)) {
+                       const char *flags = get_hist_field_flags(hist_field);
+
+                       if (flags)
+                               seq_printf(m, ".%s", flags);
+               }
+       }
 }
 
 static int event_hist_trigger_print(struct seq_file *m,
index 1cd3fb4d70f8d92d5c360038a823a1bac5e1a504..02aed76e0978801428ff33f1b8a421a8deb34f14 100644 (file)
@@ -512,8 +512,6 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
        if (ret == 0)
                tk->tp.flags |= TP_FLAG_REGISTERED;
        else {
-               pr_warn("Could not insert probe at %s+%lu: %d\n",
-                       trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
                if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
                        pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
                        ret = 0;
index 3c7bfc4bf5e9981b687ca8dd4ed1cf890b38ee12..4237eba4ef2020c6debb9df2851702b859ae515f 100644 (file)
@@ -472,7 +472,7 @@ static __init int stack_trace_init(void)
                        NULL, &stack_trace_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-       trace_create_file("stack_trace_filter", 0444, d_tracer,
+       trace_create_file("stack_trace_filter", 0644, d_tracer,
                          &trace_ops, &stack_trace_filter_fops);
 #endif
 
index 34fd0e0ec51d2a49c27d082f363be11276d7d753..ac892878dbe60d8f4ac527be8fc1f8d39a2958b9 100644 (file)
@@ -55,6 +55,7 @@ struct trace_uprobe {
        struct list_head                list;
        struct trace_uprobe_filter      filter;
        struct uprobe_consumer          consumer;
+       struct path                     path;
        struct inode                    *inode;
        char                            *filename;
        unsigned long                   offset;
@@ -289,7 +290,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
        for (i = 0; i < tu->tp.nr_args; i++)
                traceprobe_free_probe_arg(&tu->tp.args[i]);
 
-       iput(tu->inode);
+       path_put(&tu->path);
        kfree(tu->tp.call.class->system);
        kfree(tu->tp.call.name);
        kfree(tu->filename);
@@ -363,7 +364,6 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
 static int create_trace_uprobe(int argc, char **argv)
 {
        struct trace_uprobe *tu;
-       struct inode *inode;
        char *arg, *event, *group, *filename;
        char buf[MAX_EVENT_NAME_LEN];
        struct path path;
@@ -371,7 +371,6 @@ static int create_trace_uprobe(int argc, char **argv)
        bool is_delete, is_return;
        int i, ret;
 
-       inode = NULL;
        ret = 0;
        is_delete = false;
        is_return = false;
@@ -437,21 +436,16 @@ static int create_trace_uprobe(int argc, char **argv)
        }
        /* Find the last occurrence, in case the path contains ':' too. */
        arg = strrchr(argv[1], ':');
-       if (!arg) {
-               ret = -EINVAL;
-               goto fail_address_parse;
-       }
+       if (!arg)
+               return -EINVAL;
 
        *arg++ = '\0';
        filename = argv[1];
        ret = kern_path(filename, LOOKUP_FOLLOW, &path);
        if (ret)
-               goto fail_address_parse;
-
-       inode = igrab(d_real_inode(path.dentry));
-       path_put(&path);
+               return ret;
 
-       if (!inode || !S_ISREG(inode->i_mode)) {
+       if (!d_is_reg(path.dentry)) {
                ret = -EINVAL;
                goto fail_address_parse;
        }
@@ -490,7 +484,7 @@ static int create_trace_uprobe(int argc, char **argv)
                goto fail_address_parse;
        }
        tu->offset = offset;
-       tu->inode = inode;
+       tu->path = path;
        tu->filename = kstrdup(filename, GFP_KERNEL);
 
        if (!tu->filename) {
@@ -558,7 +552,7 @@ static int create_trace_uprobe(int argc, char **argv)
        return ret;
 
 fail_address_parse:
-       iput(inode);
+       path_put(&path);
 
        pr_info("Failed to parse address or file.\n");
 
@@ -922,6 +916,7 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
                goto err_flags;
 
        tu->consumer.filter = filter;
+       tu->inode = d_real_inode(tu->path.dentry);
        ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
        if (ret)
                goto err_buffer;
@@ -967,6 +962,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
        WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
        uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
+       tu->inode = NULL;
        tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
 
        uprobe_buffer_disable();
@@ -1337,7 +1333,6 @@ struct trace_event_call *
 create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
 {
        struct trace_uprobe *tu;
-       struct inode *inode;
        struct path path;
        int ret;
 
@@ -1345,11 +1340,8 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
        if (ret)
                return ERR_PTR(ret);
 
-       inode = igrab(d_inode(path.dentry));
-       path_put(&path);
-
-       if (!inode || !S_ISREG(inode->i_mode)) {
-               iput(inode);
+       if (!d_is_reg(path.dentry)) {
+               path_put(&path);
                return ERR_PTR(-EINVAL);
        }
 
@@ -1364,11 +1356,12 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
        if (IS_ERR(tu)) {
                pr_info("Failed to allocate trace_uprobe.(%d)\n",
                        (int)PTR_ERR(tu));
+               path_put(&path);
                return ERR_CAST(tu);
        }
 
        tu->offset = offs;
-       tu->inode = inode;
+       tu->path = path;
        tu->filename = kstrdup(name, GFP_KERNEL);
        init_trace_event_call(tu, &tu->tp.call);
 
index 671b134573876d7ab5f8c3b2f1b8722b909c1201..1e37da2e0c25d13ec7e6bd79be165a38f58b4fb3 100644 (file)
@@ -207,7 +207,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
                        lockdep_is_held(&tracepoints_mutex));
        old = func_add(&tp_funcs, func, prio);
        if (IS_ERR(old)) {
-               WARN_ON_ONCE(1);
+               WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
                return PTR_ERR(old);
        }
 
@@ -239,7 +239,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
                        lockdep_is_held(&tracepoints_mutex));
        old = func_remove(&tp_funcs, func);
        if (IS_ERR(old)) {
-               WARN_ON_ONCE(1);
+               WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
                return PTR_ERR(old);
        }
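For context, a minimal in-kernel consumer of this registration path (module sketch; sched_switch is chosen arbitrarily, with its probe signature as in 4.17-era trace/events/sched.h):

#include <linux/module.h>
#include <trace/events/sched.h>

static void probe_sched_switch(void *data, bool preempt,
                               struct task_struct *prev,
                               struct task_struct *next)
{
        /* ... inspect the context switch ... */
}

static int __init probe_init(void)
{
        /* With the change above, an -ENOMEM from func_add() no longer
         * trips the WARN; other errors (e.g. -EEXIST on duplicate
         * registration) still do. */
        return register_trace_sched_switch(probe_sched_switch, NULL);
}
module_init(probe_init);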
 
index c0bba30fef0ac1f8bda32af158b5596fcc14ce09..bbfb229aa0675ed73d438ce2700e674891758310 100644 (file)
@@ -84,7 +84,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
                __free_pages(page, page_order);
                page = NULL;
 
-               if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+               if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+                   dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
                    !(gfp & GFP_DMA)) {
                        gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
                        goto again;
index df782418b333eedea39cdb793c38ad623f96a707..81f9e33aa7e72aa7f3a524ff061aa3c9d48fe1a9 100644 (file)
@@ -111,27 +111,22 @@ EXPORT_SYMBOL(errseq_set);
  * errseq_sample() - Grab current errseq_t value.
  * @eseq: Pointer to errseq_t to be sampled.
  *
- * This function allows callers to sample an errseq_t value, marking it as
- * "seen" if required.
+ * This function allows callers to initialise their errseq_t variable.
+ * If the error has been "seen", new callers will not see an old error.
+ * If there is an unseen error in @eseq, the caller of this function will
+ * see it the next time it checks for an error.
  *
+ * Context: Any context.
  * Return: The current errseq value.
  */
 errseq_t errseq_sample(errseq_t *eseq)
 {
        errseq_t old = READ_ONCE(*eseq);
-       errseq_t new = old;
 
-       /*
-        * For the common case of no errors ever having been set, we can skip
-        * marking the SEEN bit. Once an error has been set, the value will
-        * never go back to zero.
-        */
-       if (old != 0) {
-               new |= ERRSEQ_SEEN;
-               if (old != new)
-                       cmpxchg(eseq, old, new);
-       }
-       return new;
+       /* If nobody has seen this error yet, then we can be the first. */
+       if (!(old & ERRSEQ_SEEN))
+               old = 0;
+       return old;
 }
 EXPORT_SYMBOL(errseq_sample);
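A minimal userspace model of the new sampling semantics (the ERRSEQ_SEEN bit position is illustrative, not the kernel's actual layout):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t errseq_t;
#define ERRSEQ_SEEN (1u << 12)          /* assumed bit for this model */

/* An unseen error is deliberately not returned: the sampler gets 0, so
 * its next check against the live counter reports the pending error. */
static errseq_t model_errseq_sample(const errseq_t *eseq)
{
        errseq_t old = *eseq;

        return (old & ERRSEQ_SEEN) ? old : 0;
}

int main(void)
{
        errseq_t eseq = 5;                              /* unseen error */

        printf("sample = %u\n", model_errseq_sample(&eseq));   /* 0 */
        eseq |= ERRSEQ_SEEN;
        printf("sample = %u\n", model_errseq_sample(&eseq));   /* 4101 */
        return 0;
}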
 
index 5985a25e6cbcff7392c50a27a4beea379ab5d62b..5367ffa5c18f9cf98a811dc0e1a2fa15433fa401 100644 (file)
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
        test_find_next_bit(bitmap, BITMAP_LEN);
        test_find_next_zero_bit(bitmap, BITMAP_LEN);
        test_find_last_bit(bitmap, BITMAP_LEN);
-       test_find_first_bit(bitmap, BITMAP_LEN);
+
+       /*
+        * test_find_first_bit() may take some time, so
+        * traverse only part of bitmap to avoid soft lockup.
+        */
+       test_find_first_bit(bitmap, BITMAP_LEN / 10);
        test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
 
        pr_err("\nStart testing find_bit() with sparse bitmap\n");
index e1d1f290bf354540c9148863c931c3f1d38278cf..18989b5b3b56b8b0b59836492606ec99fd3debbe 100644 (file)
@@ -233,13 +233,12 @@ static int kobject_add_internal(struct kobject *kobj)
 
                /* be noisy on error issues */
                if (error == -EEXIST)
-                       WARN(1,
-                            "%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n",
-                            __func__, kobject_name(kobj));
+                       pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n",
+                              __func__, kobject_name(kobj));
                else
-                       WARN(1, "%s failed for %s (error: %d parent: %s)\n",
-                            __func__, kobject_name(kobj), error,
-                            parent ? kobject_name(parent) : "'none'");
+                       pr_err("%s failed for %s (error: %d parent: %s)\n",
+                              __func__, kobject_name(kobj), error,
+                              parent ? kobject_name(parent) : "'none'");
        } else
                kobj->state_in_sysfs = 1;
 
index 15ea216a67ce63128809320bb640057a41de76c6..63d0816ab23b09a2cfac1576a0f74957a5158d81 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/uidgid.h>
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
@@ -231,30 +232,6 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count)
        return r;
 }
 
-#ifdef CONFIG_NET
-static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
-{
-       struct kobject *kobj = data, *ksobj;
-       const struct kobj_ns_type_operations *ops;
-
-       ops = kobj_ns_ops(kobj);
-       if (!ops && kobj->kset) {
-               ksobj = &kobj->kset->kobj;
-               if (ksobj->parent != NULL)
-                       ops = kobj_ns_ops(ksobj->parent);
-       }
-
-       if (ops && ops->netlink_ns && kobj->ktype->namespace) {
-               const void *sock_ns, *ns;
-               ns = kobj->ktype->namespace(kobj);
-               sock_ns = ops->netlink_ns(dsk);
-               return sock_ns != ns;
-       }
-
-       return 0;
-}
-#endif
-
 #ifdef CONFIG_UEVENT_HELPER
 static int kobj_usermode_filter(struct kobject *kobj)
 {
@@ -296,15 +273,44 @@ static void cleanup_uevent_env(struct subprocess_info *info)
 }
 #endif
 
-static int kobject_uevent_net_broadcast(struct kobject *kobj,
-                                       struct kobj_uevent_env *env,
+#ifdef CONFIG_NET
+static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env,
                                        const char *action_string,
                                        const char *devpath)
 {
-       int retval = 0;
-#if defined(CONFIG_NET)
+       struct netlink_skb_parms *parms;
+       struct sk_buff *skb = NULL;
+       char *scratch;
+       size_t len;
+
+       /* allocate message with maximum possible size */
+       len = strlen(action_string) + strlen(devpath) + 2;
+       skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+       if (!skb)
+               return NULL;
+
+       /* add header */
+       scratch = skb_put(skb, len);
+       sprintf(scratch, "%s@%s", action_string, devpath);
+
+       skb_put_data(skb, env->buf, env->buflen);
+
+       parms = &NETLINK_CB(skb);
+       parms->creds.uid = GLOBAL_ROOT_UID;
+       parms->creds.gid = GLOBAL_ROOT_GID;
+       parms->dst_group = 1;
+       parms->portid = 0;
+
+       return skb;
+}
+
+static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env,
+                                        const char *action_string,
+                                        const char *devpath)
+{
        struct sk_buff *skb = NULL;
        struct uevent_sock *ue_sk;
+       int retval = 0;
 
        /* send netlink message */
        list_for_each_entry(ue_sk, &uevent_sock_list, list) {
@@ -314,37 +320,99 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
                        continue;
 
                if (!skb) {
-                       /* allocate message with the maximum possible size */
-                       size_t len = strlen(action_string) + strlen(devpath) + 2;
-                       char *scratch;
-
                        retval = -ENOMEM;
-                       skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+                       skb = alloc_uevent_skb(env, action_string, devpath);
                        if (!skb)
                                continue;
-
-                       /* add header */
-                       scratch = skb_put(skb, len);
-                       sprintf(scratch, "%s@%s", action_string, devpath);
-
-                       skb_put_data(skb, env->buf, env->buflen);
-
-                       NETLINK_CB(skb).dst_group = 1;
                }
 
-               retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb),
-                                                   0, 1, GFP_KERNEL,
-                                                   kobj_bcast_filter,
-                                                   kobj);
+               retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1,
+                                          GFP_KERNEL);
                /* ENOBUFS should be handled in userspace */
                if (retval == -ENOBUFS || retval == -ESRCH)
                        retval = 0;
        }
        consume_skb(skb);
-#endif
+
        return retval;
 }
 
+static int uevent_net_broadcast_tagged(struct sock *usk,
+                                      struct kobj_uevent_env *env,
+                                      const char *action_string,
+                                      const char *devpath)
+{
+       struct user_namespace *owning_user_ns = sock_net(usk)->user_ns;
+       struct sk_buff *skb = NULL;
+       int ret = 0;
+
+       skb = alloc_uevent_skb(env, action_string, devpath);
+       if (!skb)
+               return -ENOMEM;
+
+       /* fix credentials */
+       if (owning_user_ns != &init_user_ns) {
+               struct netlink_skb_parms *parms = &NETLINK_CB(skb);
+               kuid_t root_uid;
+               kgid_t root_gid;
+
+               /* fix uid */
+               root_uid = make_kuid(owning_user_ns, 0);
+               if (uid_valid(root_uid))
+                       parms->creds.uid = root_uid;
+
+               /* fix gid */
+               root_gid = make_kgid(owning_user_ns, 0);
+               if (gid_valid(root_gid))
+                       parms->creds.gid = root_gid;
+       }
+
+       ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL);
+       /* ENOBUFS should be handled in userspace */
+       if (ret == -ENOBUFS || ret == -ESRCH)
+               ret = 0;
+
+       return ret;
+}
+#endif
+
+static int kobject_uevent_net_broadcast(struct kobject *kobj,
+                                       struct kobj_uevent_env *env,
+                                       const char *action_string,
+                                       const char *devpath)
+{
+       int ret = 0;
+
+#ifdef CONFIG_NET
+       const struct kobj_ns_type_operations *ops;
+       const struct net *net = NULL;
+
+       ops = kobj_ns_ops(kobj);
+       if (!ops && kobj->kset) {
+               struct kobject *ksobj = &kobj->kset->kobj;
+               if (ksobj->parent != NULL)
+                       ops = kobj_ns_ops(ksobj->parent);
+       }
+
+       /* kobjects currently only carry network namespace tags, and those
+        * are the only tags relevant here since we want to decide which
+        * network namespaces to broadcast the uevent into.
+        */
+       if (ops && ops->netlink_ns && kobj->ktype->namespace)
+               if (ops->type == KOBJ_NS_TYPE_NET)
+                       net = kobj->ktype->namespace(kobj);
+
+       if (!net)
+               ret = uevent_net_broadcast_untagged(env, action_string,
+                                                   devpath);
+       else
+               ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env,
+                                                 action_string, devpath);
+#endif
+
+       return ret;
+}
+
 static void zap_modalias_env(struct kobj_uevent_env *env)
 {
        static const char modalias_prefix[] = "MODALIAS=";
@@ -703,9 +771,13 @@ static int uevent_net_init(struct net *net)
 
        net->uevent_sock = ue_sk;
 
-       mutex_lock(&uevent_sock_mutex);
-       list_add_tail(&ue_sk->list, &uevent_sock_list);
-       mutex_unlock(&uevent_sock_mutex);
+       /* Restrict uevents to initial user namespace. */
+       if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+               mutex_lock(&uevent_sock_mutex);
+               list_add_tail(&ue_sk->list, &uevent_sock_list);
+               mutex_unlock(&uevent_sock_mutex);
+       }
+
        return 0;
 }
 
@@ -713,9 +785,11 @@ static void uevent_net_exit(struct net *net)
 {
        struct uevent_sock *ue_sk = net->uevent_sock;
 
-       mutex_lock(&uevent_sock_mutex);
-       list_del(&ue_sk->list);
-       mutex_unlock(&uevent_sock_mutex);
+       if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+               mutex_lock(&uevent_sock_mutex);
+               list_del(&ue_sk->list);
+               mutex_unlock(&uevent_sock_mutex);
+       }
 
        netlink_kernel_release(ue_sk->sk);
        kfree(ue_sk);
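
The kobject_uevent.c changes above do two things: uevent_net_broadcast_tagged() rewrites the skb credentials so that a listener inside a user namespace sees the event as coming from its own root (falling back to the existing values when the namespace never mapped uid/gid 0), and uevent_net_init()/uevent_net_exit() keep only init-user-ns sockets on the global uevent_sock_list. A minimal userspace sketch of the credential translation, assuming a single offset-style uid_map entry; the struct and function names below are invented, not the kernel's:

#include <stdio.h>

struct uid_map { unsigned ns_first, global_first, count; };

/* Same shape as make_kuid(): map a namespace-local uid to the global id,
 * or fail when the namespace has no mapping for it. */
static long make_kuid_sketch(const struct uid_map *m, unsigned ns_uid)
{
        if (ns_uid < m->ns_first || ns_uid - m->ns_first >= m->count)
                return -1;      /* invalid: the diff then skips the fixup */
        return (long)m->global_first + (ns_uid - m->ns_first);
}

int main(void)
{
        /* a container whose uid_map reads "0 100000 65536" */
        struct uid_map ns = { .ns_first = 0, .global_first = 100000,
                              .count = 65536 };
        long kuid = make_kuid_sketch(&ns, 0);

        if (kuid >= 0)
                printf("ns-local root maps to global uid %ld\n", kuid);
        return 0;
}

This is why the hunk guards each assignment with uid_valid()/gid_valid(): a namespace that never mapped id 0 keeps the credentials the skb already had.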
index da9e10c827df554040b8c5eac71badbd2b588a95..43e0cbedc3a0ce1adf0d7807b623c9f8b60f3e53 100644 (file)
@@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 static void __rcu **skip_siblings(struct radix_tree_node **nodep,
                        void __rcu **slot, struct radix_tree_iter *iter)
 {
-       void *sib = node_to_entry(slot - 1);
-
        while (iter->index < iter->next_index) {
                *nodep = rcu_dereference_raw(*slot);
-               if (*nodep && *nodep != sib)
+               if (*nodep && !is_sibling_entry(iter->node, *nodep))
                        return slot;
                slot++;
                iter->index = __radix_tree_iter_add(iter, 1);
@@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot,
                                struct radix_tree_iter *iter, unsigned flags)
 {
        unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
-       struct radix_tree_node *node = rcu_dereference_raw(*slot);
+       struct radix_tree_node *node;
 
        slot = skip_siblings(&node, slot, iter);
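
The skip_siblings() fix replaces a comparison against a single pointer precomputed from `slot - 1` with a per-entry is_sibling_entry() test against the iterator's node; while a multi-order entry is being split or joined, sibling slots need not all point at `slot - 1`, so the old test could mis-classify them mid-update. A toy model of the predicate approach, assuming (as a simplification of the kernel's entry tagging) that sibling slots are encoded as pointers back into the slot array:

#include <stdio.h>
#include <stdbool.h>

#define SLOTS 8

/* A "sibling" entry points back into this node's own slot array. */
static bool is_sibling_entry(void **slots, void *entry)
{
        return entry >= (void *)slots && entry < (void *)(slots + SLOTS);
}

int main(void)
{
        int a = 1, b = 2;
        void *slots[SLOTS] = { &a, 0, 0, 0, &b, 0, 0, 0 };
        int i;

        /* two order-2 entries: slots 1-3 shadow slot 0, slots 5-7 shadow 4 */
        for (i = 1; i < 4; i++)
                slots[i] = &slots[0];
        for (i = 5; i < SLOTS; i++)
                slots[i] = &slots[4];

        for (i = 0; i < SLOTS; i++)
                if (slots[i] && !is_sibling_entry(slots, slots[i]))
                        printf("canonical entry at slot %d\n", i);
        return 0;
}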
 
index 2b2b79974b614a94e5325e8c2271804cb27069aa..9427b5766134cb139ef385b27f92f6027fecceca 100644 (file)
@@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
  * For a completely stable walk you should construct your own data
  * structure outside the hash table.
  *
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
  *
  * You must call rhashtable_walk_exit after this function returns.
  */
@@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
        __acquires(RCU)
 {
        struct rhashtable *ht = iter->ht;
+       bool rhlist = ht->rhlist;
 
        rcu_read_lock();
 
@@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
                list_del(&iter->walker.list);
        spin_unlock(&ht->lock);
 
-       if (!iter->walker.tbl && !iter->end_of_table) {
+       if (iter->end_of_table)
+               return 0;
+       if (!iter->walker.tbl) {
                iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+               iter->slot = 0;
+               iter->skip = 0;
                return -EAGAIN;
        }
 
+       if (iter->p && !rhlist) {
+               /*
+                * We need to validate that 'p' is still in the table, and
+                * if so, update 'skip'
+                */
+               struct rhash_head *p;
+               int skip = 0;
+               rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+                       skip++;
+                       if (p == iter->p) {
+                               iter->skip = skip;
+                               goto found;
+                       }
+               }
+               iter->p = NULL;
+       } else if (iter->p && rhlist) {
+               /* Need to validate that 'list' is still in the table, and
+                * if so, update 'skip' and 'p'.
+                */
+               struct rhash_head *p;
+               struct rhlist_head *list;
+               int skip = 0;
+               rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+                       for (list = container_of(p, struct rhlist_head, rhead);
+                            list;
+                            list = rcu_dereference(list->next)) {
+                               skip++;
+                               if (list == iter->list) {
+                                       iter->p = p;
+                                       iter->skip = skip;
+                                       goto found;
+                               }
+                       }
+               }
+               iter->p = NULL;
+       }
+found:
        return 0;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
@@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
                iter->walker.tbl = NULL;
        spin_unlock(&ht->lock);
 
-       iter->p = NULL;
-
 out:
        rcu_read_unlock();
 }
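
The rhashtable_walk_start_check() additions make stop/start resumable without skipping or repeating elements: the remembered cursor (iter->p, or iter->list for rhlist tables) is searched for again in its bucket and iter->skip recomputed, and only if the element was removed in the meantime does the walk fall back to rescanning the bucket. A minimal sketch of that revalidation over a plain singly linked bucket; the names are invented:

#include <stdio.h>

struct node { int key; struct node *next; };

/* Returns 1 and sets *skip when the cursor is still in the bucket,
 * else returns 0 with *skip reset so the bucket is rescanned. */
static int revalidate(struct node *bucket, struct node *cursor, int *skip)
{
        struct node *p;
        int n = 0;

        for (p = bucket; p; p = p->next) {
                n++;
                if (p == cursor) {
                        *skip = n;      /* resume right after the cursor */
                        return 1;
                }
        }
        *skip = 0;
        return 0;
}

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        int skip;

        revalidate(&a, &b, &skip);
        printf("cursor present, resume with skip=%d\n", skip);
        revalidate(&a, NULL, &skip);    /* cursor was removed meanwhile */
        printf("cursor gone, restart with skip=%d\n", skip);
        return 0;
}

Dropping `iter->p = NULL` from rhashtable_walk_stop() is what lets the cursor survive across the stop/start pair in the first place.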
index fece57566d45b24454b9cb766d4eecd652031ab2..cc640588f1457337217586c4ed5fd1ff072933c7 100644 (file)
@@ -714,7 +714,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
        phys_addr = swiotlb_tbl_map_single(dev,
                        __phys_to_dma(dev, io_tlb_start),
-                       0, size, DMA_FROM_DEVICE, 0);
+                       0, size, DMA_FROM_DEVICE, attrs);
        if (phys_addr == SWIOTLB_MAP_ERROR)
                goto out_warn;
 
@@ -737,7 +737,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
        swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
                        DMA_ATTR_SKIP_CPU_SYNC);
 out_warn:
-       if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
+       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
                dev_warn(dev,
                        "swiotlb: coherent allocation failed, size=%zu\n",
                        size);
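
The second swiotlb hunk is a pure polarity fix: the allocation-failure warning previously fired only when DMA_ATTR_NO_WARN *was* set, i.e. exactly when the caller had asked for silence. A trivial demonstration of the corrected predicate; the attrs bit value below is a stand-in, not the kernel's definition:

#include <stdio.h>

#define DMA_ATTR_NO_WARN (1UL << 0)     /* stand-in bit for this sketch */

static void alloc_failed(unsigned long attrs, size_t size)
{
        if (!(attrs & DMA_ATTR_NO_WARN))
                fprintf(stderr, "coherent allocation failed, size=%zu\n", size);
}

int main(void)
{
        alloc_failed(0, 4096);                  /* warns */
        alloc_failed(DMA_ATTR_NO_WARN, 4096);   /* silent, as requested */
        return 0;
}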
index de16f7869fb19b72b6db134fbc13f8d3b4495f90..6cd7d0740005954ed37f33617298faee60816386 100644 (file)
@@ -331,23 +331,32 @@ static void noinline __init test_mem_optimisations(void)
        unsigned int start, nbits;
 
        for (start = 0; start < 1024; start += 8) {
-               memset(bmap1, 0x5a, sizeof(bmap1));
-               memset(bmap2, 0x5a, sizeof(bmap2));
                for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+                       memset(bmap1, 0x5a, sizeof(bmap1));
+                       memset(bmap2, 0x5a, sizeof(bmap2));
+
                        bitmap_set(bmap1, start, nbits);
                        __bitmap_set(bmap2, start, nbits);
-                       if (!bitmap_equal(bmap1, bmap2, 1024))
+                       if (!bitmap_equal(bmap1, bmap2, 1024)) {
                                printk("set not equal %d %d\n", start, nbits);
-                       if (!__bitmap_equal(bmap1, bmap2, 1024))
+                               failed_tests++;
+                       }
+                       if (!__bitmap_equal(bmap1, bmap2, 1024)) {
                                printk("set not __equal %d %d\n", start, nbits);
+                               failed_tests++;
+                       }
 
                        bitmap_clear(bmap1, start, nbits);
                        __bitmap_clear(bmap2, start, nbits);
-                       if (!bitmap_equal(bmap1, bmap2, 1024))
+                       if (!bitmap_equal(bmap1, bmap2, 1024)) {
                                printk("clear not equal %d %d\n", start, nbits);
-                       if (!__bitmap_equal(bmap1, bmap2, 1024))
+                               failed_tests++;
+                       }
+                       if (!__bitmap_equal(bmap1, bmap2, 1024)) {
                                printk("clear not __equal %d %d\n", start,
                                                                        nbits);
+                               failed_tests++;
+                       }
                }
        }
 }
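
Two fixes share this hunk: the 0x5a poisoning moves inside the inner loop, because after the first sub-case bitmap_set()/bitmap_clear() leave bmap1 and bmap2 already equal and a later regression could hide behind that stale state; and each mismatch now bumps failed_tests so the suite reports a countable result instead of only printk noise. The general fixture-reset pattern as a small standalone harness (assumed structure, not the kernel's test driver):

#include <stdio.h>
#include <string.h>

static int failed_tests;

static void check_equal(const void *a, const void *b, size_t n,
                        const char *what)
{
        if (memcmp(a, b, n)) {
                printf("%s: buffers differ\n", what);
                failed_tests++;
        }
}

int main(void)
{
        unsigned char ref[16], out[16];
        int case_nr;

        for (case_nr = 0; case_nr < 8; case_nr++) {
                memset(ref, 0x5a, sizeof(ref)); /* re-poison every case */
                memset(out, 0x5a, sizeof(out));
                ref[case_nr] = 0;               /* reference operation */
                out[case_nr] = 0;               /* implementation under test */
                check_equal(ref, out, sizeof(ref), "clear");
        }
        printf("failed tests: %d\n", failed_tests);
        return 0;
}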
index 8e157806df7a6d78fed7afc63787d8517fbcc976..317f231462d4d98b004432e439a98e2d5e2a8a55 100644 (file)
@@ -386,116 +386,6 @@ static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
        return 0;
 }
 
-#define PUSH_CNT 68
-/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
-static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
-{
-       unsigned int len = BPF_MAXINSNS;
-       struct bpf_insn *insn;
-       int i = 0, j, k = 0;
-
-       insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
-       if (!insn)
-               return -ENOMEM;
-
-       insn[i++] = BPF_MOV64_REG(R6, R1);
-loop:
-       for (j = 0; j < PUSH_CNT; j++) {
-               insn[i++] = BPF_LD_ABS(BPF_B, 0);
-               insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
-               i++;
-               insn[i++] = BPF_MOV64_REG(R1, R6);
-               insn[i++] = BPF_MOV64_IMM(R2, 1);
-               insn[i++] = BPF_MOV64_IMM(R3, 2);
-               insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-                                        bpf_skb_vlan_push_proto.func - __bpf_call_base);
-               insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
-               i++;
-       }
-
-       for (j = 0; j < PUSH_CNT; j++) {
-               insn[i++] = BPF_LD_ABS(BPF_B, 0);
-               insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
-               i++;
-               insn[i++] = BPF_MOV64_REG(R1, R6);
-               insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-                                        bpf_skb_vlan_pop_proto.func - __bpf_call_base);
-               insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
-               i++;
-       }
-       if (++k < 5)
-               goto loop;
-
-       for (; i < len - 1; i++)
-               insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef);
-
-       insn[len - 1] = BPF_EXIT_INSN();
-
-       self->u.ptr.insns = insn;
-       self->u.ptr.len = len;
-
-       return 0;
-}
-
-static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
-{
-       struct bpf_insn *insn;
-
-       insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
-       if (!insn)
-               return -ENOMEM;
-
-       /* Due to func address being non-const, we need to
-        * assemble this here.
-        */
-       insn[0] = BPF_MOV64_REG(R6, R1);
-       insn[1] = BPF_LD_ABS(BPF_B, 0);
-       insn[2] = BPF_LD_ABS(BPF_H, 0);
-       insn[3] = BPF_LD_ABS(BPF_W, 0);
-       insn[4] = BPF_MOV64_REG(R7, R6);
-       insn[5] = BPF_MOV64_IMM(R6, 0);
-       insn[6] = BPF_MOV64_REG(R1, R7);
-       insn[7] = BPF_MOV64_IMM(R2, 1);
-       insn[8] = BPF_MOV64_IMM(R3, 2);
-       insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-                              bpf_skb_vlan_push_proto.func - __bpf_call_base);
-       insn[10] = BPF_MOV64_REG(R6, R7);
-       insn[11] = BPF_LD_ABS(BPF_B, 0);
-       insn[12] = BPF_LD_ABS(BPF_H, 0);
-       insn[13] = BPF_LD_ABS(BPF_W, 0);
-       insn[14] = BPF_MOV64_IMM(R0, 42);
-       insn[15] = BPF_EXIT_INSN();
-
-       self->u.ptr.insns = insn;
-       self->u.ptr.len = 16;
-
-       return 0;
-}
-
-static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
-{
-       unsigned int len = BPF_MAXINSNS;
-       struct bpf_insn *insn;
-       int i = 0;
-
-       insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
-       if (!insn)
-               return -ENOMEM;
-
-       insn[i++] = BPF_MOV64_REG(R6, R1);
-       insn[i++] = BPF_LD_ABS(BPF_B, 0);
-       insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2);
-       i++;
-       while (i < len - 1)
-               insn[i++] = BPF_LD_ABS(BPF_B, 1);
-       insn[i] = BPF_EXIT_INSN();
-
-       self->u.ptr.insns = insn;
-       self->u.ptr.len = len;
-
-       return 0;
-}
-
 static int __bpf_fill_stxdw(struct bpf_test *self, int size)
 {
        unsigned int len = BPF_MAXINSNS;
@@ -1987,40 +1877,6 @@ static struct bpf_test tests[] = {
                { },
                { { 0, -1 } }
        },
-       {
-               "INT: DIV + ABS",
-               .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R6, R1),
-                       BPF_LD_ABS(BPF_B, 3),
-                       BPF_ALU64_IMM(BPF_MOV, R2, 2),
-                       BPF_ALU32_REG(BPF_DIV, R0, R2),
-                       BPF_ALU64_REG(BPF_MOV, R8, R0),
-                       BPF_LD_ABS(BPF_B, 4),
-                       BPF_ALU64_REG(BPF_ADD, R8, R0),
-                       BPF_LD_IND(BPF_B, R8, -70),
-                       BPF_EXIT_INSN(),
-               },
-               INTERNAL,
-               { 10, 20, 30, 40, 50 },
-               { { 4, 0 }, { 5, 10 } }
-       },
-       {
-               /* This one doesn't go through verifier, but is just raw insn
-                * as opposed to cBPF tests from here. Thus div by 0 tests are
-                * done in test_verifier in BPF kselftests.
-                */
-               "INT: DIV by -1",
-               .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R6, R1),
-                       BPF_ALU64_IMM(BPF_MOV, R7, -1),
-                       BPF_LD_ABS(BPF_B, 3),
-                       BPF_ALU32_REG(BPF_DIV, R0, R7),
-                       BPF_EXIT_INSN(),
-               },
-               INTERNAL,
-               { 10, 20, 30, 40, 50 },
-               { { 3, 0 }, { 4, 0 } }
-       },
        {
                "check: missing ret",
                .u.insns = {
@@ -2383,50 +2239,6 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 1 } }
        },
-       {
-               "nmap reduced",
-               .u.insns_int = {
-                       BPF_MOV64_REG(R6, R1),
-                       BPF_LD_ABS(BPF_H, 12),
-                       BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28),
-                       BPF_LD_ABS(BPF_H, 12),
-                       BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26),
-                       BPF_MOV32_IMM(R0, 18),
-                       BPF_STX_MEM(BPF_W, R10, R0, -64),
-                       BPF_LDX_MEM(BPF_W, R7, R10, -64),
-                       BPF_LD_IND(BPF_W, R7, 14),
-                       BPF_STX_MEM(BPF_W, R10, R0, -60),
-                       BPF_MOV32_IMM(R0, 280971478),
-                       BPF_STX_MEM(BPF_W, R10, R0, -56),
-                       BPF_LDX_MEM(BPF_W, R7, R10, -56),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -60),
-                       BPF_ALU32_REG(BPF_SUB, R0, R7),
-                       BPF_JMP_IMM(BPF_JNE, R0, 0, 15),
-                       BPF_LD_ABS(BPF_H, 12),
-                       BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13),
-                       BPF_MOV32_IMM(R0, 22),
-                       BPF_STX_MEM(BPF_W, R10, R0, -56),
-                       BPF_LDX_MEM(BPF_W, R7, R10, -56),
-                       BPF_LD_IND(BPF_H, R7, 14),
-                       BPF_STX_MEM(BPF_W, R10, R0, -52),
-                       BPF_MOV32_IMM(R0, 17366),
-                       BPF_STX_MEM(BPF_W, R10, R0, -48),
-                       BPF_LDX_MEM(BPF_W, R7, R10, -48),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -52),
-                       BPF_ALU32_REG(BPF_SUB, R0, R7),
-                       BPF_JMP_IMM(BPF_JNE, R0, 0, 2),
-                       BPF_MOV32_IMM(R0, 256),
-                       BPF_EXIT_INSN(),
-                       BPF_MOV32_IMM(R0, 0),
-                       BPF_EXIT_INSN(),
-               },
-               INTERNAL,
-               { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0,
-                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6},
-               { { 38, 256 } },
-               .stack_depth = 64,
-       },
        /* BPF_ALU | BPF_MOV | BPF_X */
        {
                "ALU_MOV_X: dst = 2",
@@ -5485,22 +5297,6 @@ static struct bpf_test tests[] = {
                { { 1, 0xbee } },
                .fill_helper = bpf_fill_ld_abs_get_processor_id,
        },
-       {
-               "BPF_MAXINSNS: ld_abs+vlan_push/pop",
-               { },
-               INTERNAL,
-               { 0x34 },
-               { { ETH_HLEN, 0xbef } },
-               .fill_helper = bpf_fill_ld_abs_vlan_push_pop,
-       },
-       {
-               "BPF_MAXINSNS: jump around ld_abs",
-               { },
-               INTERNAL,
-               { 10, 11 },
-               { { 2, 10 } },
-               .fill_helper = bpf_fill_jump_around_ld_abs,
-       },
        /*
         * LD_IND / LD_ABS on fragmented SKBs
         */
@@ -5682,6 +5478,53 @@ static struct bpf_test tests[] = {
                { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
                { {0x40, 0x05 } },
        },
+       {
+               "LD_IND byte positive offset, all ff",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+               { {0x40, 0xff } },
+       },
+       {
+               "LD_IND byte positive offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_IND byte negative offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 } },
+       },
+       {
+               "LD_IND byte negative offset, multiple calls",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3b),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 1),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 2),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 3),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 4),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x82 }, },
+       },
        {
                "LD_IND halfword positive offset",
                .u.insns = {
@@ -5730,6 +5573,39 @@ static struct bpf_test tests[] = {
                },
                { {0x40, 0x66cc } },
        },
+       {
+               "LD_IND halfword positive offset, all ff",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3d),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+               { {0x40, 0xffff } },
+       },
+       {
+               "LD_IND halfword positive offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_IND halfword negative offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 } },
+       },
        {
                "LD_IND word positive offset",
                .u.insns = {
@@ -5820,6 +5696,39 @@ static struct bpf_test tests[] = {
                },
                { {0x40, 0x66cc77dd } },
        },
+       {
+               "LD_IND word positive offset, all ff",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3b),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+               { {0x40, 0xffffffff } },
+       },
+       {
+               "LD_IND word positive offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_IND word negative offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+                       BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 } },
+       },
        {
                "LD_ABS byte",
                .u.insns = {
@@ -5837,6 +5746,68 @@ static struct bpf_test tests[] = {
                },
                { {0x40, 0xcc } },
        },
+       {
+               "LD_ABS byte positive offset, all ff",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+               { {0x40, 0xff } },
+       },
+       {
+               "LD_ABS byte positive offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_ABS byte negative offset, out of bounds load",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, -1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC | FLAG_EXPECTED_FAIL,
+               .expected_errcode = -EINVAL,
+       },
+       {
+               "LD_ABS byte negative offset, in bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x82 }, },
+       },
+       {
+               "LD_ABS byte negative offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_ABS byte negative offset, multiple calls",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3c),
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3d),
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3e),
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x82 }, },
+       },
        {
                "LD_ABS halfword",
                .u.insns = {
@@ -5871,6 +5842,55 @@ static struct bpf_test tests[] = {
                },
                { {0x40, 0x99ff } },
        },
+       {
+               "LD_ABS halfword positive offset, all ff",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3e),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+               { {0x40, 0xffff } },
+       },
+       {
+               "LD_ABS halfword positive offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_ABS halfword negative offset, out of bounds load",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_H, -1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC | FLAG_EXPECTED_FAIL,
+               .expected_errcode = -EINVAL,
+       },
+       {
+               "LD_ABS halfword negative offset, in bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x1982 }, },
+       },
+       {
+               "LD_ABS halfword negative offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
        {
                "LD_ABS word",
                .u.insns = {
@@ -5939,6 +5959,140 @@ static struct bpf_test tests[] = {
                },
                { {0x40, 0x88ee99ff } },
        },
+       {
+               "LD_ABS word positive offset, all ff",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3c),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+               { {0x40, 0xffffffff } },
+       },
+       {
+               "LD_ABS word positive offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LD_ABS word negative offset, out of bounds load",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_W, -1),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC | FLAG_EXPECTED_FAIL,
+               .expected_errcode = -EINVAL,
+       },
+       {
+               "LD_ABS word negative offset, in bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x25051982 }, },
+       },
+       {
+               "LD_ABS word negative offset, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x3f, 0 }, },
+       },
+       {
+               "LDX_MSH standalone, preserved A",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0xffeebbaa }, },
+       },
+       {
+               "LDX_MSH standalone, preserved A 2",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0x175e9d63),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3d),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3f),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x175e9d63 }, },
+       },
+       {
+               "LDX_MSH standalone, test result 1",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+                       BPF_STMT(BPF_MISC | BPF_TXA, 0),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x14 }, },
+       },
+       {
+               "LDX_MSH standalone, test result 2",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e),
+                       BPF_STMT(BPF_MISC | BPF_TXA, 0),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x24 }, },
+       },
+       {
+               "LDX_MSH standalone, negative offset",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, -1),
+                       BPF_STMT(BPF_MISC | BPF_TXA, 0),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0 }, },
+       },
+       {
+               "LDX_MSH standalone, negative offset 2",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, SKF_LL_OFF + 0x3e),
+                       BPF_STMT(BPF_MISC | BPF_TXA, 0),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0x24 }, },
+       },
+       {
+               "LDX_MSH standalone, out of bounds",
+               .u.insns = {
+                       BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+                       BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x40),
+                       BPF_STMT(BPF_MISC | BPF_TXA, 0),
+                       BPF_STMT(BPF_RET | BPF_A, 0x0),
+               },
+               CLASSIC,
+               { [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+               { {0x40, 0 }, },
+       },
        /*
         * verify that the interpreter or JIT correctly sets A and X
         * to 0.
@@ -6127,14 +6281,6 @@ static struct bpf_test tests[] = {
                {},
                { {0x1, 0x42 } },
        },
-       {
-               "LD_ABS with helper changing skb data",
-               { },
-               INTERNAL,
-               { 0x34 },
-               { { ETH_HLEN, 42 } },
-               .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
-       },
        /* Checking interpreter vs JIT wrt signed extended imms. */
        {
                "JNE signed compare, test 1",
index 30c0cb8cc9bce78089cb6ad48bcb6b3d5d02e6b2..23920c5ff72859c79f4f50e8e267b56c8f230c7c 100644 (file)
@@ -1669,19 +1669,22 @@ char *pointer_string(char *buf, char *end, const void *ptr,
        return number(buf, end, (unsigned long int)ptr, spec);
 }
 
-static bool have_filled_random_ptr_key __read_mostly;
+static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
 static siphash_key_t ptr_key __read_mostly;
 
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static void enable_ptr_key_workfn(struct work_struct *work)
 {
        get_random_bytes(&ptr_key, sizeof(ptr_key));
-       /*
-        * have_filled_random_ptr_key==true is dependent on get_random_bytes().
-        * ptr_to_id() needs to see have_filled_random_ptr_key==true
-        * after get_random_bytes() returns.
-        */
-       smp_mb();
-       WRITE_ONCE(have_filled_random_ptr_key, true);
+       /* Needs to run from preemptible context */
+       static_branch_disable(&not_filled_random_ptr_key);
+}
+
+static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+static void fill_random_ptr_key(struct random_ready_callback *unused)
+{
+       /* This may be in an interrupt handler. */
+       queue_work(system_unbound_wq, &enable_ptr_key_work);
 }
 
 static struct random_ready_callback random_ready = {
@@ -1695,7 +1698,8 @@ static int __init initialize_ptr_random(void)
        if (!ret) {
                return 0;
        } else if (ret == -EALREADY) {
-               fill_random_ptr_key(&random_ready);
+               /* This is in preemptible context */
+               enable_ptr_key_workfn(&enable_ptr_key_work);
                return 0;
        }
 
@@ -1709,7 +1713,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
        unsigned long hashval;
        const int default_width = 2 * sizeof(ptr);
 
-       if (unlikely(!have_filled_random_ptr_key)) {
+       if (static_branch_unlikely(&not_filled_random_ptr_key)) {
                spec.field_width = default_width;
                /* string length must be less than default_width */
                return string(buf, end, "(ptrval)", spec);
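
The vsprintf change replaces the smp_mb()/boolean pair with a static key, and, because the random-ready callback can fire in hard-irq context while static_branch_disable() must run preemptibly, the key flip is deferred to system_unbound_wq; only the -EALREADY path, which is known to be preemptible, enables it synchronously. A userspace analogue of the "queue the heavy enable, keep the check cheap" shape, with pthreads standing in for the workqueue and invented names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool ptr_key_ready;

/* "workqueue" context: safe to do the expensive setup, then flip the flag */
static void *enable_ptr_key_workfn(void *arg)
{
        (void)arg;
        /* ... stand-in for get_random_bytes(&ptr_key, ...) ... */
        atomic_store(&ptr_key_ready, true);
        return NULL;
}

int main(void)
{
        pthread_t worker;

        /* "callback" context: only schedule the work, do nothing heavy */
        pthread_create(&worker, NULL, enable_ptr_key_workfn, NULL);
        pthread_join(worker, NULL);

        printf("%%p prints %s\n",
               atomic_load(&ptr_key_ready) ? "a hashed value" : "(ptrval)");
        return 0;
}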
index d5004d82a1d6d7f9b7ec8bc994bd9320f7ea8ced..e14c01513bfd0777baec40a0a3555571c1621b11 100644 (file)
@@ -636,6 +636,7 @@ config DEFERRED_STRUCT_PAGE_INIT
        default n
        depends on NO_BOOTMEM
        depends on !FLATMEM
+       depends on !NEED_PER_CPU_KM
        help
          Ordinarily all struct pages are initialised during early boot in a
          single thread. On very large machines this can take a considerable
index 023190c69dce71c528cccb7498e7456d41f674d3..7441bd93b732000f7405f191991b102eefc26832 100644 (file)
@@ -115,6 +115,7 @@ static int bdi_debug_register(struct backing_dev_info *bdi, const char *name)
                                               bdi, &bdi_debug_stats_fops);
        if (!bdi->debug_stats) {
                debugfs_remove(bdi->debug_dir);
+               bdi->debug_dir = NULL;
                return -ENOMEM;
        }
 
@@ -383,7 +384,7 @@ static void wb_shutdown(struct bdi_writeback *wb)
         * the barrier provided by test_and_clear_bit() above.
         */
        smp_wmb();
-       clear_bit(WB_shutting_down, &wb->state);
+       clear_and_wake_up_bit(WB_shutting_down, &wb->state);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
index 9276bdb2343c73884c2be1c11dca5c5c462054ce..0604cb02e6f3b7a3bdbc4eee71a9b0bd413c0212 100644 (file)
@@ -786,7 +786,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
        VM_BUG_ON_PAGE(!PageLocked(new), new);
        VM_BUG_ON_PAGE(new->mapping, new);
 
-       error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+       error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK);
        if (!error) {
                struct address_space *mapping = old->mapping;
                void (*freepage)(struct page *);
@@ -842,7 +842,7 @@ static int __add_to_page_cache_locked(struct page *page,
                        return error;
        }
 
-       error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
+       error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK);
        if (error) {
                if (!huge)
                        mem_cgroup_cancel_charge(page, memcg, false);
@@ -1585,8 +1585,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
                if (fgp_flags & FGP_ACCESSED)
                        __SetPageReferenced(page);
 
-               err = add_to_page_cache_lru(page, mapping, offset,
-                               gfp_mask & GFP_RECLAIM_MASK);
+               err = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
                if (unlikely(err)) {
                        put_page(page);
                        page = NULL;
@@ -2387,7 +2386,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
                if (!page)
                        return -ENOMEM;
 
-               ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_KERNEL);
+               ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
                if (ret == 0)
                        ret = mapping->a_ops->readpage(file, page);
                else if (ret == -EEXIST)
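
The filemap hunks centralize gfp-mask hygiene: the radix-tree preload paths now keep only the reclaim-policy bits (GFP_RECLAIM_MASK) rather than merely clearing __GFP_HIGHMEM, since tree nodes come from slab and placement bits are meaningless there, and the add_to_page_cache_lru() callers stop pre-masking because the preload path does it for them. A sketch of the masking idea; the bit values are stand-ins, not the kernel's:

#include <stdio.h>

#define __GFP_HIGHMEM    (1u << 0)      /* placement hint: drop for slab */
#define __GFP_NOFS       (1u << 1)      /* reclaim policy: keep */
#define __GFP_NORETRY    (1u << 2)      /* reclaim policy: keep */
#define GFP_RECLAIM_MASK (__GFP_NOFS | __GFP_NORETRY)

int main(void)
{
        unsigned gfp = __GFP_HIGHMEM | __GFP_NOFS;

        printf("caller gfp %#x -> preload gfp %#x\n",
               gfp, gfp & GFP_RECLAIM_MASK);
        return 0;
}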
index 76af4cfeaf68149f365cf9b29bef64ebe6f1474b..541904a7c60fd596b1f5bc432fc7932bc005fadf 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -544,6 +544,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
        if (vm_flags & (VM_IO | VM_PFNMAP))
                return -EFAULT;
 
+       if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
+               return -EFAULT;
+
        if (write) {
                if (!(vm_flags & VM_WRITE)) {
                        if (!(gup_flags & FOLL_FORCE))
index 14ed6ee5e02fc8bc6acc767de9e42ed464ce5675..a3a1815f8e11810ec1b94cb25d3c5dd6eb30f556 100644 (file)
@@ -2925,7 +2925,10 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
                pmde = maybe_pmd_mkwrite(pmde, vma);
 
        flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
-       page_add_anon_rmap(new, vma, mmun_start, true);
+       if (PageAnon(new))
+               page_add_anon_rmap(new, vma, mmun_start, true);
+       else
+               page_add_file_rmap(new, true);
        set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
        if (vma->vm_flags & VM_LOCKED)
                mlock_vma_page(new);
index e074f7c637aa4e62d6268d3e765564e6f2a9cde0..2bd3df3d101a777144f7e91895dbcbcdd0e455cc 100644 (file)
@@ -2192,7 +2192,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 {
        struct memcg_kmem_cache_create_work *cw;
 
-       cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
+       cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
        if (!cw)
                return;
 
index f65dd69e1fd1a7a1dd7c88679ae8aacabe19aacc..8c0af0f7cab18a8e01af67142887abd0ca3c0b81 100644 (file)
@@ -472,7 +472,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
        pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                        page_index(page));
 
-       expected_count += 1 + page_has_private(page);
+       expected_count += hpage_nr_pages(page) + page_has_private(page);
        if (page_count(page) != expected_count ||
                radix_tree_deref_slot_protected(pslot,
                                        &mapping->i_pages.xa_lock) != page) {
@@ -505,7 +505,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
         */
        newpage->index = page->index;
        newpage->mapping = page->mapping;
-       get_page(newpage);      /* add cache reference */
+       page_ref_add(newpage, hpage_nr_pages(page)); /* add cache reference */
        if (PageSwapBacked(page)) {
                __SetPageSwapBacked(newpage);
                if (PageSwapCache(page)) {
@@ -524,13 +524,24 @@ int migrate_page_move_mapping(struct address_space *mapping,
        }
 
        radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+       if (PageTransHuge(page)) {
+               int i;
+               int index = page_index(page);
+
+               for (i = 1; i < HPAGE_PMD_NR; i++) {
+                       pslot = radix_tree_lookup_slot(&mapping->i_pages,
+                                                      index + i);
+                       radix_tree_replace_slot(&mapping->i_pages, pslot,
+                                               newpage + i);
+               }
+       }
 
        /*
         * Drop cache reference from old page by unfreezing
         * to one less reference.
         * We know this isn't the last reference.
         */
-       page_ref_unfreeze(page, expected_count - 1);
+       page_ref_unfreeze(page, expected_count - hpage_nr_pages(page));
 
        xa_unlock(&mapping->i_pages);
        /* Leave irq disabled to prevent preemption while updating stats */
@@ -1622,6 +1633,9 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
                current_node = NUMA_NO_NODE;
        }
 out_flush:
+       if (list_empty(&pagelist))
+               return err;
+
        /* Make sure we do not overwrite the existing error */
        err1 = do_move_pages_to_node(mm, &pagelist, current_node);
        if (!err1)
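
For THP migration the page cache holds one reference per subpage, so the freeze target, the unfreeze value and the references taken on the new page must all scale with hpage_nr_pages(page) (page_ref_add() instead of a single get_page()), and each subpage's radix-tree slot is repointed in the new PageTransHuge() loop. The refcount arithmetic, worked through under the common x86-64 assumption of 4 KiB base pages and 2 MiB THP (HPAGE_PMD_NR == 512):

#include <stdio.h>

int main(void)
{
        int hpage_nr_pages = 512;       /* subpages of one 2 MiB THP */
        int page_has_private = 0;

        /* old math: 1 + private, short by 511 cache references */
        int old_expected = 1 + page_has_private;
        int new_expected = hpage_nr_pages + page_has_private;

        printf("THP cache references: %d (old code expected %d)\n",
               new_expected, old_expected);
        return 0;
}

The do_pages_move() hunk is independent: bailing out when pagelist is empty keeps an earlier error (e.g. from add_page_for_migration()) from being overwritten by the final flush.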
index 188f195883b90b40d8371e8e04ff5acd4d9d1526..fc41c0543d7fab21542d2c314f40dfbea69ebbe3 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_init = {
        __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+       return prot;
+}
+#endif
+
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-       return __pgprot(pgprot_val(protection_map[vm_flags &
+       pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
                                (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
                        pgprot_val(arch_vm_get_page_prot(vm_flags)));
+
+       return arch_filter_pgprot(ret);
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
@@ -1315,6 +1324,35 @@ static inline int mlock_future_check(struct mm_struct *mm,
        return 0;
 }
 
+static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
+{
+       if (S_ISREG(inode->i_mode))
+               return MAX_LFS_FILESIZE;
+
+       if (S_ISBLK(inode->i_mode))
+               return MAX_LFS_FILESIZE;
+
+       /* Special "we do even unsigned file positions" case */
+       if (file->f_mode & FMODE_UNSIGNED_OFFSET)
+               return 0;
+
+       /* Yes, random drivers might want more. But I'm tired of buggy drivers */
+       return ULONG_MAX;
+}
+
+static inline bool file_mmap_ok(struct file *file, struct inode *inode,
+                               unsigned long pgoff, unsigned long len)
+{
+       u64 maxsize = file_mmap_size_max(file, inode);
+
+       if (maxsize && len > maxsize)
+               return false;
+       maxsize -= len;
+       if (pgoff > maxsize >> PAGE_SHIFT)
+               return false;
+       return true;
+}
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
@@ -1400,6 +1438,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
                struct inode *inode = file_inode(file);
                unsigned long flags_mask;
 
+               if (!file_mmap_ok(file, inode, pgoff, len))
+                       return -EOVERFLOW;
+
                flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
 
                switch (flags & MAP_TYPE) {
@@ -3015,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
        /* mm's last user has gone, and its about to be pulled down */
        mmu_notifier_release(mm);
 
+       if (unlikely(mm_is_oom_victim(mm))) {
+               /*
+                * Manually reap the mm to free as much memory as possible.
+                * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+                * this mm from further consideration.  Taking mm->mmap_sem for
+                * write after setting MMF_OOM_SKIP will guarantee that the oom
+                * reaper will not run on this mm again after mmap_sem is
+                * dropped.
+                *
+                * Nothing can be holding mm->mmap_sem here and the above call
+                * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+                * __oom_reap_task_mm() will not block.
+                *
+                * This needs to be done before calling munlock_vma_pages_all(),
+                * which clears VM_LOCKED, otherwise the oom reaper cannot
+                * reliably test it.
+                */
+               mutex_lock(&oom_lock);
+               __oom_reap_task_mm(mm);
+               mutex_unlock(&oom_lock);
+
+               set_bit(MMF_OOM_SKIP, &mm->flags);
+               down_write(&mm->mmap_sem);
+               up_write(&mm->mmap_sem);
+       }
+
        if (mm->locked_vm) {
                vma = mm->mmap;
                while (vma) {
@@ -3036,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        unmap_vmas(&tlb, vma, 0, -1);
-
-       if (unlikely(mm_is_oom_victim(mm))) {
-               /*
-                * Wait for oom_reap_task() to stop working on this
-                * mm. Because MMF_OOM_SKIP is already set before
-                * calling down_read(), oom_reap_task() will not run
-                * on this "mm" post up_write().
-                *
-                * mm_is_oom_victim() cannot be set from under us
-                * either because victim->mm is already set to NULL
-                * under task_lock before calling mmput and oom_mm is
-                * set not NULL by the OOM killer only if victim->mm
-                * is found not NULL while holding the task_lock.
-                */
-               set_bit(MMF_OOM_SKIP, &mm->flags);
-               down_write(&mm->mmap_sem);
-               up_write(&mm->mmap_sem);
-       }
        free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb, 0, -1);
 
index ff992fa8760aa1e22fc8d5d07df12e293690e877..8ba6cb88cf58a3d48396f7900ac92e467a2207f0 100644 (file)
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
        return false;
 }
 
-
 #ifdef CONFIG_MMU
 /*
  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
 {
-       struct mmu_gather tlb;
        struct vm_area_struct *vma;
+
+       /*
+        * Tell all users of get_user/copy_from_user etc... that the content
+        * is no longer stable. No barriers really needed because unmapping
+        * should imply barriers already and the reader would hit a page fault
+        * if it stumbled over a reaped memory.
+        */
+       set_bit(MMF_UNSTABLE, &mm->flags);
+
+       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+               if (!can_madv_dontneed_vma(vma))
+                       continue;
+
+               /*
+                * Only anonymous pages have a good chance to be dropped
+                * without additional steps which we cannot afford as we
+                * are OOM already.
+                *
+                * We do not even care about fs backed pages because all
+                * which are reclaimable have already been reclaimed and
+                * we do not want to block exit_mmap by keeping mm ref
+                * count elevated without a good reason.
+                */
+               if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+                       const unsigned long start = vma->vm_start;
+                       const unsigned long end = vma->vm_end;
+                       struct mmu_gather tlb;
+
+                       tlb_gather_mmu(&tlb, mm, start, end);
+                       mmu_notifier_invalidate_range_start(mm, start, end);
+                       unmap_page_range(&tlb, vma, start, end, NULL);
+                       mmu_notifier_invalidate_range_end(mm, start, end);
+                       tlb_finish_mmu(&tlb, start, end);
+               }
+       }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
        bool ret = true;
 
        /*
         * We have to make sure to not race with the victim exit path
         * and cause premature new oom victim selection:
-        * __oom_reap_task_mm           exit_mm
+        * oom_reap_task_mm             exit_mm
         *   mmget_not_zero
         *                                mmput
         *                                  atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
        trace_start_task_reaping(tsk->pid);
 
-       /*
-        * Tell all users of get_user/copy_from_user etc... that the content
-        * is no longer stable. No barriers really needed because unmapping
-        * should imply barriers already and the reader would hit a page fault
-        * if it stumbled over a reaped memory.
-        */
-       set_bit(MMF_UNSTABLE, &mm->flags);
-
-       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-               if (!can_madv_dontneed_vma(vma))
-                       continue;
+       __oom_reap_task_mm(mm);
 
-               /*
-                * Only anonymous pages have a good chance to be dropped
-                * without additional steps which we cannot afford as we
-                * are OOM already.
-                *
-                * We do not even care about fs backed pages because all
-                * which are reclaimable have already been reclaimed and
-                * we do not want to block exit_mmap by keeping mm ref
-                * count elevated without a good reason.
-                */
-               if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
-                       const unsigned long start = vma->vm_start;
-                       const unsigned long end = vma->vm_end;
-
-                       tlb_gather_mmu(&tlb, mm, start, end);
-                       mmu_notifier_invalidate_range_start(mm, start, end);
-                       unmap_page_range(&tlb, vma, start, end, NULL);
-                       mmu_notifier_invalidate_range_end(mm, start, end);
-                       tlb_finish_mmu(&tlb, start, end);
-               }
-       }
        pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                        task_pid_nr(tsk), tsk->comm,
                        K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
        struct mm_struct *mm = tsk->signal->oom_mm;
 
        /* Retry the down_read_trylock(mmap_sem) a few times */
-       while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+       while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
                schedule_timeout_idle(HZ/10);
 
        if (attempts <= MAX_OOM_REAP_RETRIES ||
            test_bit(MMF_OOM_SKIP, &mm->flags))
                goto done;
 
-
        pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
                task_pid_nr(tsk), tsk->comm);
        debug_show_all_locks();
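
The refactor splits the VMA-reaping loop out as __oom_reap_task_mm(), callable from exit_mmap() (see the mm/mmap.c hunk above), and leaves oom_reap_task_mm() with only the mmap_sem trylock and lifecycle checks. The reaper's bounded trylock-and-retry shape, sketched in userspace with a pthread mutex standing in for mmap_sem:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_OOM_REAP_RETRIES 10

static pthread_mutex_t mmap_sem = PTHREAD_MUTEX_INITIALIZER;

static bool oom_reap_task_mm_sketch(void)
{
        if (pthread_mutex_trylock(&mmap_sem) != 0)
                return false;           /* contended: try again later */
        /* ... __oom_reap_task_mm() work would happen here ... */
        pthread_mutex_unlock(&mmap_sem);
        return true;
}

int main(void)
{
        int attempts = 0;

        while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm_sketch())
                usleep(100 * 1000);     /* schedule_timeout_idle(HZ/10) */

        puts(attempts <= MAX_OOM_REAP_RETRIES ? "reaped" : "unable to reap");
        return 0;
}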
index 5c1a3279e63f865664bafcaf4d3c7f98af697cce..337c6afb3345e1e0d6071a63718f4dc6aac140b7 100644 (file)
@@ -2502,13 +2502,13 @@ void account_page_redirty(struct page *page)
        if (mapping && mapping_cap_account_dirty(mapping)) {
                struct inode *inode = mapping->host;
                struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
 
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
                current->nr_dirtied--;
                dec_node_page_state(page, NR_DIRTIED);
                dec_wb_stat(wb, WB_DIRTIED);
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
        }
 }
 EXPORT_SYMBOL(account_page_redirty);
@@ -2614,15 +2614,15 @@ void __cancel_dirty_page(struct page *page)
        if (mapping_cap_account_dirty(mapping)) {
                struct inode *inode = mapping->host;
                struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
 
                lock_page_memcg(page);
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
 
                if (TestClearPageDirty(page))
                        account_page_cleaned(page, mapping, wb);
 
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
                unlock_page_memcg(page);
        } else {
                ClearPageDirty(page);
@@ -2654,7 +2654,7 @@ int clear_page_dirty_for_io(struct page *page)
        if (mapping && mapping_cap_account_dirty(mapping)) {
                struct inode *inode = mapping->host;
                struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
 
                /*
                 * Yes, Virginia, this is indeed insane.
@@ -2691,14 +2691,14 @@ int clear_page_dirty_for_io(struct page *page)
                 * always locked coming in here, so we get the desired
                 * exclusion.
                 */
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
                if (TestClearPageDirty(page)) {
                        dec_lruvec_page_state(page, NR_FILE_DIRTY);
                        dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
                        dec_wb_stat(wb, WB_RECLAIMABLE);
                        ret = 1;
                }
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
                return ret;
        }
        return TestClearPageDirty(page);
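
Replacing `bool locked` with `struct wb_lock_cookie` lets unlocked_inode_to_wb_begin()/unlocked_inode_to_wb_end() hand back everything the end side needs, including the saved irq flags, instead of one bit the caller has to interpret. The begin/end-cookie pattern in general form, sketched with a pthread mutex and invented types:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct lock_cookie { bool locked; };    /* the kernel's also carries flags */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void section_begin(bool need_lock, struct lock_cookie *c)
{
        c->locked = need_lock;
        if (need_lock)
                pthread_mutex_lock(&lock);
}

static void section_end(const struct lock_cookie *c)
{
        if (c->locked)
                pthread_mutex_unlock(&lock);
}

int main(void)
{
        struct lock_cookie cookie = { 0 };

        section_begin(true, &cookie);
        puts("inside the protected section");
        section_end(&cookie);
        return 0;
}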
index f0dd4e4565bc6bc9117fe8ec9b8a2371d7f7f8b4..8d5337fed37b81bcae0e20dad65bf0202e031477 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1374,9 +1374,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                if (!pvmw.pte && (flags & TTU_MIGRATION)) {
                        VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
 
-                       if (!PageAnon(page))
-                               continue;
-
                        set_pmd_migration_entry(&pvmw, page);
                        continue;
                }
index 62eef264a7bd38313aa6855f90b4fa7fd52b0b76..73dc2fcc0eab280f5c5f3620d126c18443b40b31 100644 (file)
@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long pfn;
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-               unsigned long section_nr = pfn_to_section_nr(start_pfn);
+               unsigned long section_nr = pfn_to_section_nr(pfn);
                struct mem_section *ms;
 
                /*
index 536332e988b872973b1e4dc663eebf0c3f595802..a2b9518980ce47c4219a7f28bc705bdf93ae1d68 100644 (file)
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
        "nr_vmscan_immediate_reclaim",
        "nr_dirtied",
        "nr_written",
-       "nr_indirectly_reclaimable",
+       "", /* nr_indirectly_reclaimable */
 
        /* enum writeback_stat_item counters */
        "nr_dirty_threshold",
@@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;
 
+       /* Skip hidden vmstat items. */
+       if (*vmstat_text[off] == '\0')
+               return 0;
+
        seq_puts(m, vmstat_text[off]);
        seq_put_decimal_ull(m, " ", *l);
        seq_putc(m, '\n');
index c0bca6153b95d2257333fabe9c1b3eb398d66488..4b366d181f35d12f9a1e600bb7083bbf4dfe7fff 100644 (file)
@@ -144,7 +144,8 @@ enum z3fold_page_flags {
        PAGE_HEADLESS = 0,
        MIDDLE_CHUNK_MAPPED,
        NEEDS_COMPACTING,
-       PAGE_STALE
+       PAGE_STALE,
+       UNDER_RECLAIM
 };
 
 /*****************
@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
        clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
        clear_bit(NEEDS_COMPACTING, &page->private);
        clear_bit(PAGE_STALE, &page->private);
+       clear_bit(UNDER_RECLAIM, &page->private);
 
        spin_lock_init(&zhdr->page_lock);
        kref_init(&zhdr->refcount);
@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                atomic64_dec(&pool->pages_nr);
                return;
        }
+       if (test_bit(UNDER_RECLAIM, &page->private)) {
+               z3fold_page_unlock(zhdr);
+               return;
+       }
        if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
                z3fold_page_unlock(zhdr);
                return;
@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                        kref_get(&zhdr->refcount);
                        list_del_init(&zhdr->buddy);
                        zhdr->cpu = -1;
+                       set_bit(UNDER_RECLAIM, &page->private);
+                       break;
                }
 
                list_del_init(&page->lru);
@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                                goto next;
                }
 next:
-               spin_lock(&pool->lock);
                if (test_bit(PAGE_HEADLESS, &page->private)) {
                        if (ret == 0) {
-                               spin_unlock(&pool->lock);
                                free_z3fold_page(page);
                                return 0;
                        }
-               } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
-                       atomic64_dec(&pool->pages_nr);
+                       spin_lock(&pool->lock);
+                       list_add(&page->lru, &pool->lru);
+                       spin_unlock(&pool->lock);
+               } else {
+                       z3fold_page_lock(zhdr);
+                       clear_bit(UNDER_RECLAIM, &page->private);
+                       if (kref_put(&zhdr->refcount,
+                                       release_z3fold_page_locked)) {
+                               atomic64_dec(&pool->pages_nr);
+                               return 0;
+                       }
+                       /*
+                        * If we are here, the page is still not completely
+                        * free. Take the global pool lock so we can add it
+                        * back to the LRU list.
+                        */
+                       spin_lock(&pool->lock);
+                       list_add(&page->lru, &pool->lru);
                        spin_unlock(&pool->lock);
-                       return 0;
+                       z3fold_page_unlock(zhdr);
                }
 
-               /*
-                * Add to the beginning of LRU.
-                * Pool lock has to be kept here to ensure the page has
-                * not already been released
-                */
-               list_add(&page->lru, &pool->lru);
+               /* We started off locked, so we need to take the pool lock back */
+               spin_lock(&pool->lock);
        }
        spin_unlock(&pool->lock);
        return -EAGAIN;
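
Taken together, UNDER_RECLAIM implements a small ownership handshake between reclaim and free: reclaim marks the page before dropping the pool lock, a concurrent z3fold_free() that sees the mark backs off and lets reclaim finish, and reclaim clears the mark and drops the final reference itself. Reduced to its core (illustrative sketch, most locking elided):

	/* reclaim side */
	set_bit(UNDER_RECLAIM, &page->private);
	/* ... drop the pool lock, try to evict the buddies ... */
	z3fold_page_lock(zhdr);
	clear_bit(UNDER_RECLAIM, &page->private);
	/* now either release the page or put it back on the LRU */

	/* free side, possibly racing with the above */
	if (test_bit(UNDER_RECLAIM, &page->private)) {
		z3fold_page_unlock(zhdr);	/* reclaim will finish the job */
		return;
	}
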
index 5505ee6ebdbe63ec717bad318f1385d41e5e3582..73a65789271ba9346902dd721b0accd8ce747adc 100644 (file)
@@ -118,17 +118,21 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 }
 
 int vlan_check_real_dev(struct net_device *real_dev,
-                       __be16 protocol, u16 vlan_id)
+                       __be16 protocol, u16 vlan_id,
+                       struct netlink_ext_ack *extack)
 {
        const char *name = real_dev->name;
 
        if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
                pr_info("VLANs not supported on %s\n", name);
+               NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device");
                return -EOPNOTSUPP;
        }
 
-       if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
+       if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL) {
+               NL_SET_ERR_MSG_MOD(extack, "VLAN device already exists");
                return -EEXIST;
+       }
 
        return 0;
 }
@@ -215,7 +219,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
        if (vlan_id >= VLAN_VID_MASK)
                return -ERANGE;
 
-       err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
+       err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id,
+                                 NULL);
        if (err < 0)
                return err;
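
NL_SET_ERR_MSG_MOD() stores a static, module-prefixed string in the netlink extended ack, so the caller gets a human-readable reason alongside the bare errno. The pattern is always the same: record the message, then return the error. A minimal sketch (hypothetical helper, for illustration only):

	static int my_vlan_validate(u16 vid, struct netlink_ext_ack *extack)
	{
		if (vid >= VLAN_VID_MASK) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN id");
			return -ERANGE;
		}
		return 0;
	}

With iproute2, a failing request then prints something like "Error: 8021q: Invalid VLAN id." rather than only "Numerical result out of range".
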
 
index e23aac3e4d377e8d4b574e66cd0c12ed8be01c27..44df1c3df02d3ad0b880a6946a321874516e3967 100644 (file)
@@ -109,7 +109,8 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
 int vlan_check_real_dev(struct net_device *real_dev,
-                       __be16 protocol, u16 vlan_id);
+                       __be16 protocol, u16 vlan_id,
+                       struct netlink_ext_ack *extack);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
index 236452ebbd9ea68fae4820ae3c2946cac7ada6f1..546af0e73ac343a7f1d792ab2f0f5227fd25e645 100644 (file)
@@ -215,7 +215,9 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
        return 0;
 }
 
-/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
+/* Flags are defined in the vlan_flags enum in
+ * include/uapi/linux/if_vlan.h file.
+ */
 int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 {
        struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
index 6689c0b272a7dbbb7685e8ae2024d3d7003643da..9b60c1e399e27e7f0eb300f30479e1f0553079d5 100644 (file)
@@ -47,14 +47,20 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[],
        int err;
 
        if (tb[IFLA_ADDRESS]) {
-               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid link address");
                        return -EINVAL;
-               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+               }
+               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid link address");
                        return -EADDRNOTAVAIL;
+               }
        }
 
-       if (!data)
+       if (!data) {
+               NL_SET_ERR_MSG_MOD(extack, "VLAN properties not specified");
                return -EINVAL;
+       }
 
        if (data[IFLA_VLAN_PROTOCOL]) {
                switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
@@ -62,29 +68,38 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[],
                case htons(ETH_P_8021AD):
                        break;
                default:
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN protocol");
                        return -EPROTONOSUPPORT;
                }
        }
 
        if (data[IFLA_VLAN_ID]) {
                id = nla_get_u16(data[IFLA_VLAN_ID]);
-               if (id >= VLAN_VID_MASK)
+               if (id >= VLAN_VID_MASK) {
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN id");
                        return -ERANGE;
+               }
        }
        if (data[IFLA_VLAN_FLAGS]) {
                flags = nla_data(data[IFLA_VLAN_FLAGS]);
                if ((flags->flags & flags->mask) &
                    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-                     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
+                     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN flags");
                        return -EINVAL;
+               }
        }
 
        err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]);
-       if (err < 0)
+       if (err < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Invalid ingress QOS map");
                return err;
+       }
        err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]);
-       if (err < 0)
+       if (err < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Invalid egress QOS map");
                return err;
+       }
        return 0;
 }
 
@@ -126,14 +141,21 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
        __be16 proto;
        int err;
 
-       if (!data[IFLA_VLAN_ID])
+       if (!data[IFLA_VLAN_ID]) {
+               NL_SET_ERR_MSG_MOD(extack, "VLAN id not specified");
                return -EINVAL;
+       }
 
-       if (!tb[IFLA_LINK])
+       if (!tb[IFLA_LINK]) {
+               NL_SET_ERR_MSG_MOD(extack, "link not specified");
                return -EINVAL;
+       }
+
        real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
-       if (!real_dev)
+       if (!real_dev) {
+               NL_SET_ERR_MSG_MOD(extack, "link does not exist");
                return -ENODEV;
+       }
 
        if (data[IFLA_VLAN_PROTOCOL])
                proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
@@ -146,7 +168,8 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
        dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
        vlan->flags      = VLAN_FLAG_REORDER_HDR;
 
-       err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
+       err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id,
+                                 extack);
        if (err < 0)
                return err;
 
index 6ab36aea77275ef342b88f8e66e0e5c294086fd0..eb9777f0575565cb7144f56edeed2a5f91912cf7 100644 (file)
@@ -104,7 +104,7 @@ EXPORT_SYMBOL(v9fs_unregister_trans);
 
 /**
  * v9fs_get_trans_by_name - get transport with the matching name
- * @name: string identifying transport
+ * @s: string identifying transport
  *
  */
 struct p9_trans_module *v9fs_get_trans_by_name(char *s)
index 38aa6345bdfa2ec9fc52155e5b0eb3b369fdf463..b718db2085b21c8583601a3c9aa23414dc16ef3b 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 
 /**
- *  p9_release_req_pages - Release pages after the transaction.
+ *  p9_release_pages - Release pages after the transaction.
  */
 void p9_release_pages(struct page **pages, int nr_pages)
 {
index 0cfba919d167db9042dcf3fc2b4be7d1c8eaa852..848969fe797904aae1eb3cac7bd18f00382761cd 100644 (file)
@@ -1092,8 +1092,8 @@ static struct p9_trans_module p9_fd_trans = {
 };
 
 /**
- * p9_poll_proc - poll worker thread
- * @a: thread state and arguments
+ * p9_poll_workfn - poll worker thread
+ * @work: work queue
  *
  * polls all v9fs transports for new events and queues the appropriate
  * work to the work queue
index 6d8e3031978f3493edb123a51b58b30b1c2807c5..3d414acb7015d8fc73f20bec381e6200ce3d3d53 100644 (file)
@@ -68,8 +68,6 @@
  * @pd: Protection Domain pointer
  * @qp: Queue Pair pointer
  * @cq: Completion Queue pointer
- * @dm_mr: DMA Memory Region pointer
- * @lkey: The local access only memory region key
  * @timeout: Number of uSecs to wait for connection management events
  * @privport: Whether a privileged port may be used
  * @port: The port to use
@@ -632,7 +630,7 @@ static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
 }
 
 /**
- * trans_create_rdma - Transport method for creating atransport instance
+ * rdma_create_trans - Transport method for creating a transport instance
  * @client: client instance
  * @addr: IP address string
  * @args: Mount options string
index 3aa5a93ad107c1490146240a112e3a9bf3a89b62..4d0372263e5d3057f0b300fbacb6f50a3c58e6ad 100644 (file)
@@ -60,7 +60,6 @@ static atomic_t vp_pinned = ATOMIC_INIT(0);
 
 /**
  * struct virtio_chan - per-instance transport information
- * @initialized: whether the channel is initialized
  * @inuse: whether the channel is in use
  * @lock: protects multiple elements within this structure
  * @client: client instance
@@ -385,8 +384,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
  * @uidata: user buffer that should be used for zero copy read
  * @uodata: user buffer that should be used for zero copy write
  * @inlen: read buffer size
- * @olen: write buffer size
- * @hdrlen: reader header size, This is the size of response protocol data
+ * @outlen: write buffer size
+ * @in_hdr_len: read header size; this is the size of the response protocol data
  *
  */
 static int
index 086a4abdfa7cfcfad4934eb52613f4fe57723eb2..0f19960390a671dc1929575537ff5ad803add2ff 100644 (file)
@@ -485,7 +485,7 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
 
 static int xen_9pfs_front_resume(struct xenbus_device *dev)
 {
-       dev_warn(&dev->dev, "suspsend/resume unsupported\n");
+       dev_warn(&dev->dev, "suspend/resume unsupported\n");
        return 0;
 }
 
index 6fa1a4493b8c7233053a4976fdb3671cfb584fbd..df8d45ef47d8701817b45b8848d9eb3dd52dcfdc 100644 (file)
@@ -59,6 +59,7 @@ source "net/tls/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
+source "net/xdp/Kconfig"
 
 config INET
        bool "TCP/IP networking"
@@ -407,6 +408,9 @@ config GRO_CELLS
        bool
        default n
 
+config SOCK_VALIDATE_XMIT
+       bool
+
 config NET_DEVLINK
        tristate "Network physical/parent device Netlink interface"
        help
index a6147c61b17402ac49a493ee0f32872b0a02e293..77aaddedbd29f7058d945f4fb1cac2de9f87e047 100644 (file)
@@ -85,3 +85,4 @@ obj-y                         += l3mdev/
 endif
 obj-$(CONFIG_QRTR)             += qrtr/
 obj-$(CONFIG_NET_NCSI)         += ncsi/
+obj-$(CONFIG_XDP_SOCKETS)      += xdp/
index 01d5d20a6eb1a51dceb99b7617faa339e1fd13d8..3138a869b5c0ce1c2bf47ec80b1f1781645ce303 100644 (file)
@@ -41,6 +41,9 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
 #include <linux/module.h>
 #include <linux/init.h>
 
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
 #include "lec.h"
 #include "lec_arpc.h"
 #include "resources.h"
@@ -687,8 +690,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
        bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
        if (bytes_left != 0)
                pr_info("copy from user failed for %d bytes\n", bytes_left);
-       if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF ||
-           !dev_lec[ioc_data.dev_num])
+       if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF)
+               return -EINVAL;
+       ioc_data.dev_num = array_index_nospec(ioc_data.dev_num, MAX_LEC_ITF);
+       if (!dev_lec[ioc_data.dev_num])
                return -EINVAL;
        vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL);
        if (!vpriv)
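
This is the standard Spectre-v1 mitigation pattern: a bounds check alone does not constrain speculative execution, so after the check the index is clamped with array_index_nospec() before it feeds the dependent array load. Distilled to a generic sketch (idx and table are hypothetical names):

	#include <linux/nospec.h>

	/* hypothetical table lookup hardened against Spectre-v1 */
	if (idx >= ARRAY_SIZE(table))
		return -EINVAL;
	idx = array_index_nospec(idx, ARRAY_SIZE(table));	/* clamp under speculation */
	return table[idx];
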
index 40d260f2bea5ef130693fbd19db1aa78bfc2f997..b0ee9edaae353b2a6f1d69b096c005f433c75cab 100644 (file)
@@ -3422,6 +3422,37 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
        return 0;
 }
 
+int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
+                  const void *param)
+{
+       struct sk_buff *skb;
+
+       if (hci_opcode_ogf(opcode) != 0x3f) {
+               /* A controller receiving a command shall respond with either
+                * a Command Status Event or a Command Complete Event.
+                * Therefore, all standard HCI commands must be sent via the
+                * standard API, using hci_send_cmd or hci_cmd_sync helpers.
+                * Some vendors do not comply with this rule for vendor-specific
+                * commands and do not return any event. We want to support
+                * unresponded commands for such cases only.
+                */
+               bt_dev_err(hdev, "unresponded command not supported");
+               return -EINVAL;
+       }
+
+       skb = hci_prepare_cmd(hdev, opcode, plen, param);
+       if (!skb) {
+               bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
+                          opcode);
+               return -ENOMEM;
+       }
+
+       hci_send_frame(hdev, skb);
+
+       return 0;
+}
+EXPORT_SYMBOL(__hci_cmd_send);
+
 /* Get data from the previously sent command */
 void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
 {
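
__hci_cmd_send() exists for vendor commands (OGF 0x3f) that by design never generate a Command Status or Command Complete event; the synchronous helpers would only time out waiting for one. A hedged sketch of a driver firing such a command blind (the opcode and payload are invented for illustration):

	/* hypothetical vendor command: OGF 0x3f, OCF 0x0042 */
	static int send_vendor_blob(struct hci_dev *hdev)
	{
		static const u8 param[] = { 0x01, 0x02 };

		/* queues the frame; no completion event will follow */
		return __hci_cmd_send(hdev, hci_opcode_pack(0x3f, 0x0042),
				      sizeof(param), param);
	}
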
index 139707cd9d352c9c76302b58c40b3148de33d051..235b5aaab23de901b17b107e7234dfcf0d67d903 100644 (file)
@@ -4942,10 +4942,14 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
                struct hci_ev_le_advertising_info *ev = ptr;
                s8 rssi;
 
-               rssi = ev->data[ev->length];
-               process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
-                                  ev->bdaddr_type, NULL, 0, rssi,
-                                  ev->data, ev->length);
+               if (ev->length <= HCI_MAX_AD_LENGTH) {
+                       rssi = ev->data[ev->length];
+                       process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
+                                          ev->bdaddr_type, NULL, 0, rssi,
+                                          ev->data, ev->length);
+               } else {
+                       bt_dev_err(hdev, "Dropping invalid advertising data");
+               }
 
                ptr += sizeof(*ev) + ev->length + 1;
        }
index 66c0781773dffa6db811b1fb64690ddcae584ab3..e44d34734834a8c604dc88e7a190e48743d9b035 100644 (file)
@@ -122,7 +122,6 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err)
 struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
                                  const void *param, u8 event, u32 timeout)
 {
-       DECLARE_WAITQUEUE(wait, current);
        struct hci_request req;
        struct sk_buff *skb;
        int err = 0;
@@ -135,21 +134,14 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
 
        hdev->req_status = HCI_REQ_PEND;
 
-       add_wait_queue(&hdev->req_wait_q, &wait);
-       set_current_state(TASK_INTERRUPTIBLE);
-
        err = hci_req_run_skb(&req, hci_req_sync_complete);
-       if (err < 0) {
-               remove_wait_queue(&hdev->req_wait_q, &wait);
-               set_current_state(TASK_RUNNING);
+       if (err < 0)
                return ERR_PTR(err);
-       }
 
-       schedule_timeout(timeout);
+       err = wait_event_interruptible_timeout(hdev->req_wait_q,
+                       hdev->req_status != HCI_REQ_PEND, timeout);
 
-       remove_wait_queue(&hdev->req_wait_q, &wait);
-
-       if (signal_pending(current))
+       if (err == -ERESTARTSYS)
                return ERR_PTR(-EINTR);
 
        switch (hdev->req_status) {
@@ -197,7 +189,6 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
                   unsigned long opt, u32 timeout, u8 *hci_status)
 {
        struct hci_request req;
-       DECLARE_WAITQUEUE(wait, current);
        int err = 0;
 
        BT_DBG("%s start", hdev->name);
@@ -213,16 +204,10 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
                return err;
        }
 
-       add_wait_queue(&hdev->req_wait_q, &wait);
-       set_current_state(TASK_INTERRUPTIBLE);
-
        err = hci_req_run_skb(&req, hci_req_sync_complete);
        if (err < 0) {
                hdev->req_status = 0;
 
-               remove_wait_queue(&hdev->req_wait_q, &wait);
-               set_current_state(TASK_RUNNING);
-
                /* ENODATA means the HCI request command queue is empty.
                 * This can happen when a request with conditionals doesn't
                 * trigger any commands to be sent. This is normal behavior
@@ -240,11 +225,10 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
                return err;
        }
 
-       schedule_timeout(timeout);
-
-       remove_wait_queue(&hdev->req_wait_q, &wait);
+       err = wait_event_interruptible_timeout(hdev->req_wait_q,
+                       hdev->req_status != HCI_REQ_PEND, timeout);
 
-       if (signal_pending(current))
+       if (err == -ERESTARTSYS)
                return -EINTR;
 
        switch (hdev->req_status) {
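
The rewrite drops the hand-rolled DECLARE_WAITQUEUE/add_wait_queue/schedule_timeout sequence in favour of wait_event_interruptible_timeout(), which bundles prepare-to-wait, the condition re-check and the wake-up/signal races into one macro. The idiom in isolation:

	long ret;

	/* returns <0 on signal, 0 on timeout, else jiffies remaining */
	ret = wait_event_interruptible_timeout(hdev->req_wait_q,
					       hdev->req_status != HCI_REQ_PEND,
					       timeout);
	if (ret == -ERESTARTSYS)
		return -EINTR;
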
index 671d13c10f6f4ed8118fe31fcceb28d7cde6edf1..b0a0b82e2d91017ff5aa00ab40649764e895cba7 100644 (file)
@@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net_bridge_port *p;
        struct net_bridge *br;
+       bool notified = false;
        bool changed_addr;
        int err;
 
@@ -67,7 +68,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
                break;
 
        case NETDEV_CHANGE:
-               br_port_carrier_check(p);
+               br_port_carrier_check(p, &notified);
                break;
 
        case NETDEV_FEAT_CHANGE:
@@ -76,8 +77,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 
        case NETDEV_DOWN:
                spin_lock_bh(&br->lock);
-               if (br->dev->flags & IFF_UP)
+               if (br->dev->flags & IFF_UP) {
                        br_stp_disable_port(p);
+                       notified = true;
+               }
                spin_unlock_bh(&br->lock);
                break;
 
@@ -85,6 +88,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
                if (netif_running(br->dev) && netif_oper_up(dev)) {
                        spin_lock_bh(&br->lock);
                        br_stp_enable_port(p);
+                       notified = true;
                        spin_unlock_bh(&br->lock);
                }
                break;
@@ -110,8 +114,8 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
        }
 
        /* Events that may cause spanning tree to refresh */
-       if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
-           event == NETDEV_CHANGE || event == NETDEV_DOWN)
+       if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+                         event == NETDEV_CHANGE || event == NETDEV_DOWN))
                br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
        return NOTIFY_DONE;
@@ -141,7 +145,7 @@ static int br_switchdev_event(struct notifier_block *unused,
        case SWITCHDEV_FDB_ADD_TO_BRIDGE:
                fdb_info = ptr;
                err = br_fdb_external_learn_add(br, p, fdb_info->addr,
-                                               fdb_info->vid);
+                                               fdb_info->vid, false);
                if (err) {
                        err = notifier_from_errno(err);
                        break;
@@ -152,7 +156,7 @@ static int br_switchdev_event(struct notifier_block *unused,
        case SWITCHDEV_FDB_DEL_TO_BRIDGE:
                fdb_info = ptr;
                err = br_fdb_external_learn_del(br, p, fdb_info->addr,
-                                               fdb_info->vid);
+                                               fdb_info->vid, false);
                if (err)
                        err = notifier_from_errno(err);
                break;
index d9e69e4514beb20d5d8a3672e26d5cc9edfc16fa..b19e3104afd6bf512d9b98f8350cc0f37d9880ea 100644 (file)
@@ -40,7 +40,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly;
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                      const unsigned char *addr, u16 vid);
 static void fdb_notify(struct net_bridge *br,
-                      const struct net_bridge_fdb_entry *, int);
+                      const struct net_bridge_fdb_entry *, int, bool);
 
 int __init br_fdb_init(void)
 {
@@ -121,6 +121,28 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
        return fdb;
 }
 
+struct net_device *br_fdb_find_port(const struct net_device *br_dev,
+                                   const unsigned char *addr,
+                                   __u16 vid)
+{
+       struct net_bridge_fdb_entry *f;
+       struct net_device *dev = NULL;
+       struct net_bridge *br;
+
+       ASSERT_RTNL();
+
+       if (!netif_is_bridge_master(br_dev))
+               return NULL;
+
+       br = netdev_priv(br_dev);
+       f = br_fdb_find(br, addr, vid);
+       if (f && f->dst)
+               dev = f->dst->dev;
+
+       return dev;
+}
+EXPORT_SYMBOL_GPL(br_fdb_find_port);
+
 struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
                                             const unsigned char *addr,
                                             __u16 vid)
@@ -173,7 +195,8 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
        }
 }
 
-static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
+static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
+                      bool swdev_notify)
 {
        trace_fdb_delete(br, f);
 
@@ -183,7 +206,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
        hlist_del_init_rcu(&f->fdb_node);
        rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
                               br_fdb_rht_params);
-       fdb_notify(br, f, RTM_DELNEIGH);
+       fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
        call_rcu(&f->rcu, fdb_rcu_free);
 }
 
@@ -219,7 +242,7 @@ static void fdb_delete_local(struct net_bridge *br,
                return;
        }
 
-       fdb_delete(br, f);
+       fdb_delete(br, f, true);
 }
 
 void br_fdb_find_delete_local(struct net_bridge *br,
@@ -334,7 +357,7 @@ void br_fdb_cleanup(struct work_struct *work)
                } else {
                        spin_lock_bh(&br->hash_lock);
                        if (!hlist_unhashed(&f->fdb_node))
-                               fdb_delete(br, f);
+                               fdb_delete(br, f, true);
                        spin_unlock_bh(&br->hash_lock);
                }
        }
@@ -354,7 +377,7 @@ void br_fdb_flush(struct net_bridge *br)
        spin_lock_bh(&br->hash_lock);
        hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
                if (!f->is_static)
-                       fdb_delete(br, f);
+                       fdb_delete(br, f, true);
        }
        spin_unlock_bh(&br->hash_lock);
 }
@@ -383,7 +406,7 @@ void br_fdb_delete_by_port(struct net_bridge *br,
                if (f->is_local)
                        fdb_delete_local(br, p, f);
                else
-                       fdb_delete(br, f);
+                       fdb_delete(br, f, true);
        }
        spin_unlock_bh(&br->hash_lock);
 }
@@ -509,7 +532,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                        return 0;
                br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
                       source ? source->dev->name : br->dev->name, addr, vid);
-               fdb_delete(br, fdb);
+               fdb_delete(br, fdb, true);
        }
 
        fdb = fdb_create(br, source, addr, vid, 1, 1);
@@ -517,7 +540,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                return -ENOMEM;
 
        fdb_add_hw_addr(br, addr);
-       fdb_notify(br, fdb, RTM_NEWNEIGH);
+       fdb_notify(br, fdb, RTM_NEWNEIGH, true);
        return 0;
 }
 
@@ -572,7 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                                fdb->added_by_user = 1;
                        if (unlikely(fdb_modified)) {
                                trace_br_fdb_update(br, source, addr, vid, added_by_user);
-                               fdb_notify(br, fdb, RTM_NEWNEIGH);
+                               fdb_notify(br, fdb, RTM_NEWNEIGH, true);
                        }
                }
        } else {
@@ -583,7 +606,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                                fdb->added_by_user = 1;
                        trace_br_fdb_update(br, source, addr, vid,
                                            added_by_user);
-                       fdb_notify(br, fdb, RTM_NEWNEIGH);
+                       fdb_notify(br, fdb, RTM_NEWNEIGH, true);
                }
                /* else we lose the race and someone else inserts
                 * it first; don't bother updating
@@ -665,13 +688,15 @@ static inline size_t fdb_nlmsg_size(void)
 }
 
 static void fdb_notify(struct net_bridge *br,
-                      const struct net_bridge_fdb_entry *fdb, int type)
+                      const struct net_bridge_fdb_entry *fdb, int type,
+                      bool swdev_notify)
 {
        struct net *net = dev_net(br->dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       br_switchdev_fdb_notify(fdb, type);
+       if (swdev_notify)
+               br_switchdev_fdb_notify(fdb, type);
 
        skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
        if (skb == NULL)
@@ -810,7 +835,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
        fdb->used = jiffies;
        if (modified) {
                fdb->updated = jiffies;
-               fdb_notify(br, fdb, RTM_NEWNEIGH);
+               fdb_notify(br, fdb, RTM_NEWNEIGH, true);
        }
 
        return 0;
@@ -834,7 +859,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                rcu_read_unlock();
                local_bh_enable();
        } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
-               err = br_fdb_external_learn_add(br, p, addr, vid);
+               err = br_fdb_external_learn_add(br, p, addr, vid, true);
        } else {
                spin_lock_bh(&br->hash_lock);
                err = fdb_add_entry(br, p, addr, ndm->ndm_state,
@@ -923,7 +948,7 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br,
        if (!fdb || fdb->dst != p)
                return -ENOENT;
 
-       fdb_delete(br, fdb);
+       fdb_delete(br, fdb, true);
 
        return 0;
 }
@@ -1043,7 +1068,8 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
 }
 
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid)
+                             const unsigned char *addr, u16 vid,
+                             bool swdev_notify)
 {
        struct net_bridge_fdb_entry *fdb;
        bool modified = false;
@@ -1061,7 +1087,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
                        goto err_unlock;
                }
                fdb->added_by_external_learn = 1;
-               fdb_notify(br, fdb, RTM_NEWNEIGH);
+               fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
        } else {
                fdb->updated = jiffies;
 
@@ -1080,7 +1106,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
                }
 
                if (modified)
-                       fdb_notify(br, fdb, RTM_NEWNEIGH);
+                       fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
        }
 
 err_unlock:
@@ -1090,7 +1116,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 }
 
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid)
+                             const unsigned char *addr, u16 vid,
+                             bool swdev_notify)
 {
        struct net_bridge_fdb_entry *fdb;
        int err = 0;
@@ -1099,7 +1126,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
 
        fdb = br_fdb_find(br, addr, vid);
        if (fdb && fdb->added_by_external_learn)
-               fdb_delete(br, fdb);
+               fdb_delete(br, fdb, swdev_notify);
        else
                err = -ENOENT;
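
br_fdb_find_port() gives out-of-bridge code (e.g. switchdev drivers) an RTNL-protected way to map a MAC/VLAN pair learned by a bridge back to the net_device of the port it was learned on. A minimal sketch of a caller (br_dev, mac and vid assumed to be in scope):

	struct net_device *lower;

	ASSERT_RTNL();
	lower = br_fdb_find_port(br_dev, mac, vid);	/* NULL if unknown */
	if (lower)
		netdev_info(lower, "bridge FDB entry for %pM lives here\n", mac);
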
 
index b4eed113d2ec89181e1451244b3a42afeed0e2c8..7a7fd672ccf2a3efbaea2ef464b09db193476b27 100644 (file)
@@ -274,8 +274,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
                struct net_bridge_port *port, *lport, *rport;
 
                lport = p ? p->port : NULL;
-               rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
-                            NULL;
+               rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
 
                if ((unsigned long)lport > (unsigned long)rport) {
                        port = lport;
index 82c1a6f430b37d8fb9b4713ad7ab370a3ef5b990..05e42d86882d69de07cdb79ec9d00e0f214e69cd 100644 (file)
@@ -64,7 +64,7 @@ static int port_cost(struct net_device *dev)
 
 
 /* Check for port carrier transitions. */
-void br_port_carrier_check(struct net_bridge_port *p)
+void br_port_carrier_check(struct net_bridge_port *p, bool *notified)
 {
        struct net_device *dev = p->dev;
        struct net_bridge *br = p->br;
@@ -73,16 +73,21 @@ void br_port_carrier_check(struct net_bridge_port *p)
            netif_running(dev) && netif_oper_up(dev))
                p->path_cost = port_cost(dev);
 
+       *notified = false;
        if (!netif_running(br->dev))
                return;
 
        spin_lock_bh(&br->lock);
        if (netif_running(dev) && netif_oper_up(dev)) {
-               if (p->state == BR_STATE_DISABLED)
+               if (p->state == BR_STATE_DISABLED) {
                        br_stp_enable_port(p);
+                       *notified = true;
+               }
        } else {
-               if (p->state != BR_STATE_DISABLED)
+               if (p->state != BR_STATE_DISABLED) {
                        br_stp_disable_port(p);
+                       *notified = true;
+               }
        }
        spin_unlock_bh(&br->lock);
 }
@@ -518,8 +523,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
                return -ELOOP;
        }
 
-       /* Device is already being bridged */
-       if (br_port_exists(dev))
+       /* Device has master upper dev */
+       if (netdev_master_upper_dev_get(dev))
                return -EBUSY;
 
        /* No bridging devices that dislike that (e.g. wireless) */
index a7cb3ece5031092f9df529864affdfafdfd1cf28..742f40aefdaffb10253dfeb4c7f59e9466efea5f 100644 (file)
@@ -553,9 +553,11 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid);
+                             const unsigned char *addr, u16 vid,
+                             bool swdev_notify);
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid);
+                             const unsigned char *addr, u16 vid,
+                             bool swdev_notify);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
                          const unsigned char *addr, u16 vid);
 
@@ -573,7 +575,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
              enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
 
 /* br_if.c */
-void br_port_carrier_check(struct net_bridge_port *p);
+void br_port_carrier_check(struct net_bridge_port *p, bool *notified);
 int br_add_bridge(struct net *net, const char *name);
 int br_del_bridge(struct net *net, const char *name);
 int br_add_if(struct net_bridge *br, struct net_device *dev,
@@ -594,11 +596,22 @@ static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
        return rcu_dereference(dev->rx_handler) == br_handle_frame;
 }
 
+static inline bool br_rx_handler_check_rtnl(const struct net_device *dev)
+{
+       return rcu_dereference_rtnl(dev->rx_handler) == br_handle_frame;
+}
+
 static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev)
 {
        return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL;
 }
 
+static inline struct net_bridge_port *
+br_port_get_check_rtnl(const struct net_device *dev)
+{
+       return br_rx_handler_check_rtnl(dev) ? br_port_get_rtnl_rcu(dev) : NULL;
+}
+
 /* br_ioctl.c */
 int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
index ee775f4ff76c222fea3ffa2ad322eb9bd4013706..35474d49555d86ce5512a039fa2ae10a439b684c 100644 (file)
@@ -102,13 +102,15 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 
 static void
 br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
-                               u16 vid, struct net_device *dev)
+                               u16 vid, struct net_device *dev,
+                               bool added_by_user)
 {
        struct switchdev_notifier_fdb_info info;
        unsigned long notifier_type;
 
        info.addr = mac;
        info.vid = vid;
+       info.added_by_user = added_by_user;
        notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
        call_switchdev_notifiers(notifier_type, dev, &info.info);
 }
@@ -116,19 +118,21 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
 void
 br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
-       if (!fdb->added_by_user || !fdb->dst)
+       if (!fdb->dst)
                return;
 
        switch (type) {
        case RTM_DELNEIGH:
                br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
                                                fdb->key.vlan_id,
-                                               fdb->dst->dev);
+                                               fdb->dst->dev,
+                                               fdb->added_by_user);
                break;
        case RTM_NEWNEIGH:
                br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
                                                fdb->key.vlan_id,
-                                               fdb->dst->dev);
+                                               fdb->dst->dev,
+                                               fdb->added_by_user);
                break;
        }
 }
index 9896f4975353db00af2ebf7432633c5e84ff7f7d..dc832c0934c6cf65799296dc401f640a389fdd79 100644 (file)
@@ -1149,3 +1149,44 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
                stats->tx_packets += txpackets;
        }
 }
+
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+{
+       struct net_bridge_vlan_group *vg;
+
+       ASSERT_RTNL();
+       if (netif_is_bridge_master(dev))
+               vg = br_vlan_group(netdev_priv(dev));
+       else
+               return -EINVAL;
+
+       *p_pvid = br_get_pvid(vg);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_pvid);
+
+int br_vlan_get_info(const struct net_device *dev, u16 vid,
+                    struct bridge_vlan_info *p_vinfo)
+{
+       struct net_bridge_vlan_group *vg;
+       struct net_bridge_vlan *v;
+       struct net_bridge_port *p;
+
+       ASSERT_RTNL();
+       p = br_port_get_check_rtnl(dev);
+       if (p)
+               vg = nbp_vlan_group(p);
+       else if (netif_is_bridge_master(dev))
+               vg = br_vlan_group(netdev_priv(dev));
+       else
+               return -EINVAL;
+
+       v = br_vlan_find(vg, vid);
+       if (!v)
+               return -ENOENT;
+
+       p_vinfo->vid = vid;
+       p_vinfo->flags = v->flags;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_info);
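
Both helpers are exported for drivers that need the bridge's VLAN view under RTNL, e.g. to program hardware tables. A minimal sketch combining them (br_dev assumed to be a bridge master device):

	struct bridge_vlan_info vinfo;
	u16 pvid;

	ASSERT_RTNL();
	if (!br_vlan_get_pvid(br_dev, &pvid) &&
	    !br_vlan_get_info(br_dev, pvid, &vinfo))
		pr_info("pvid %u, flags 0x%x\n", vinfo.vid, vinfo.flags);
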
index f212447794bd5c0223af0194206415b28be88df3..9a0159aebe1ac3e79a34984168d66fbd0331aac0 100644 (file)
@@ -8,13 +8,6 @@ menuconfig NF_TABLES_BRIDGE
        bool "Ethernet Bridge nf_tables support"
 
 if NF_TABLES_BRIDGE
-
-config NFT_BRIDGE_META
-       tristate "Netfilter nf_table bridge meta support"
-       depends on NFT_META
-       help
-         Add support for bridge dedicated meta key.
-
 config NFT_BRIDGE_REJECT
        tristate "Netfilter nf_tables bridge reject support"
        depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6
index 4bc758dd4a8c13e76fc3043d72510b7a8cda63ed..9b868861f21ad14b360bb3131ad7557a12b052de 100644 (file)
@@ -3,7 +3,6 @@
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
 
-obj-$(CONFIG_NFT_BRIDGE_META)  += nft_meta_bridge.o
 obj-$(CONFIG_NFT_BRIDGE_REJECT)  += nft_reject_bridge.o
 
 # packet logging
index 47ba98db145dd4ff05017e1f779b03e42ee19e98..46c1fe7637ea8f5f2b68c83998664fd37f090d49 100644 (file)
@@ -161,8 +161,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
        /* Make sure the match only receives stp frames */
        if (!par->nft_compat &&
            (!ether_addr_equal(e->destmac, eth_stp_addr) ||
-            !is_broadcast_ether_addr(e->destmsk) ||
-            !(e->bitmask & EBT_DESTMAC)))
+            !(e->bitmask & EBT_DESTMAC) ||
+            !is_broadcast_ether_addr(e->destmsk)))
                return -EINVAL;
 
        return 0;
index 032e0fe459408a734f8d10b326bf41f59e9f9606..b286ed5596c3eba8a9e4421919de0f27bfbf287b 100644 (file)
@@ -101,7 +101,7 @@ ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
 {
        par->match     = m->u.match;
        par->matchinfo = m->data;
-       return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH;
+       return !m->u.match->match(skb, par);
 }
 
 static inline int
@@ -177,6 +177,12 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
        return (void *)entry + entry->next_offset;
 }
 
+static inline const struct ebt_entry_target *
+ebt_get_target_c(const struct ebt_entry *e)
+{
+       return ebt_get_target((struct ebt_entry *)e);
+}
+
 /* Do some firewalling */
 unsigned int ebt_do_table(struct sk_buff *skb,
                          const struct nf_hook_state *state,
@@ -230,8 +236,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
                 */
                EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
 
-               t = (struct ebt_entry_target *)
-                  (((char *)point) + point->target_offset);
+               t = ebt_get_target_c(point);
                /* standard target */
                if (!t->u.target->target)
                        verdict = ((struct ebt_standard_target *)t)->verdict;
@@ -343,6 +348,16 @@ find_table_lock(struct net *net, const char *name, int *error,
                                "ebtable_", error, mutex);
 }
 
+static inline void ebt_free_table_info(struct ebt_table_info *info)
+{
+       int i;
+
+       if (info->chainstack) {
+               for_each_possible_cpu(i)
+                       vfree(info->chainstack[i]);
+               vfree(info->chainstack);
+       }
+}
 static inline int
 ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
                unsigned int *cnt)
@@ -627,7 +642,7 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
                return 1;
        EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL);
        EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL);
-       t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+       t = ebt_get_target(e);
 
        par.net      = net;
        par.target   = t->u.target;
@@ -706,7 +721,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
        ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j);
        if (ret != 0)
                goto cleanup_watchers;
-       t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+       t = ebt_get_target(e);
        gap = e->next_offset - e->target_offset;
 
        target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0);
@@ -779,8 +794,7 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack
                        if (pos == nentries)
                                continue;
                }
-               t = (struct ebt_entry_target *)
-                  (((char *)e) + e->target_offset);
+               t = ebt_get_target_c(e);
                if (strcmp(t->u.name, EBT_STANDARD_TARGET))
                        goto letscontinue;
                if (e->target_offset + sizeof(struct ebt_standard_target) >
@@ -975,7 +989,7 @@ static void get_counters(const struct ebt_counter *oldcounters,
 static int do_replace_finish(struct net *net, struct ebt_replace *repl,
                              struct ebt_table_info *newinfo)
 {
-       int ret, i;
+       int ret;
        struct ebt_counter *counterstmp = NULL;
        /* used to be able to unlock earlier */
        struct ebt_table_info *table;
@@ -1051,13 +1065,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
                          ebt_cleanup_entry, net, NULL);
 
        vfree(table->entries);
-       if (table->chainstack) {
-               for_each_possible_cpu(i)
-                       vfree(table->chainstack[i]);
-               vfree(table->chainstack);
-       }
+       ebt_free_table_info(table);
        vfree(table);
-
        vfree(counterstmp);
 
 #ifdef CONFIG_AUDIT
@@ -1078,11 +1087,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 free_counterstmp:
        vfree(counterstmp);
        /* can be initialized in translate_table() */
-       if (newinfo->chainstack) {
-               for_each_possible_cpu(i)
-                       vfree(newinfo->chainstack[i]);
-               vfree(newinfo->chainstack);
-       }
+       ebt_free_table_info(newinfo);
        return ret;
 }
 
@@ -1147,8 +1152,6 @@ static int do_replace(struct net *net, const void __user *user,
 
 static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
 {
-       int i;
-
        mutex_lock(&ebt_mutex);
        list_del(&table->list);
        mutex_unlock(&ebt_mutex);
@@ -1157,11 +1160,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
        if (table->private->nentries)
                module_put(table->me);
        vfree(table->private->entries);
-       if (table->private->chainstack) {
-               for_each_possible_cpu(i)
-                       vfree(table->private->chainstack[i]);
-               vfree(table->private->chainstack);
-       }
+       ebt_free_table_info(table->private);
        vfree(table->private);
        kfree(table);
 }
@@ -1263,11 +1262,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
 free_unlock:
        mutex_unlock(&ebt_mutex);
 free_chainstack:
-       if (newinfo->chainstack) {
-               for_each_possible_cpu(i)
-                       vfree(newinfo->chainstack[i]);
-               vfree(newinfo->chainstack);
-       }
+       ebt_free_table_info(newinfo);
        vfree(newinfo->entries);
 free_newinfo:
        vfree(newinfo);
@@ -1405,7 +1400,7 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
                return -EFAULT;
 
        hlp = ubase + (((char *)e + e->target_offset) - base);
-       t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+       t = ebt_get_target_c(e);
 
        ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase);
        if (ret != 0)
@@ -1746,7 +1741,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr,
                return ret;
        target_offset = e->target_offset - (origsize - *size);
 
-       t = (struct ebt_entry_target *) ((char *) e + e->target_offset);
+       t = ebt_get_target(e);
 
        ret = compat_target_to_user(t, dstptr, size);
        if (ret)
@@ -1794,7 +1789,7 @@ static int compat_calc_entry(const struct ebt_entry *e,
        EBT_MATCH_ITERATE(e, compat_calc_match, &off);
        EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off);
 
-       t = (const struct ebt_entry_target *) ((char *) e + e->target_offset);
+       t = ebt_get_target_c(e);
 
        off += xt_compat_target_offset(t->u.target);
        off += ebt_compat_entry_padsize();
@@ -1825,13 +1820,14 @@ static int compat_table_info(const struct ebt_table_info *info,
 {
        unsigned int size = info->entries_size;
        const void *entries = info->entries;
-       int ret;
 
        newinfo->entries_size = size;
-
-       ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
-       if (ret)
-               return ret;
+       if (info->nentries) {
+               int ret = xt_compat_init_offsets(NFPROTO_BRIDGE,
+                                                info->nentries);
+               if (ret)
+                       return ret;
+       }
 
        return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
                                                        entries, newinfo);
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
deleted file mode 100644 (file)
index bb63c9a..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2014 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nft_meta.h>
-
-#include "../br_private.h"
-
-static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
-                                    struct nft_regs *regs,
-                                    const struct nft_pktinfo *pkt)
-{
-       const struct nft_meta *priv = nft_expr_priv(expr);
-       const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
-       u32 *dest = &regs->data[priv->dreg];
-       const struct net_bridge_port *p;
-
-       switch (priv->key) {
-       case NFT_META_BRI_IIFNAME:
-               if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
-                       goto err;
-               break;
-       case NFT_META_BRI_OIFNAME:
-               if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
-                       goto err;
-               break;
-       default:
-               goto out;
-       }
-
-       strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
-       return;
-out:
-       return nft_meta_get_eval(expr, regs, pkt);
-err:
-       regs->verdict.code = NFT_BREAK;
-}
-
-static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
-                                   const struct nft_expr *expr,
-                                   const struct nlattr * const tb[])
-{
-       struct nft_meta *priv = nft_expr_priv(expr);
-       unsigned int len;
-
-       priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-       switch (priv->key) {
-       case NFT_META_BRI_IIFNAME:
-       case NFT_META_BRI_OIFNAME:
-               len = IFNAMSIZ;
-               break;
-       default:
-               return nft_meta_get_init(ctx, expr, tb);
-       }
-
-       priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
-       return nft_validate_register_store(ctx, priv->dreg, NULL,
-                                          NFT_DATA_VALUE, len);
-}
-
-static struct nft_expr_type nft_meta_bridge_type;
-static const struct nft_expr_ops nft_meta_bridge_get_ops = {
-       .type           = &nft_meta_bridge_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
-       .eval           = nft_meta_bridge_get_eval,
-       .init           = nft_meta_bridge_get_init,
-       .dump           = nft_meta_get_dump,
-};
-
-static const struct nft_expr_ops nft_meta_bridge_set_ops = {
-       .type           = &nft_meta_bridge_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
-       .eval           = nft_meta_set_eval,
-       .init           = nft_meta_set_init,
-       .destroy        = nft_meta_set_destroy,
-       .dump           = nft_meta_set_dump,
-       .validate       = nft_meta_set_validate,
-};
-
-static const struct nft_expr_ops *
-nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
-                          const struct nlattr * const tb[])
-{
-       if (tb[NFTA_META_KEY] == NULL)
-               return ERR_PTR(-EINVAL);
-
-       if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
-               return ERR_PTR(-EINVAL);
-
-       if (tb[NFTA_META_DREG])
-               return &nft_meta_bridge_get_ops;
-
-       if (tb[NFTA_META_SREG])
-               return &nft_meta_bridge_set_ops;
-
-       return ERR_PTR(-EINVAL);
-}
-
-static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
-       .family         = NFPROTO_BRIDGE,
-       .name           = "meta",
-       .select_ops     = nft_meta_bridge_select_ops,
-       .policy         = nft_meta_policy,
-       .maxattr        = NFTA_META_MAX,
-       .owner          = THIS_MODULE,
-};
-
-static int __init nft_meta_bridge_module_init(void)
-{
-       return nft_register_expr(&nft_meta_bridge_type);
-}
-
-static void __exit nft_meta_bridge_module_exit(void)
-{
-       nft_unregister_expr(&nft_meta_bridge_type);
-}
-
-module_init(nft_meta_bridge_module_init);
-module_exit(nft_meta_bridge_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
index fcb40c12b1f838e24b2f2a1a58b632522acab8b4..3b3d33ea9ed830373c08889f3e38d7bc695f313b 100644 (file)
@@ -2569,6 +2569,11 @@ static int try_write(struct ceph_connection *con)
        int ret = 1;
 
        dout("try_write start %p state %lu\n", con, con->state);
+       if (con->state != CON_STATE_PREOPEN &&
+           con->state != CON_STATE_CONNECTING &&
+           con->state != CON_STATE_NEGOTIATING &&
+           con->state != CON_STATE_OPEN)
+               return 0;
 
 more:
        dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
@@ -2594,6 +2599,8 @@ static int try_write(struct ceph_connection *con)
        }
 
 more_kvec:
+       BUG_ON(!con->sock);
+
        /* kvec data queued? */
        if (con->out_kvec_left) {
                ret = write_partial_kvec(con);
index b3dac24412d34cbe2e3616371db9fe9838ce550f..21ac6e3b96bba0f34625d4e8c92afd61042f7c87 100644 (file)
@@ -209,6 +209,14 @@ static void reopen_session(struct ceph_mon_client *monc)
        __open_session(monc);
 }
 
+static void un_backoff(struct ceph_mon_client *monc)
+{
+       monc->hunt_mult /= 2; /* reduce by 50% */
+       if (monc->hunt_mult < 1)
+               monc->hunt_mult = 1;
+       dout("%s hunt_mult now %d\n", __func__, monc->hunt_mult);
+}
+
 /*
  * Reschedule delayed work timer.
  */
@@ -963,6 +971,7 @@ static void delayed_work(struct work_struct *work)
                if (!monc->hunting) {
                        ceph_con_keepalive(&monc->con);
                        __validate_auth(monc);
+                       un_backoff(monc);
                }
 
                if (is_auth &&
@@ -1123,9 +1132,8 @@ static void finish_hunting(struct ceph_mon_client *monc)
                dout("%s found mon%d\n", __func__, monc->cur_mon);
                monc->hunting = false;
                monc->had_a_connection = true;
-               monc->hunt_mult /= 2; /* reduce by 50% */
-               if (monc->hunt_mult < 1)
-                       monc->hunt_mult = 1;
+               un_backoff(monc);
+               __schedule_delayed(monc);
        }
 }
 
index ea2a6c9fb7cef01b54eb86a1a7c580625feb4b1e..d2667e5dddc3acf4524ae280bee3ac832accdf0f 100644 (file)
@@ -157,10 +157,12 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
 #endif /* CONFIG_BLOCK */
 
 static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
-                                    struct ceph_bvec_iter *bvec_pos)
+                                    struct ceph_bvec_iter *bvec_pos,
+                                    u32 num_bvecs)
 {
        osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
        osd_data->bvec_pos = *bvec_pos;
+       osd_data->num_bvecs = num_bvecs;
 }
 
 #define osd_req_op_data(oreq, whch, typ, fld)                          \
@@ -237,6 +239,22 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
 #endif /* CONFIG_BLOCK */
 
+void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
+                                     unsigned int which,
+                                     struct bio_vec *bvecs, u32 num_bvecs,
+                                     u32 bytes)
+{
+       struct ceph_osd_data *osd_data;
+       struct ceph_bvec_iter it = {
+               .bvecs = bvecs,
+               .iter = { .bi_size = bytes },
+       };
+
+       osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+       ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvecs);
+
 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
                                         unsigned int which,
                                         struct ceph_bvec_iter *bvec_pos)
@@ -244,7 +262,7 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
        struct ceph_osd_data *osd_data;
 
        osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
-       ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+       ceph_osd_data_bvecs_init(osd_data, bvec_pos, 0);
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
 
@@ -287,7 +305,8 @@ EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
 
 void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
                                       unsigned int which,
-                                      struct bio_vec *bvecs, u32 bytes)
+                                      struct bio_vec *bvecs, u32 num_bvecs,
+                                      u32 bytes)
 {
        struct ceph_osd_data *osd_data;
        struct ceph_bvec_iter it = {
@@ -296,7 +315,7 @@ void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
        };
 
        osd_data = osd_req_op_data(osd_req, which, cls, request_data);
-       ceph_osd_data_bvecs_init(osd_data, &it);
+       ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs);
        osd_req->r_ops[which].cls.indata_len += bytes;
        osd_req->r_ops[which].indata_len += bytes;
 }
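A hypothetical caller of the widened bvecs API above; req, bvecs, nr and bytes are illustrative names, not taken from this patch:

/* Attach nr bio_vecs covering 'bytes' of payload to op 0; the OSD
 * client now records num_bvecs next to the iterator instead of
 * leaving the count implicit. */
osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, nr, bytes);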
index 5ae7437d38538b64b209ad3700472c25afe0c3b4..7242cce5631bdcac0cddb9a44cee22720fdeef07 100644 (file)
@@ -377,7 +377,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
            optname == SO_ATTACH_REUSEPORT_CBPF)
                return do_set_attach_filter(sock, level, optname,
                                            optval, optlen);
-       if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+       if (!COMPAT_USE_64BIT_TIME &&
+           (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
                return do_set_sock_timeout(sock, level, optname, optval, optlen);
 
        return sock_setsockopt(sock, level, optname, optval, optlen);
@@ -448,7 +449,8 @@ static int do_get_sock_timeout(struct socket *sock, int level, int optname,
 static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
                                char __user *optval, int __user *optlen)
 {
-       if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+       if (!COMPAT_USE_64BIT_TIME &&
+           (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
                return do_get_sock_timeout(sock, level, optname, optval, optlen);
        return sock_getsockopt(sock, level, optname, optval, optlen);
 }
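Why the COMPAT_USE_64BIT_TIME check matters, sketched with an illustrative struct (not the kernel's real compat definition):

#include <stdint.h>

/* A 32-bit compat task passes two 32-bit fields for SO_RCVTIMEO /
 * SO_SNDTIMEO, so the kernel must translate them. On ABIs where
 * COMPAT_USE_64BIT_TIME is true (e.g. x32), time_t is already 64-bit,
 * the layout matches the native struct timeval, and the plain
 * sock_setsockopt()/sock_getsockopt() path above is the right one. */
struct compat_timeval_demo {
	int32_t tv_sec;
	int32_t tv_usec;
};	/* 8 bytes, versus 16 for the native 64-bit layout */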
index c624a04dad1fbbd8ea7ad0055b2d47d11c6caa1b..1844d9bc571466d7d0116e45965793cd117cbdfb 100644 (file)
@@ -1587,7 +1587,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
        N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
        N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
        N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
-       };
+       }
 #undef N
        return "UNKNOWN_NETDEV_EVENT";
 }
@@ -1755,38 +1755,38 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
 #ifdef CONFIG_NET_INGRESS
-static struct static_key ingress_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
 
 void net_inc_ingress_queue(void)
 {
-       static_key_slow_inc(&ingress_needed);
+       static_branch_inc(&ingress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
 
 void net_dec_ingress_queue(void)
 {
-       static_key_slow_dec(&ingress_needed);
+       static_branch_dec(&ingress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
 #endif
 
 #ifdef CONFIG_NET_EGRESS
-static struct static_key egress_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
 
 void net_inc_egress_queue(void)
 {
-       static_key_slow_inc(&egress_needed);
+       static_branch_inc(&egress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_inc_egress_queue);
 
 void net_dec_egress_queue(void)
 {
-       static_key_slow_dec(&egress_needed);
+       static_branch_dec(&egress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 #endif
 
-static struct static_key netstamp_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
 #ifdef HAVE_JUMP_LABEL
 static atomic_t netstamp_needed_deferred;
 static atomic_t netstamp_wanted;
@@ -1797,9 +1797,9 @@ static void netstamp_clear(struct work_struct *work)
 
        wanted = atomic_add_return(deferred, &netstamp_wanted);
        if (wanted > 0)
-               static_key_enable(&netstamp_needed);
+               static_branch_enable(&netstamp_needed_key);
        else
-               static_key_disable(&netstamp_needed);
+               static_branch_disable(&netstamp_needed_key);
 }
 static DECLARE_WORK(netstamp_work, netstamp_clear);
 #endif
@@ -1819,7 +1819,7 @@ void net_enable_timestamp(void)
        atomic_inc(&netstamp_needed_deferred);
        schedule_work(&netstamp_work);
 #else
-       static_key_slow_inc(&netstamp_needed);
+       static_branch_inc(&netstamp_needed_key);
 #endif
 }
 EXPORT_SYMBOL(net_enable_timestamp);
@@ -1839,7 +1839,7 @@ void net_disable_timestamp(void)
        atomic_dec(&netstamp_needed_deferred);
        schedule_work(&netstamp_work);
 #else
-       static_key_slow_dec(&netstamp_needed);
+       static_branch_dec(&netstamp_needed_key);
 #endif
 }
 EXPORT_SYMBOL(net_disable_timestamp);
@@ -1847,15 +1847,15 @@ EXPORT_SYMBOL(net_disable_timestamp);
 static inline void net_timestamp_set(struct sk_buff *skb)
 {
        skb->tstamp = 0;
-       if (static_key_false(&netstamp_needed))
+       if (static_branch_unlikely(&netstamp_needed_key))
                __net_timestamp(skb);
 }
 
-#define net_timestamp_check(COND, SKB)                 \
-       if (static_key_false(&netstamp_needed)) {               \
-               if ((COND) && !(SKB)->tstamp)   \
-                       __net_timestamp(SKB);           \
-       }                                               \
+#define net_timestamp_check(COND, SKB)                         \
+       if (static_branch_unlikely(&netstamp_needed_key)) {     \
+               if ((COND) && !(SKB)->tstamp)                   \
+                       __net_timestamp(SKB);                   \
+       }                                                       \
 
 bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
 {
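This file's hunks migrate from the old static_key API to static branches; a minimal sketch of the target pattern, with an illustrative key name:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(demo_needed_key);

/* Compiles to a straight-line no-op until the key is enabled. */
static inline void demo_hot_path(void)
{
	if (static_branch_unlikely(&demo_needed_key))
		;	/* rarely-enabled slow work */
}

static void demo_inc(void) { static_branch_inc(&demo_needed_key); }
static void demo_dec(void) { static_branch_dec(&demo_needed_key); }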
@@ -2125,7 +2125,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
                int i, j;
 
                for (i = count, j = offset; i--; j++) {
-                       if (!remove_xps_queue(dev_maps, cpu, j))
+                       if (!remove_xps_queue(dev_maps, tci, j))
                                break;
                }
 
@@ -2615,17 +2615,16 @@ EXPORT_SYMBOL(netif_device_attach);
  * Returns a Tx hash based on the given packet descriptor and the number
  * of Tx queues to be used as a distribution range.
  */
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
-                 unsigned int num_tx_queues)
+static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
 {
        u32 hash;
        u16 qoffset = 0;
-       u16 qcount = num_tx_queues;
+       u16 qcount = dev->real_num_tx_queues;
 
        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
-               while (unlikely(hash >= num_tx_queues))
-                       hash -= num_tx_queues;
+               while (unlikely(hash >= qcount))
+                       hash -= qcount;
                return hash;
        }
 
@@ -2638,7 +2637,6 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
 
        return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
 }
-EXPORT_SYMBOL(__skb_tx_hash);
 
 static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
@@ -3114,6 +3112,10 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
        if (unlikely(!skb))
                goto out_null;
 
+       skb = sk_validate_xmit_skb(skb, dev);
+       if (unlikely(!skb))
+               goto out_null;
+
        if (netif_needs_gso(skb, features)) {
                struct sk_buff *segs;
 
@@ -3242,7 +3244,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                        rc = NET_XMIT_DROP;
                } else {
                        rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-                       __qdisc_run(q);
+                       qdisc_run(q);
                }
 
                if (unlikely(to_free))
@@ -3530,7 +3532,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_at_ingress = 0;
 # ifdef CONFIG_NET_EGRESS
-       if (static_key_false(&egress_needed)) {
+       if (static_branch_unlikely(&egress_needed_key)) {
                skb = sch_handle_egress(skb, &rc, dev);
                if (!skb)
                        goto out;
@@ -3625,6 +3627,44 @@ int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
+int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
+{
+       struct net_device *dev = skb->dev;
+       struct sk_buff *orig_skb = skb;
+       struct netdev_queue *txq;
+       int ret = NETDEV_TX_BUSY;
+       bool again = false;
+
+       if (unlikely(!netif_running(dev) ||
+                    !netif_carrier_ok(dev)))
+               goto drop;
+
+       skb = validate_xmit_skb_list(skb, dev, &again);
+       if (skb != orig_skb)
+               goto drop;
+
+       skb_set_queue_mapping(skb, queue_id);
+       txq = skb_get_tx_queue(dev, skb);
+
+       local_bh_disable();
+
+       HARD_TX_LOCK(dev, txq, smp_processor_id());
+       if (!netif_xmit_frozen_or_drv_stopped(txq))
+               ret = netdev_start_xmit(skb, dev, txq, false);
+       HARD_TX_UNLOCK(dev, txq);
+
+       local_bh_enable();
+
+       if (!dev_xmit_complete(ret))
+               kfree_skb(skb);
+
+       return ret;
+drop:
+       atomic_long_inc(&dev->tx_dropped);
+       kfree_skb_list(skb);
+       return NET_XMIT_DROP;
+}
+EXPORT_SYMBOL(dev_direct_xmit);
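A hypothetical caller of the newly exported dev_direct_xmit() (an AF_XDP-style transmit path; skb and qid are illustrative, and skb->dev must already be set):

int err = dev_direct_xmit(skb, qid);

/* NETDEV_TX_OK on success; on a drop or a stopped/frozen queue the
 * skb has already been freed by dev_direct_xmit() itself. */
if (!dev_xmit_complete(err))
	pr_debug("direct xmit failed: %d\n", err);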
 
 /*************************************************************************
  *                     Receiver routines
@@ -3994,12 +4034,12 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
 }
 
 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+                                    struct xdp_buff *xdp,
                                     struct bpf_prog *xdp_prog)
 {
        struct netdev_rx_queue *rxqueue;
        void *orig_data, *orig_data_end;
        u32 metalen, act = XDP_DROP;
-       struct xdp_buff xdp;
        int hlen, off;
        u32 mac_len;
 
@@ -4034,19 +4074,19 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
         */
        mac_len = skb->data - skb_mac_header(skb);
        hlen = skb_headlen(skb) + mac_len;
-       xdp.data = skb->data - mac_len;
-       xdp.data_meta = xdp.data;
-       xdp.data_end = xdp.data + hlen;
-       xdp.data_hard_start = skb->data - skb_headroom(skb);
-       orig_data_end = xdp.data_end;
-       orig_data = xdp.data;
+       xdp->data = skb->data - mac_len;
+       xdp->data_meta = xdp->data;
+       xdp->data_end = xdp->data + hlen;
+       xdp->data_hard_start = skb->data - skb_headroom(skb);
+       orig_data_end = xdp->data_end;
+       orig_data = xdp->data;
 
        rxqueue = netif_get_rxqueue(skb);
-       xdp.rxq = &rxqueue->xdp_rxq;
+       xdp->rxq = &rxqueue->xdp_rxq;
 
-       act = bpf_prog_run_xdp(xdp_prog, &xdp);
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
 
-       off = xdp.data - orig_data;
+       off = xdp->data - orig_data;
        if (off > 0)
                __skb_pull(skb, off);
        else if (off < 0)
@@ -4056,9 +4096,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        /* check if bpf_xdp_adjust_tail was used; it can only "shrink"
         * the packet.
         */
-       off = orig_data_end - xdp.data_end;
-       if (off != 0)
-               skb_set_tail_pointer(skb, xdp.data_end - xdp.data);
+       off = orig_data_end - xdp->data_end;
+       if (off != 0) {
+               skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
+               skb->len -= off;
+       }
 
        switch (act) {
        case XDP_REDIRECT:
@@ -4066,7 +4109,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                __skb_push(skb, mac_len);
                break;
        case XDP_PASS:
-               metalen = xdp.data - xdp.data_meta;
+               metalen = xdp->data - xdp->data_meta;
                if (metalen)
                        skb_metadata_set(skb, metalen);
                break;
@@ -4111,22 +4154,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 }
 EXPORT_SYMBOL_GPL(generic_xdp_tx);
 
-static struct static_key generic_xdp_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
 
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 {
        if (xdp_prog) {
-               u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+               struct xdp_buff xdp;
+               u32 act;
                int err;
 
+               act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
                if (act != XDP_PASS) {
                        switch (act) {
                        case XDP_REDIRECT:
                                err = xdp_do_generic_redirect(skb->dev, skb,
-                                                             xdp_prog);
+                                                             &xdp, xdp_prog);
                                if (err)
                                        goto out_redir;
-                       /* fallthru to submit skb */
+                               break;
                        case XDP_TX:
                                generic_xdp_tx(skb, xdp_prog);
                                break;
@@ -4149,7 +4194,7 @@ static int netif_rx_internal(struct sk_buff *skb)
 
        trace_netif_rx(skb);
 
-       if (static_key_false(&generic_xdp_needed)) {
+       if (static_branch_unlikely(&generic_xdp_needed_key)) {
                int ret;
 
                preempt_disable();
@@ -4521,7 +4566,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
-       if (static_key_false(&ingress_needed)) {
+       if (static_branch_unlikely(&ingress_needed_key)) {
                skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
                if (!skb)
                        goto out;
@@ -4681,9 +4726,9 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
                        bpf_prog_put(old);
 
                if (old && !new) {
-                       static_key_slow_dec(&generic_xdp_needed);
+                       static_branch_dec(&generic_xdp_needed_key);
                } else if (new && !old) {
-                       static_key_slow_inc(&generic_xdp_needed);
+                       static_branch_inc(&generic_xdp_needed_key);
                        dev_disable_lro(dev);
                        dev_disable_gro_hw(dev);
                }
@@ -4711,7 +4756,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
        if (skb_defer_rx_timestamp(skb))
                return NET_RX_SUCCESS;
 
-       if (static_key_false(&generic_xdp_needed)) {
+       if (static_branch_unlikely(&generic_xdp_needed_key)) {
                int ret;
 
                preempt_disable();
@@ -7879,6 +7924,8 @@ int register_netdevice(struct net_device *dev)
        int ret;
        struct net *net = dev_net(dev);
 
+       BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
+                    NETDEV_FEATURE_COUNT);
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
 
index ad1317376798cc79735e491b86f7f0e4790ac432..5c8a40e1a01ebb143516baaac6eecec55d996d26 100644 (file)
@@ -453,6 +453,27 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
                                msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static int devlink_nl_port_attrs_put(struct sk_buff *msg,
+                                    struct devlink_port *devlink_port)
+{
+       struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+       if (!attrs->set)
+               return 0;
+       if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
+               return -EMSGSIZE;
+       if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->port_number))
+               return -EMSGSIZE;
+       if (!attrs->split)
+               return 0;
+       if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs->port_number))
+               return -EMSGSIZE;
+       if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+                       attrs->split_subport_number))
+               return -EMSGSIZE;
+       return 0;
+}
+
 static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
                                struct devlink_port *devlink_port,
                                enum devlink_command cmd, u32 portid,
@@ -492,9 +513,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
                                   ibdev->name))
                        goto nla_put_failure;
        }
-       if (devlink_port->split &&
-           nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
-                       devlink_port->split_group))
+       if (devlink_nl_port_attrs_put(msg, devlink_port))
                goto nla_put_failure;
 
        genlmsg_end(msg, hdr);
@@ -2971,19 +2990,64 @@ void devlink_port_type_clear(struct devlink_port *devlink_port)
 EXPORT_SYMBOL_GPL(devlink_port_type_clear);
 
 /**
- *     devlink_port_split_set - Set port is split
+ *     devlink_port_attrs_set - Set port attributes
  *
  *     @devlink_port: devlink port
- *     @split_group: split group - identifies group split port is part of
+ *     @flavour: flavour of the port
+ *     @port_number: number of the user-facing port, for example
+ *                   the front panel port number
+ *     @split: indicates if this is a split port
+ *     @split_subport_number: if the port is split, this is the number
+ *                            of the subport
  */
-void devlink_port_split_set(struct devlink_port *devlink_port,
-                           u32 split_group)
-{
-       devlink_port->split = true;
-       devlink_port->split_group = split_group;
+void devlink_port_attrs_set(struct devlink_port *devlink_port,
+                           enum devlink_port_flavour flavour,
+                           u32 port_number, bool split,
+                           u32 split_subport_number)
+{
+       struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+       attrs->set = true;
+       attrs->flavour = flavour;
+       attrs->port_number = port_number;
+       attrs->split = split;
+       attrs->split_subport_number = split_subport_number;
        devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 }
-EXPORT_SYMBOL_GPL(devlink_port_split_set);
+EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
+
+int devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
+                                   char *name, size_t len)
+{
+       struct devlink_port_attrs *attrs = &devlink_port->attrs;
+       int n = 0;
+
+       if (!attrs->set)
+               return -EOPNOTSUPP;
+
+       switch (attrs->flavour) {
+       case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+               if (!attrs->split)
+                       n = snprintf(name, len, "p%u", attrs->port_number);
+               else
+                       n = snprintf(name, len, "p%us%u", attrs->port_number,
+                                    attrs->split_subport_number);
+               break;
+       case DEVLINK_PORT_FLAVOUR_CPU:
+       case DEVLINK_PORT_FLAVOUR_DSA:
+               /* As CPU and DSA ports do not have a netdevice associated
+                * with them, this case should never happen.
+                */
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       if (n >= len)
+               return -EINVAL;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_port_get_phys_port_name);
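A hypothetical driver-side use of the new helper, with the names the format strings above produce (dl_port is an illustrative pointer):

char name[IFNAMSIZ];
int err = devlink_port_get_phys_port_name(dl_port, name, sizeof(name));

/* err == 0: name is "p1" for physical port 1, or "p1s2" for subport 2
 * of split port 1; -EOPNOTSUPP if attrs were never set, -EINVAL for
 * CPU/DSA flavours or a buffer that is too small. */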
 
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
                        u32 size, u16 ingress_pools_count,
index 03416e6dd5d7b0c2bb4dbfc805832e4fb67fba92..c15075dc7572cdd993e17e7ccc765c22c23bc75a 100644 (file)
@@ -92,6 +92,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_GSO_PARTIAL_BIT] =      "tx-gso-partial",
        [NETIF_F_GSO_SCTP_BIT] =         "tx-sctp-segmentation",
        [NETIF_F_GSO_ESP_BIT] =          "tx-esp-segmentation",
+       [NETIF_F_GSO_UDP_L4_BIT] =       "tx-udp-segmentation",
 
        [NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
        [NETIF_F_SCTP_CRC_BIT] =        "tx-checksum-sctp",
@@ -109,6 +110,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_HW_ESP_TX_CSUM_BIT] =   "esp-tx-csum-hw-offload",
        [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =       "rx-udp_tunnel-port-offload",
        [NETIF_F_HW_TLS_RECORD_BIT] =   "tls-hw-record",
+       [NETIF_F_HW_TLS_TX_BIT] =        "tls-hw-tx-offload",
 };
 
 static const char
@@ -210,23 +212,6 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
        return ret;
 }
 
-static int phy_get_sset_count(struct phy_device *phydev)
-{
-       int ret;
-
-       if (phydev->drv->get_sset_count &&
-           phydev->drv->get_strings &&
-           phydev->drv->get_stats) {
-               mutex_lock(&phydev->lock);
-               ret = phydev->drv->get_sset_count(phydev);
-               mutex_unlock(&phydev->lock);
-
-               return ret;
-       }
-
-       return -EOPNOTSUPP;
-}
-
 static int __ethtool_get_sset_count(struct net_device *dev, int sset)
 {
        const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -243,12 +228,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
        if (sset == ETH_SS_PHY_TUNABLES)
                return ARRAY_SIZE(phy_tunable_strings);
 
-       if (sset == ETH_SS_PHY_STATS) {
-               if (dev->phydev)
-                       return phy_get_sset_count(dev->phydev);
-               else
-                       return -EOPNOTSUPP;
-       }
+       if (sset == ETH_SS_PHY_STATS && dev->phydev &&
+           !ops->get_ethtool_phy_stats)
+               return phy_ethtool_get_sset_count(dev->phydev);
 
        if (ops->get_sset_count && ops->get_strings)
                return ops->get_sset_count(dev, sset);
@@ -271,17 +253,10 @@ static void __ethtool_get_strings(struct net_device *dev,
                memcpy(data, tunable_strings, sizeof(tunable_strings));
        else if (stringset == ETH_SS_PHY_TUNABLES)
                memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
-       else if (stringset == ETH_SS_PHY_STATS) {
-               struct phy_device *phydev = dev->phydev;
-
-               if (phydev) {
-                       mutex_lock(&phydev->lock);
-                       phydev->drv->get_strings(phydev, data);
-                       mutex_unlock(&phydev->lock);
-               } else {
-                       return;
-               }
-       } else
+       else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
+                !ops->get_ethtool_phy_stats)
+               phy_ethtool_get_strings(dev->phydev, data);
+       else
                /* ops->get_strings is valid because checked earlier */
                ops->get_strings(dev, stringset, data);
 }
@@ -1032,6 +1007,11 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
                info_size = sizeof(info);
                if (copy_from_user(&info, useraddr, info_size))
                        return -EFAULT;
+               /* Since malicious users may modify the original data,
+                * we need to check whether FLOW_RSS is still requested.
+                */
+               if (!(info.flow_type & FLOW_RSS))
+                       return -EINVAL;
        }
 
        if (info.cmd == ETHTOOL_GRXCLSRLALL) {
@@ -1993,15 +1973,19 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 
 static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
 {
-       struct ethtool_stats stats;
+       const struct ethtool_ops *ops = dev->ethtool_ops;
        struct phy_device *phydev = dev->phydev;
+       struct ethtool_stats stats;
        u64 *data;
        int ret, n_stats;
 
-       if (!phydev)
+       if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
                return -EOPNOTSUPP;
 
-       n_stats = phy_get_sset_count(phydev);
+       if (dev->phydev && !ops->get_ethtool_phy_stats)
+               n_stats = phy_ethtool_get_sset_count(dev->phydev);
+       else
+               n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
        if (n_stats < 0)
                return n_stats;
        if (n_stats > S32_MAX / sizeof(u64))
@@ -2016,9 +2000,13 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
        if (n_stats && !data)
                return -ENOMEM;
 
-       mutex_lock(&phydev->lock);
-       phydev->drv->get_stats(phydev, &stats, data);
-       mutex_unlock(&phydev->lock);
+       if (dev->phydev && !ops->get_ethtool_phy_stats) {
+               ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
+               if (ret < 0)
+                       return ret;
+       } else {
+               ops->get_ethtool_phy_stats(dev, &stats, data);
+       }
 
        ret = -EFAULT;
        if (copy_to_user(useraddr, &stats, sizeof(stats)))
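The refactor above consults a new driver hook and only uses the generic phylib helpers when the driver lacks it; a hypothetical driver wiring it up (the demo_* callbacks are assumptions, not part of this patch):

static const struct ethtool_ops demo_ethtool_ops = {
	/* With this hook present, ETH_SS_PHY_STATS counts, strings and
	 * values come from the driver rather than phy_ethtool_*(). */
	.get_sset_count		= demo_sset_count,
	.get_strings		= demo_strings,
	.get_ethtool_phy_stats	= demo_phy_stats,
};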
index 2271c80fd9675decba0526444af59109c6f785c4..126ffc5bc630cb412e4bcf1a48869ec6711fda54 100644 (file)
@@ -454,6 +454,27 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
        return NULL;
 }
 
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+                             struct netlink_ext_ack *extack)
+{
+       nlrule->l3mdev = nla_get_u8(nla);
+       if (nlrule->l3mdev != 1) {
+               NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
+               return -1;
+       }
+
+       return 0;
+}
+#else
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+                             struct netlink_ext_ack *extack)
+{
+       NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
+       return -1;
+}
+#endif
+
 static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
                       struct netlink_ext_ack *extack,
                       struct fib_rules_ops *ops,
@@ -536,16 +557,9 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
                nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
 
        err = -EINVAL;
-       if (tb[FRA_L3MDEV]) {
-#ifdef CONFIG_NET_L3_MASTER_DEV
-               nlrule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
-               if (nlrule->l3mdev != 1)
-#endif
-               {
-                       NL_SET_ERR_MSG(extack, "Invalid l3mdev");
-                       goto errout_free;
-               }
-       }
+       if (tb[FRA_L3MDEV] &&
+           fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
+               goto errout_free;
 
        nlrule->action = frh->action;
        nlrule->flags = frh->flags;
index e25bc4a3aa1a144dd3b83d01ecbac3c05932b0e3..51ea7ddb2d8d140eca3565fb52a23ceaa9a8ce03 100644 (file)
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
 #include <net/tcp.h>
+#include <net/xfrm.h>
 #include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <linux/inetdevice.h>
+#include <net/ip_fib.h>
+#include <net/flow.h>
+#include <net/arp.h>
 
 /**
  *     sk_filter_trim_cap - run a packet through a socket filter
@@ -111,12 +117,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 }
 EXPORT_SYMBOL(sk_filter_trim_cap);
 
-BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
+BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
 {
        return skb_get_poff(skb);
 }
 
-BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
+BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
 {
        struct nlattr *nla;
 
@@ -136,7 +142,7 @@ BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
        return 0;
 }
 
-BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
+BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
 {
        struct nlattr *nla;
 
@@ -160,13 +166,94 @@ BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
        return 0;
 }
 
-BPF_CALL_0(__get_raw_cpu_id)
+BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
+          data, int, headlen, int, offset)
+{
+       u8 tmp, *ptr;
+       const int len = sizeof(tmp);
+
+       if (offset >= 0) {
+               if (headlen - offset >= len)
+                       return *(u8 *)(data + offset);
+               if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                       return tmp;
+       } else {
+               ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
+               if (likely(ptr))
+                       return *(u8 *)ptr;
+       }
+
+       return -EFAULT;
+}
+
+BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
+          int, offset)
+{
+       return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
+                                        offset);
+}
+
+BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
+          data, int, headlen, int, offset)
+{
+       u16 tmp, *ptr;
+       const int len = sizeof(tmp);
+
+       if (offset >= 0) {
+               if (headlen - offset >= len)
+                       return get_unaligned_be16(data + offset);
+               if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                       return be16_to_cpu(tmp);
+       } else {
+               ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
+               if (likely(ptr))
+                       return get_unaligned_be16(ptr);
+       }
+
+       return -EFAULT;
+}
+
+BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
+          int, offset)
+{
+       return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
+                                         offset);
+}
+
+BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
+          data, int, headlen, int, offset)
+{
+       u32 tmp, *ptr;
+       const int len = sizeof(tmp);
+
+       if (likely(offset >= 0)) {
+               if (headlen - offset >= len)
+                       return get_unaligned_be32(data + offset);
+               if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                       return be32_to_cpu(tmp);
+       } else {
+               ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
+               if (likely(ptr))
+                       return get_unaligned_be32(ptr);
+       }
+
+       return -EFAULT;
+}
+
+BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
+          int, offset)
+{
+       return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
+                                         offset);
+}
+
+BPF_CALL_0(bpf_get_raw_cpu_id)
 {
        return raw_smp_processor_id();
 }
 
 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
-       .func           = __get_raw_cpu_id,
+       .func           = bpf_get_raw_cpu_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
 };
@@ -316,16 +403,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
                /* Emit call(arg1=CTX, arg2=A, arg3=X) */
                switch (fp->k) {
                case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
-                       *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
+                       *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
                        break;
                case SKF_AD_OFF + SKF_AD_NLATTR:
-                       *insn = BPF_EMIT_CALL(__skb_get_nlattr);
+                       *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
                        break;
                case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
-                       *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
+                       *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
                        break;
                case SKF_AD_OFF + SKF_AD_CPU:
-                       *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
+                       *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
                        break;
                case SKF_AD_OFF + SKF_AD_RANDOM:
                        *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
@@ -352,26 +439,87 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
        return true;
 }
 
+static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
+{
+       const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
+       int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
+       bool endian = BPF_SIZE(fp->code) == BPF_H ||
+                     BPF_SIZE(fp->code) == BPF_W;
+       bool indirect = BPF_MODE(fp->code) == BPF_IND;
+       const int ip_align = NET_IP_ALIGN;
+       struct bpf_insn *insn = *insnp;
+       int offset = fp->k;
+
+       if (!indirect &&
+           ((unaligned_ok && offset >= 0) ||
+            (!unaligned_ok && offset >= 0 &&
+             offset + ip_align >= 0 &&
+             offset + ip_align % size == 0))) {
+               *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
+               *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
+               *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian);
+               *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D,
+                                     offset);
+               if (endian)
+                       *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
+               *insn++ = BPF_JMP_A(8);
+       }
+
+       *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
+       *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
+       *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
+       if (!indirect) {
+               *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
+       } else {
+               *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
+               if (fp->k)
+                       *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
+       }
+
+       switch (BPF_SIZE(fp->code)) {
+       case BPF_B:
+               *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
+               break;
+       case BPF_H:
+               *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
+               break;
+       case BPF_W:
+               *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
+               break;
+       default:
+               return false;
+       }
+
+       *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
+       *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+       *insn   = BPF_EXIT_INSN();
+
+       *insnp = insn;
+       return true;
+}
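For reference, the kind of classic BPF load that convert_bpf_ld_abs() above inlines; a minimal cBPF filter using BPF_ABS (accept IPv4 frames, drop everything else):

#include <linux/filter.h>

struct sock_filter demo_prog[] = {
	BPF_STMT(BPF_LD  | BPF_H | BPF_ABS, 12),	   /* A = ethertype */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 1), /* IPv4? */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),		   /* accept */
	BPF_STMT(BPF_RET | BPF_K, 0),			   /* drop */
};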
+
 /**
  *     bpf_convert_filter - convert filter program
  *     @prog: the user passed filter program
  *     @len: the length of the user passed filter program
  *     @new_prog: allocated 'struct bpf_prog' or NULL
  *     @new_len: pointer to store length of converted program
+ *     @seen_ld_abs: whether we've seen an ld_abs/ind instruction
  *
  * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
  * style extended BPF (eBPF).
  * Conversion workflow:
  *
  * 1) First pass for calculating the new program length:
- *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
+ *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
  *
  * 2) 2nd pass to remap in two passes: 1st pass finds new
  *    jump offsets, 2nd pass remapping:
- *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
+ *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
  */
 static int bpf_convert_filter(struct sock_filter *prog, int len,
-                             struct bpf_prog *new_prog, int *new_len)
+                             struct bpf_prog *new_prog, int *new_len,
+                             bool *seen_ld_abs)
 {
        int new_flen = 0, pass = 0, target, i, stack_off;
        struct bpf_insn *new_insn, *first_insn = NULL;
@@ -410,12 +558,27 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
                 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
                 */
                *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
+               if (*seen_ld_abs) {
+                       /* For packet access in classic BPF, cache skb->data
+                        * in callee-saved BPF R8 and skb->len - skb->data_len
+                        * (headlen) in BPF R9. Since classic BPF is read-only
+                        * on CTX, we only need to cache it once.
+                        */
+                       *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+                                                 BPF_REG_D, BPF_REG_CTX,
+                                                 offsetof(struct sk_buff, data));
+                       *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
+                                                 offsetof(struct sk_buff, len));
+                       *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
+                                                 offsetof(struct sk_buff, data_len));
+                       *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
+               }
        } else {
                new_insn += 3;
        }
 
        for (i = 0; i < len; fp++, i++) {
-               struct bpf_insn tmp_insns[6] = { };
+               struct bpf_insn tmp_insns[32] = { };
                struct bpf_insn *insn = tmp_insns;
 
                if (addrs)
@@ -458,6 +621,11 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
                            BPF_MODE(fp->code) == BPF_ABS &&
                            convert_bpf_extensions(fp, &insn))
                                break;
+                       if (BPF_CLASS(fp->code) == BPF_LD &&
+                           convert_bpf_ld_abs(fp, &insn)) {
+                               *seen_ld_abs = true;
+                               break;
+                       }
 
                        if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
                            fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
@@ -481,11 +649,18 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
 
 #define BPF_EMIT_JMP                                                   \
        do {                                                            \
+               const s32 off_min = S16_MIN, off_max = S16_MAX;         \
+               s32 off;                                                \
+                                                                       \
                if (target >= len || target < 0)                        \
                        goto err;                                       \
-               insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;   \
+               off = addrs ? addrs[target] - addrs[i] - 1 : 0;         \
                /* Adjust pc relative offset for 2nd or 3rd insn. */    \
-               insn->off -= insn - tmp_insns;                          \
+               off -= insn - tmp_insns;                                \
+               /* Reject anything not fitting into insn->off. */       \
+               if (off < off_min || off > off_max)                     \
+                       goto err;                                       \
+               insn->off = off;                                        \
        } while (0)
 
                case BPF_JMP | BPF_JA:
@@ -560,21 +735,31 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
                        break;
 
                /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
-               case BPF_LDX | BPF_MSH | BPF_B:
-                       /* tmp = A */
-                       *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
+               case BPF_LDX | BPF_MSH | BPF_B: {
+                       struct sock_filter tmp = {
+                               .code   = BPF_LD | BPF_ABS | BPF_B,
+                               .k      = fp->k,
+                       };
+
+                       *seen_ld_abs = true;
+
+                       /* X = A */
+                       *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
                        /* A = BPF_R0 = *(u8 *) (skb->data + K) */
-                       *insn++ = BPF_LD_ABS(BPF_B, fp->k);
+                       convert_bpf_ld_abs(&tmp, &insn);
+                       insn++;
                        /* A &= 0xf */
                        *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
                        /* A <<= 2 */
                        *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
+                       /* tmp = X */
+                       *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
                        /* X = A */
                        *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
                        /* A = tmp */
                        *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
                        break;
-
+               }
                /* RET_K is remapped into 2 insns. RET_A case doesn't need an
                 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
                 */
@@ -656,6 +841,8 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
        if (!new_prog) {
                /* Only calculating new length. */
                *new_len = new_insn - first_insn;
+               if (*seen_ld_abs)
+                       *new_len += 4; /* Prologue bits. */
                return 0;
        }
 
@@ -1017,6 +1204,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
        struct sock_filter *old_prog;
        struct bpf_prog *old_fp;
        int err, new_len, old_len = fp->len;
+       bool seen_ld_abs = false;
 
        /* We are free to overwrite insns et al right here as it
         * won't be used at this point in time anymore internally
@@ -1038,7 +1226,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
        }
 
        /* 1st pass: calculate the new program length. */
-       err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
+       err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
+                                &seen_ld_abs);
        if (err)
                goto out_err_free;
 
@@ -1057,7 +1246,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
        fp->len = new_len;
 
        /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
-       err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
+       err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
+                                &seen_ld_abs);
        if (err)
                /* 2nd bpf_convert_filter() can fail only if it fails
                 * to allocate memory, remapping must succeed. Note,
@@ -1505,6 +1695,47 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
        .arg4_type      = ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
+          u32, offset, void *, to, u32, len, u32, start_header)
+{
+       u8 *ptr;
+
+       if (unlikely(offset > 0xffff || len > skb_headlen(skb)))
+               goto err_clear;
+
+       switch (start_header) {
+       case BPF_HDR_START_MAC:
+               ptr = skb_mac_header(skb) + offset;
+               break;
+       case BPF_HDR_START_NET:
+               ptr = skb_network_header(skb) + offset;
+               break;
+       default:
+               goto err_clear;
+       }
+
+       if (likely(ptr >= skb_mac_header(skb) &&
+                  ptr + len <= skb_tail_pointer(skb))) {
+               memcpy(to, ptr, len);
+               return 0;
+       }
+
+err_clear:
+       memset(to, 0, len);
+       return -EFAULT;
+}
+
+static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
+       .func           = bpf_skb_load_bytes_relative,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+       .arg5_type      = ARG_ANYTHING,
+};
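A hypothetical tc/BPF classifier fragment using the new helper (skb here is the program's struct __sk_buff context):

struct iphdr iph;

/* Read the IPv4 header relative to the network header; on failure
 * 'iph' is zeroed and the helper returns -EFAULT, as implemented above. */
if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
				BPF_HDR_START_NET))
	return TC_ACT_SHOT;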
+
 BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
 {
        /* Idea is the following: should the needed direct read/write
@@ -1850,6 +2081,33 @@ static const struct bpf_func_proto bpf_redirect_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
+          struct bpf_map *, map, void *, key, u64, flags)
+{
+       struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+       /* If user passes invalid input, drop the packet. */
+       if (unlikely(flags & ~(BPF_F_INGRESS)))
+               return SK_DROP;
+
+       tcb->bpf.flags = flags;
+       tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
+       if (!tcb->bpf.sk_redir)
+               return SK_DROP;
+
+       return SK_PASS;
+}
+
+static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
+       .func           = bpf_sk_redirect_hash,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_PTR_TO_MAP_KEY,
+       .arg4_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
           struct bpf_map *, map, u32, key, u64, flags)
 {
@@ -1859,9 +2117,10 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
        if (unlikely(flags & ~(BPF_F_INGRESS)))
                return SK_DROP;
 
-       tcb->bpf.key = key;
        tcb->bpf.flags = flags;
-       tcb->bpf.map = map;
+       tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
+       if (!tcb->bpf.sk_redir)
+               return SK_DROP;
 
        return SK_PASS;
 }
@@ -1869,16 +2128,8 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 struct sock *do_sk_redirect_map(struct sk_buff *skb)
 {
        struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-       struct sock *sk = NULL;
 
-       if (tcb->bpf.map) {
-               sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
-
-               tcb->bpf.key = 0;
-               tcb->bpf.map = NULL;
-       }
-
-       return sk;
+       return tcb->bpf.sk_redir;
 }
 
 static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
@@ -1891,32 +2142,49 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
-          struct bpf_map *, map, u32, key, u64, flags)
+BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
+          struct bpf_map *, map, void *, key, u64, flags)
 {
        /* If user passes invalid input, drop the packet. */
        if (unlikely(flags & ~(BPF_F_INGRESS)))
                return SK_DROP;
 
-       msg->key = key;
        msg->flags = flags;
-       msg->map = map;
+       msg->sk_redir = __sock_hash_lookup_elem(map, key);
+       if (!msg->sk_redir)
+               return SK_DROP;
 
        return SK_PASS;
 }
 
-struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
+       .func           = bpf_msg_redirect_hash,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_PTR_TO_MAP_KEY,
+       .arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+          struct bpf_map *, map, u32, key, u64, flags)
 {
-       struct sock *sk = NULL;
+       /* If user passes invalid input, drop the packet. */
+       if (unlikely(flags & ~(BPF_F_INGRESS)))
+               return SK_DROP;
 
-       if (msg->map) {
-               sk = __sock_map_lookup_elem(msg->map, msg->key);
+       msg->flags = flags;
+       msg->sk_redir = __sock_map_lookup_elem(map, key);
+       if (!msg->sk_redir)
+               return SK_DROP;
 
-               msg->key = 0;
-               msg->map = NULL;
-       }
+       return SK_PASS;
+}
 
-       return sk;
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+       return msg->sk_redir;
 }
 
 static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
@@ -2179,7 +2447,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
        return ret;
 }
 
-const struct bpf_func_proto bpf_skb_vlan_push_proto = {
+static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
        .func           = bpf_skb_vlan_push,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
@@ -2187,7 +2455,6 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = {
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_ANYTHING,
 };
-EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
 
 BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
 {
@@ -2201,13 +2468,12 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
        return ret;
 }
 
-const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
+static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
        .func           = bpf_skb_vlan_pop,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_CTX,
 };
-EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
 
 static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
 {
@@ -2800,7 +3066,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 {
        int err;
 
-       if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+       switch (map->map_type) {
+       case BPF_MAP_TYPE_DEVMAP: {
                struct net_device *dev = fwd;
                struct xdp_frame *xdpf;
 
@@ -2818,14 +3085,25 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
                if (err)
                        return err;
                __dev_map_insert_ctx(map, index);
-
-       } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+               break;
+       }
+       case BPF_MAP_TYPE_CPUMAP: {
                struct bpf_cpu_map_entry *rcpu = fwd;
 
                err = cpu_map_enqueue(rcpu, xdp, dev_rx);
                if (err)
                        return err;
                __cpu_map_insert_ctx(map, index);
+               break;
+       }
+       case BPF_MAP_TYPE_XSKMAP: {
+               struct xdp_sock *xs = fwd;
+
+               err = __xsk_map_redirect(map, xdp, xs);
+               return err;
+       }
+       default:
+               break;
        }
        return 0;
 }
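A hypothetical XDP program exercising the new BPF_MAP_TYPE_XSKMAP branch, assuming an xskmap named xsks_map is defined elsewhere in the program:

SEC("xdp")
int demo_xsk_redirect(struct xdp_md *ctx)
{
	/* Hand the frame to the AF_XDP socket bound at this RX queue;
	 * an empty slot makes the redirect fail and the frame is dropped. */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
}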
@@ -2844,6 +3122,9 @@ void xdp_do_flush_map(void)
                case BPF_MAP_TYPE_CPUMAP:
                        __cpu_map_flush(map);
                        break;
+               case BPF_MAP_TYPE_XSKMAP:
+                       __xsk_map_flush(map);
+                       break;
                default:
                        break;
                }
@@ -2858,6 +3139,8 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
                return __dev_map_lookup_elem(map, index);
        case BPF_MAP_TYPE_CPUMAP:
                return __cpu_map_lookup_elem(map, index);
+       case BPF_MAP_TYPE_XSKMAP:
+               return __xsk_map_lookup_elem(map, index);
        default:
                return NULL;
        }
@@ -2955,13 +3238,14 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
 
 static int xdp_do_generic_redirect_map(struct net_device *dev,
                                       struct sk_buff *skb,
+                                      struct xdp_buff *xdp,
                                       struct bpf_prog *xdp_prog)
 {
        struct redirect_info *ri = this_cpu_ptr(&redirect_info);
        unsigned long map_owner = ri->map_owner;
        struct bpf_map *map = ri->map;
-       struct net_device *fwd = NULL;
        u32 index = ri->ifindex;
+       void *fwd = NULL;
        int err = 0;
 
        ri->ifindex = 0;
@@ -2983,6 +3267,14 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
                if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
                        goto err;
                skb->dev = fwd;
+               generic_xdp_tx(skb, xdp_prog);
+       } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
+               struct xdp_sock *xs = fwd;
+
+               err = xsk_generic_rcv(xs, xdp);
+               if (err)
+                       goto err;
+               consume_skb(skb);
        } else {
                /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
                err = -EBADRQC;
@@ -2997,7 +3289,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 }
 
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-                           struct bpf_prog *xdp_prog)
+                           struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
        struct redirect_info *ri = this_cpu_ptr(&redirect_info);
        u32 index = ri->ifindex;
@@ -3005,7 +3297,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
        int err = 0;
 
        if (ri->map)
-               return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+               return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
 
        ri->ifindex = 0;
        fwd = dev_get_by_index_rcu(dev_net(dev), index);
@@ -3019,6 +3311,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 
        skb->dev = fwd;
        _trace_xdp_redirect(dev, xdp_prog, index);
+       generic_xdp_tx(skb, xdp_prog);
        return 0;
 err:
        _trace_xdp_redirect_err(dev, xdp_prog, index, err);
@@ -3280,6 +3573,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
        skb_dst_set(skb, (struct dst_entry *) md);
 
        info = &md->u.tun_info;
+       memset(info, 0, sizeof(*info));
        info->mode = IP_TUNNEL_INFO_TX;
 
        info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
@@ -3743,6 +4037,308 @@ static const struct bpf_func_proto bpf_bind_proto = {
        .arg3_type      = ARG_CONST_SIZE,
 };
 
+#ifdef CONFIG_XFRM
+BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
+          struct bpf_xfrm_state *, to, u32, size, u64, flags)
+{
+       const struct sec_path *sp = skb_sec_path(skb);
+       const struct xfrm_state *x;
+
+       if (!sp || unlikely(index >= sp->len || flags))
+               goto err_clear;
+
+       x = sp->xvec[index];
+
+       if (unlikely(size != sizeof(struct bpf_xfrm_state)))
+               goto err_clear;
+
+       to->reqid = x->props.reqid;
+       to->spi = x->id.spi;
+       to->family = x->props.family;
+       if (to->family == AF_INET6) {
+               memcpy(to->remote_ipv6, x->props.saddr.a6,
+                      sizeof(to->remote_ipv6));
+       } else {
+               to->remote_ipv4 = x->props.saddr.a4;
+       }
+
+       return 0;
+err_clear:
+       memset(to, 0, size);
+       return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
+       .func           = bpf_skb_get_xfrm_state,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+       .arg5_type      = ARG_ANYTHING,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
+static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
+                                 const struct neighbour *neigh,
+                                 const struct net_device *dev)
+{
+       memcpy(params->dmac, neigh->ha, ETH_ALEN);
+       memcpy(params->smac, dev->dev_addr, ETH_ALEN);
+       params->h_vlan_TCI = 0;
+       params->h_vlan_proto = 0;
+
+       return dev->ifindex;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+                              u32 flags)
+{
+       struct in_device *in_dev;
+       struct neighbour *neigh;
+       struct net_device *dev;
+       struct fib_result res;
+       struct fib_nh *nh;
+       struct flowi4 fl4;
+       int err;
+
+       dev = dev_get_by_index_rcu(net, params->ifindex);
+       if (unlikely(!dev))
+               return -ENODEV;
+
+       /* verify forwarding is enabled on this interface */
+       in_dev = __in_dev_get_rcu(dev);
+       if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
+               return 0;
+
+       if (flags & BPF_FIB_LOOKUP_OUTPUT) {
+               fl4.flowi4_iif = 1;
+               fl4.flowi4_oif = params->ifindex;
+       } else {
+               fl4.flowi4_iif = params->ifindex;
+               fl4.flowi4_oif = 0;
+       }
+       fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
+       fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+       fl4.flowi4_flags = 0;
+
+       fl4.flowi4_proto = params->l4_protocol;
+       fl4.daddr = params->ipv4_dst;
+       fl4.saddr = params->ipv4_src;
+       fl4.fl4_sport = params->sport;
+       fl4.fl4_dport = params->dport;
+
+       if (flags & BPF_FIB_LOOKUP_DIRECT) {
+               u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+               struct fib_table *tb;
+
+               tb = fib_get_table(net, tbid);
+               if (unlikely(!tb))
+                       return 0;
+
+               err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
+       } else {
+               fl4.flowi4_mark = 0;
+               fl4.flowi4_secid = 0;
+               fl4.flowi4_tun_key.tun_id = 0;
+               fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+               err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
+       }
+
+       if (err || res.type != RTN_UNICAST)
+               return 0;
+
+       if (res.fi->fib_nhs > 1)
+               fib_select_path(net, &res, &fl4, NULL);
+
+       nh = &res.fi->fib_nh[res.nh_sel];
+
+       /* do not handle lwt encaps right now */
+       if (nh->nh_lwtstate)
+               return 0;
+
+       dev = nh->nh_dev;
+       if (unlikely(!dev))
+               return 0;
+
+       if (nh->nh_gw)
+               params->ipv4_dst = nh->nh_gw;
+
+       params->rt_metric = res.fi->fib_priority;
+
+       /* xdp and cls_bpf programs are run in RCU-bh so
+        * rcu_read_lock_bh is not needed here
+        */
+       neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
+       if (neigh)
+               return bpf_fib_set_fwd_params(params, neigh, dev);
+
+       return 0;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+                              u32 flags)
+{
+       struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
+       struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
+       struct neighbour *neigh;
+       struct net_device *dev;
+       struct inet6_dev *idev;
+       struct fib6_info *f6i;
+       struct flowi6 fl6;
+       int strict = 0;
+       int oif;
+
+       /* link local addresses are never forwarded */
+       if (rt6_need_strict(dst) || rt6_need_strict(src))
+               return 0;
+
+       dev = dev_get_by_index_rcu(net, params->ifindex);
+       if (unlikely(!dev))
+               return -ENODEV;
+
+       idev = __in6_dev_get_safely(dev);
+       if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
+               return 0;
+
+       if (flags & BPF_FIB_LOOKUP_OUTPUT) {
+               fl6.flowi6_iif = 1;
+               oif = fl6.flowi6_oif = params->ifindex;
+       } else {
+               oif = fl6.flowi6_iif = params->ifindex;
+               fl6.flowi6_oif = 0;
+               strict = RT6_LOOKUP_F_HAS_SADDR;
+       }
+       fl6.flowlabel = params->flowlabel;
+       fl6.flowi6_scope = 0;
+       fl6.flowi6_flags = 0;
+       fl6.mp_hash = 0;
+
+       fl6.flowi6_proto = params->l4_protocol;
+       fl6.daddr = *dst;
+       fl6.saddr = *src;
+       fl6.fl6_sport = params->sport;
+       fl6.fl6_dport = params->dport;
+
+       if (flags & BPF_FIB_LOOKUP_DIRECT) {
+               u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+               struct fib6_table *tb;
+
+               tb = ipv6_stub->fib6_get_table(net, tbid);
+               if (unlikely(!tb))
+                       return 0;
+
+               f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+       } else {
+               fl6.flowi6_mark = 0;
+               fl6.flowi6_secid = 0;
+               fl6.flowi6_tun_key.tun_id = 0;
+               fl6.flowi6_uid = sock_net_uid(net, NULL);
+
+               f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+       }
+
+       if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+               return 0;
+
+       if (unlikely(f6i->fib6_flags & RTF_REJECT ||
+           f6i->fib6_type != RTN_UNICAST))
+               return 0;
+
+       if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
+               f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
+                                                      fl6.flowi6_oif, NULL,
+                                                      strict);
+
+       if (f6i->fib6_nh.nh_lwtstate)
+               return 0;
+
+       if (f6i->fib6_flags & RTF_GATEWAY)
+               *dst = f6i->fib6_nh.nh_gw;
+
+       dev = f6i->fib6_nh.nh_dev;
+       params->rt_metric = f6i->fib6_metric;
+
+       /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
+        * not needed here. Cannot use __ipv6_neigh_lookup_noref here
+        * because we need to get nd_tbl via the stub
+        */
+       neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
+                                     ndisc_hashfn, dst, dev);
+       if (neigh)
+               return bpf_fib_set_fwd_params(params, neigh, dev);
+
+       return 0;
+}
+#endif
+
+BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
+          struct bpf_fib_lookup *, params, int, plen, u32, flags)
+{
+       if (plen < sizeof(*params))
+               return -EINVAL;
+
+       switch (params->family) {
+#if IS_ENABLED(CONFIG_INET)
+       case AF_INET:
+               return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
+                                          flags);
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+       case AF_INET6:
+               return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
+                                          flags);
+#endif
+       }
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
+       .func           = bpf_xdp_fib_lookup,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
+          struct bpf_fib_lookup *, params, int, plen, u32, flags)
+{
+       if (plen < sizeof(*params))
+               return -EINVAL;
+
+       switch (params->family) {
+#if IS_ENABLED(CONFIG_INET)
+       case AF_INET:
+               return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+       case AF_INET6:
+               return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
+#endif
+       }
+       return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
+       .func           = bpf_skb_fib_lookup,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
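
This block adds two helper families to the UAPI. bpf_skb_get_xfrm_state() exposes the n-th IPsec transform on a tc-classified skb (reqid, SPI, family, peer address). bpf_fib_lookup(), which is gpl_only, performs an IPv4/IPv6 FIB plus neighbour lookup from XDP or tc programs; at this point in the tree it returns the egress ifindex (> 0) with dmac/smac filled in on success, 0 when the packet should continue to the stack, and a negative errno on failure. An IPv4-only forwarding sketch in the spirit of samples/bpf/xdp_fwd_kern.c (TTL handling and IPv6 omitted; headers per the usual clang BPF build):

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <arpa/inet.h>              /* htons/ntohs */
    #include <sys/socket.h>             /* AF_INET */
    #include "bpf_helpers.h"            /* assumed helper header */

    SEC("xdp_fwd")
    int xdp_fwd_prog(struct xdp_md *ctx)
    {
            void *data_end = (void *)(long)ctx->data_end;
            void *data = (void *)(long)ctx->data;
            struct bpf_fib_lookup fib_params = {};
            struct ethhdr *eth = data;
            struct iphdr *iph;
            int rc;

            if (data + sizeof(*eth) + sizeof(*iph) > data_end)
                    return XDP_PASS;
            if (eth->h_proto != htons(ETH_P_IP))
                    return XDP_PASS;
            iph = data + sizeof(*eth);

            fib_params.family      = AF_INET;
            fib_params.tos         = iph->tos;
            fib_params.l4_protocol = iph->protocol;
            fib_params.tot_len     = ntohs(iph->tot_len);
            fib_params.ipv4_src    = iph->saddr;
            fib_params.ipv4_dst    = iph->daddr;
            fib_params.ifindex     = ctx->ingress_ifindex;

            rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), 0);
            if (rc <= 0)                /* 0: to stack, < 0: error */
                    return XDP_PASS;

            /* the helper filled in the next hop's L2 addresses */
            __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
            __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
            return bpf_redirect(rc, 0); /* rc is the egress ifindex */
    }

    char _license[] SEC("license") = "GPL";
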
@@ -3813,6 +4409,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        switch (func_id) {
        case BPF_FUNC_skb_load_bytes:
                return &bpf_skb_load_bytes_proto;
+       case BPF_FUNC_skb_load_bytes_relative:
+               return &bpf_skb_load_bytes_relative_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_cookie_proto;
        case BPF_FUNC_get_socket_uid:
@@ -3830,6 +4428,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_skb_store_bytes_proto;
        case BPF_FUNC_skb_load_bytes:
                return &bpf_skb_load_bytes_proto;
+       case BPF_FUNC_skb_load_bytes_relative:
+               return &bpf_skb_load_bytes_relative_proto;
        case BPF_FUNC_skb_pull_data:
                return &bpf_skb_pull_data_proto;
        case BPF_FUNC_csum_diff:
@@ -3884,6 +4484,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_socket_cookie_proto;
        case BPF_FUNC_get_socket_uid:
                return &bpf_get_socket_uid_proto;
+#ifdef CONFIG_XFRM
+       case BPF_FUNC_skb_get_xfrm_state:
+               return &bpf_skb_get_xfrm_state_proto;
+#endif
+       case BPF_FUNC_fib_lookup:
+               return &bpf_skb_fib_lookup_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -3909,6 +4515,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_redirect_map_proto;
        case BPF_FUNC_xdp_adjust_tail:
                return &bpf_xdp_adjust_tail_proto;
+       case BPF_FUNC_fib_lookup:
+               return &bpf_xdp_fib_lookup_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -3953,6 +4561,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sock_ops_cb_flags_set_proto;
        case BPF_FUNC_sock_map_update:
                return &bpf_sock_map_update_proto;
+       case BPF_FUNC_sock_hash_update:
+               return &bpf_sock_hash_update_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -3964,6 +4574,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        switch (func_id) {
        case BPF_FUNC_msg_redirect_map:
                return &bpf_msg_redirect_map_proto;
+       case BPF_FUNC_msg_redirect_hash:
+               return &bpf_msg_redirect_hash_proto;
        case BPF_FUNC_msg_apply_bytes:
                return &bpf_msg_apply_bytes_proto;
        case BPF_FUNC_msg_cork_bytes:
@@ -3995,6 +4607,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_socket_uid_proto;
        case BPF_FUNC_sk_redirect_map:
                return &bpf_sk_redirect_map_proto;
+       case BPF_FUNC_sk_redirect_hash:
+               return &bpf_sk_redirect_hash_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
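
The *_hash variants slotted in above are the sockhash counterparts of the existing sockmap helpers: BPF_MAP_TYPE_SOCKHASH keys established sockets by an arbitrary, caller-defined key rather than a dense u32 index. A stream-verdict sketch (key choice, map layout and section name illustrative; the program attaches with BPF_SK_SKB_STREAM_VERDICT):

    #include <linux/bpf.h>
    #include "bpf_helpers.h"            /* assumed helper header */

    struct bpf_map_def SEC("maps") sock_hash = {
            .type           = BPF_MAP_TYPE_SOCKHASH,
            .key_size       = sizeof(__u32),
            .value_size     = sizeof(__u32),
            .max_entries    = 1024,
    };

    SEC("sk_skb_verdict")
    int prog_verdict(struct __sk_buff *skb)
    {
            __u32 key = skb->remote_port;       /* illustrative key */

            /* SK_PASS on success, SK_DROP if no socket matches */
            return bpf_sk_redirect_hash(skb, &sock_hash, &key, 0);
    }

    char _license[] SEC("license") = "GPL";
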
@@ -4255,6 +4869,41 @@ static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
        return insn - insn_buf;
 }
 
+static int bpf_gen_ld_abs(const struct bpf_insn *orig,
+                         struct bpf_insn *insn_buf)
+{
+       bool indirect = BPF_MODE(orig->code) == BPF_IND;
+       struct bpf_insn *insn = insn_buf;
+
+       /* We're guaranteed here that CTX is in R6. */
+       *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
+       if (!indirect) {
+               *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
+       } else {
+               *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
+               if (orig->imm)
+                       *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
+       }
+
+       switch (BPF_SIZE(orig->code)) {
+       case BPF_B:
+               *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
+               break;
+       case BPF_H:
+               *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
+               break;
+       case BPF_W:
+               *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
+               break;
+       }
+
+       *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
+       *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
+       *insn++ = BPF_EXIT_INSN();
+
+       return insn - insn_buf;
+}
+
 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
                               const struct bpf_prog *prog)
 {
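
bpf_gen_ld_abs() lets the verifier rewrite classic LD_ABS/LD_IND loads into ordinary internal-BPF sequences: a call to the matching *_no_cache load helper, then "if the result is negative, set r0 to 0 and exit", which preserves the classic semantics of returning 0 (drop) when the load is out of bounds. Such instructions still reach the kernel from every classic filter, for instance this complete userspace program (needs CAP_NET_RAW):

    #include <arpa/inet.h>
    #include <linux/filter.h>
    #include <linux/if_ether.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
            struct sock_filter code[] = {
                    /* A = skb[12:2], an LD_ABS load of the EtherType */
                    BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
                    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
                    BPF_STMT(BPF_RET | BPF_K, 0xffff),  /* accept IPv4 */
                    BPF_STMT(BPF_RET | BPF_K, 0),       /* drop the rest */
            };
            struct sock_fprog prog = {
                    .len    = sizeof(code) / sizeof(code[0]),
                    .filter = code,
            };
            int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

            if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                                     &prog, sizeof(prog)) < 0)
                    perror("SO_ATTACH_FILTER");
            return 0;
    }
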
@@ -4313,8 +4962,15 @@ static bool xdp_is_valid_access(int off, int size,
                                const struct bpf_prog *prog,
                                struct bpf_insn_access_aux *info)
 {
-       if (type == BPF_WRITE)
+       if (type == BPF_WRITE) {
+               if (bpf_prog_is_dev_bound(prog->aux)) {
+                       switch (off) {
+                       case offsetof(struct xdp_md, rx_queue_index):
+                               return __is_valid_xdp_access(off, size);
+                       }
+               }
                return false;
+       }
 
        switch (off) {
        case offsetof(struct xdp_md, data):
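
Writes to the XDP context remain rejected in general; the exception carved out above is rx_queue_index, and only for device-bound (offloaded) programs, where the NIC itself can act on the new value. A sketch of a store only such a program may perform:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"            /* assumed helper header */

    SEC("xdp")
    int xdp_steer(struct xdp_md *ctx)
    {
            /* the verifier rejects this store unless the program was
             * loaded with an ifindex, i.e. offloaded to the device
             */
            ctx->rx_queue_index = 0;
            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";
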
@@ -5524,6 +6180,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = {
        .get_func_proto         = sk_filter_func_proto,
        .is_valid_access        = sk_filter_is_valid_access,
        .convert_ctx_access     = bpf_convert_ctx_access,
+       .gen_ld_abs             = bpf_gen_ld_abs,
 };
 
 const struct bpf_prog_ops sk_filter_prog_ops = {
@@ -5535,6 +6192,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
        .is_valid_access        = tc_cls_act_is_valid_access,
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
+       .gen_ld_abs             = bpf_gen_ld_abs,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
index d29f09bc5ff90c457513c46200310cb00dd0c188..4fc1e84d77ec1d8469903463e2226fe667d4ece1 100644 (file)
@@ -1253,7 +1253,7 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-                  const struct flow_keys *keys, int hlen)
+                  const struct flow_keys_basic *keys, int hlen)
 {
        u32 poff = keys->control.thoff;
 
@@ -1314,9 +1314,9 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
  */
 u32 skb_get_poff(const struct sk_buff *skb)
 {
-       struct flow_keys keys;
+       struct flow_keys_basic keys;
 
-       if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
+       if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
                return 0;
 
        return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
@@ -1403,7 +1403,7 @@ static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
        },
 };
 
-static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = {
        {
                .key_id = FLOW_DISSECTOR_KEY_CONTROL,
                .offset = offsetof(struct flow_keys, control),
@@ -1417,7 +1417,8 @@ static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
 struct flow_dissector flow_keys_dissector __read_mostly;
 EXPORT_SYMBOL(flow_keys_dissector);
 
-struct flow_dissector flow_keys_buf_dissector __read_mostly;
+struct flow_dissector flow_keys_basic_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_basic_dissector);
 
 static int __init init_default_flow_dissectors(void)
 {
@@ -1427,9 +1428,9 @@ static int __init init_default_flow_dissectors(void)
        skb_flow_dissector_init(&flow_keys_dissector_symmetric,
                                flow_keys_dissector_symmetric_keys,
                                ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
-       skb_flow_dissector_init(&flow_keys_buf_dissector,
-                               flow_keys_buf_dissector_keys,
-                               ARRAY_SIZE(flow_keys_buf_dissector_keys));
+       skb_flow_dissector_init(&flow_keys_basic_dissector,
+                               flow_keys_basic_dissector_keys,
+                               ARRAY_SIZE(flow_keys_basic_dissector_keys));
        return 0;
 }
 
index ce519861be5992b504c519ca36edb05d2ecc02b7..5afae29367c1b75b80b7cdb4194b66a0fd49b487 100644 (file)
@@ -820,7 +820,8 @@ static void neigh_periodic_work(struct work_struct *work)
                        write_lock(&n->lock);
 
                        state = n->nud_state;
-                       if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+                       if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
+                           (n->flags & NTF_EXT_LEARNED)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }
@@ -1136,6 +1137,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
        if (neigh->dead)
                goto out;
 
+       neigh_update_ext_learned(neigh, flags, &notify);
+
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
@@ -1781,6 +1784,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
                        flags &= ~NEIGH_UPDATE_F_OVERRIDE;
        }
 
+       if (ndm->ndm_flags & NTF_EXT_LEARNED)
+               flags |= NEIGH_UPDATE_F_EXT_LEARNED;
+
        if (ndm->ndm_flags & NTF_USE) {
                neigh_event_send(neigh, NULL);
                err = 0;
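
Taken together, these hunks give user space managed ("externally learned") neighbour entries: NTF_EXT_LEARNED on an RTM_NEWNEIGH request maps to NEIGH_UPDATE_F_EXT_LEARNED, and the periodic GC now skips such entries just as it skips NUD_PERMANENT ones, leaving their lifetime to the controlling daemon. A libmnl sketch of installing one such entry (interface, addresses illustrative; error handling elided):

    #include <arpa/inet.h>
    #include <libmnl/libmnl.h>
    #include <linux/if_ether.h>
    #include <linux/neighbour.h>
    #include <linux/rtnetlink.h>
    #include <net/if.h>
    #include <time.h>

    int main(void)
    {
            char buf[MNL_SOCKET_BUFFER_SIZE];
            unsigned char lladdr[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
            struct mnl_socket *nl;
            struct nlmsghdr *nlh;
            struct ndmsg *ndm;
            struct in_addr dst;

            inet_pton(AF_INET, "192.0.2.1", &dst);

            nlh = mnl_nlmsg_put_header(buf);
            nlh->nlmsg_type  = RTM_NEWNEIGH;
            nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
            nlh->nlmsg_seq   = time(NULL);

            ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
            ndm->ndm_family  = AF_INET;
            ndm->ndm_ifindex = if_nametoindex("eth0");  /* illustrative */
            ndm->ndm_state   = NUD_REACHABLE;
            ndm->ndm_flags   = NTF_EXT_LEARNED; /* user space owns this entry */

            mnl_attr_put(nlh, NDA_DST, sizeof(dst), &dst);
            mnl_attr_put(nlh, NDA_LLADDR, sizeof(lladdr), lladdr);

            nl = mnl_socket_open(NETLINK_ROUTE);
            mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
            mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
            mnl_socket_close(nl);
            return 0;
    }
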
index ff49e352deea1d07049f5c5ac425fbcdb4fd96a8..c642304f178ce0a4e1358d59e45032a39f76fb3f 100644 (file)
@@ -1305,7 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
        skb->inner_mac_header += off;
 }
 
-static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
 {
        __copy_skb_header(new, old);
 
@@ -1313,6 +1313,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
        skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
        skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
+EXPORT_SYMBOL(skb_copy_header);
 
 static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
 {
@@ -1355,7 +1356,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 
        BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
 
-       copy_skb_header(n, skb);
+       skb_copy_header(n, skb);
        return n;
 }
 EXPORT_SYMBOL(skb_copy);
@@ -1419,7 +1420,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
                skb_clone_fraglist(n);
        }
 
-       copy_skb_header(n, skb);
+       skb_copy_header(n, skb);
 out:
        return n;
 }
@@ -1599,7 +1600,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
        BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
                             skb->len + head_copy_len));
 
-       copy_skb_header(n, skb);
+       skb_copy_header(n, skb);
 
        skb_headers_offset_update(n, newheadroom - oldheadroom);
 
@@ -4940,6 +4941,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
                thlen = tcp_hdrlen(skb);
        } else if (unlikely(skb_is_gso_sctp(skb))) {
                thlen = sizeof(struct sctphdr);
+       } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
+               thlen = sizeof(struct udphdr);
        }
        /* UFO sets gso_size to the size of the fragmentation
         * payload, i.e. the size of the L4 (UDP) header is already
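
The SKB_GSO_UDP_L4 case sizes segments for the UDP GSO transmit path merged in the same development cycle, where a single send is sliced into gso_size-byte datagrams. The user-facing knob is the UDP_SEGMENT socket option; a self-contained sketch (port, sizes and destination arbitrary):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef UDP_SEGMENT
    #define UDP_SEGMENT 103             /* from linux/udp.h */
    #endif

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0);
            int gso_size = 1200;        /* payload per emitted datagram */
            char buf[4800];             /* one send, four datagrams */
            struct sockaddr_in dst = {
                    .sin_family = AF_INET,
                    .sin_port   = htons(9000),
                    .sin_addr   = { htonl(INADDR_LOOPBACK) },
            };

            memset(buf, 'x', sizeof(buf));
            setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT,
                       &gso_size, sizeof(gso_size));
            sendto(fd, buf, sizeof(buf), 0,
                   (struct sockaddr *)&dst, sizeof(dst));
            close(fd);
            return 0;
    }
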
index b2c3db169ca1892c4d624fc5e30af12f4eed0adb..435a0ba85e52d0cca5004bb2c8a75229cc14a584 100644 (file)
@@ -226,7 +226,8 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
   x "AF_RXRPC" ,       x "AF_ISDN"     ,       x "AF_PHONET"   , \
   x "AF_IEEE802154",   x "AF_CAIF"     ,       x "AF_ALG"      , \
   x "AF_NFC"   ,       x "AF_VSOCK"    ,       x "AF_KCM"      , \
-  x "AF_QIPCRTR",      x "AF_SMC"      ,       x "AF_MAX"
+  x "AF_QIPCRTR",      x "AF_SMC"      ,       x "AF_XDP"      , \
+  x "AF_MAX"
 
 static const char *const af_family_key_strings[AF_MAX+1] = {
        _sock_locks("sk_lock-")
@@ -262,7 +263,8 @@ static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
   "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
   "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
   "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
-  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
+  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_XDP"      ,
+  "rlock-AF_MAX"
 };
 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
   "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
@@ -279,7 +281,8 @@ static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
   "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
   "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
   "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
-  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
+  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_XDP"      ,
+  "wlock-AF_MAX"
 };
 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
   "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
@@ -296,7 +299,8 @@ static const char *const af_family_elock_key_strings[AF_MAX+1] = {
   "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
   "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
   "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
-  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
+  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_XDP"      ,
+  "elock-AF_MAX"
 };
 
 /*
@@ -323,8 +327,8 @@ EXPORT_SYMBOL(sysctl_optmem_max);
 
 int sysctl_tstamp_allow_data __read_mostly = 1;
 
-struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
-EXPORT_SYMBOL_GPL(memalloc_socks);
+DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
+EXPORT_SYMBOL_GPL(memalloc_socks_key);
 
 /**
  * sk_set_memalloc - sets %SOCK_MEMALLOC
@@ -338,7 +342,7 @@ void sk_set_memalloc(struct sock *sk)
 {
        sock_set_flag(sk, SOCK_MEMALLOC);
        sk->sk_allocation |= __GFP_MEMALLOC;
-       static_key_slow_inc(&memalloc_socks);
+       static_branch_inc(&memalloc_socks_key);
 }
 EXPORT_SYMBOL_GPL(sk_set_memalloc);
 
@@ -346,7 +350,7 @@ void sk_clear_memalloc(struct sock *sk)
 {
        sock_reset_flag(sk, SOCK_MEMALLOC);
        sk->sk_allocation &= ~__GFP_MEMALLOC;
-       static_key_slow_dec(&memalloc_socks);
+       static_branch_dec(&memalloc_socks_key);
 
        /*
         * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
@@ -1609,7 +1613,7 @@ static void __sk_free(struct sock *sk)
        if (likely(sk->sk_net_refcnt))
                sock_inuse_add(sock_net(sk), -1);
 
-       if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
+       if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
                sock_diag_broadcast_destroy(sk);
        else
                sk_destruct(sk);
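
Two independent changes here: __sk_free() now tests sk_net_refcnt first, so kernel sockets (which hold no netns reference) never reach the destroy-listener check and a possibly stale sock_net(sk), and memalloc_socks moves from the older static_key API to the static-branch one. The latter pattern, as a generic sketch (key and function names hypothetical):

    #include <linux/jump_label.h>

    static DEFINE_STATIC_KEY_FALSE(my_feature_key);

    static void my_feature_enable(void)
    {
            static_branch_inc(&my_feature_key); /* patches the call sites */
    }

    static void my_feature_disable(void)
    {
            static_branch_dec(&my_feature_key);
    }

    static bool my_hot_path_check(void)
    {
            /* compiles to a straight-line no-op until first enabled */
            return static_branch_unlikely(&my_feature_key);
    }

In this tree the consumer side becomes a static_branch_unlikely() test on memalloc_socks_key in sk_memalloc_socks().
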
index 0c86b53a3a63104c482fd3abfe5027bddbeeb99e..bf6758f7433951e56659f8e84fc5a33e1d2de432 100644 (file)
@@ -308,11 +308,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
-void xdp_return_frame(struct xdp_frame *xdpf)
+static void xdp_return(void *data, struct xdp_mem_info *mem)
 {
-       struct xdp_mem_info *mem = &xdpf->mem;
        struct xdp_mem_allocator *xa;
-       void *data = xdpf->data;
        struct page *page;
 
        switch (mem->type) {
@@ -339,4 +337,15 @@ void xdp_return_frame(struct xdp_frame *xdpf)
                break;
        }
 }
+
+void xdp_return_frame(struct xdp_frame *xdpf)
+{
+       xdp_return(xdpf->data, &xdpf->mem);
+}
 EXPORT_SYMBOL_GPL(xdp_return_frame);
+
+void xdp_return_buff(struct xdp_buff *xdp)
+{
+       xdp_return(xdp->data, &xdp->rxq->mem);
+}
+EXPORT_SYMBOL_GPL(xdp_return_buff);
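
Factoring the allocator dispatch into a common xdp_return() lets the new xdp_return_buff() hand back a still-live xdp_buff, which the AF_XDP receive path uses on its error side, while xdp_return_frame() keeps serving converted frames. A hypothetical driver-side error path:

    /* sketch only: my_rx_error() is a hypothetical driver routine */
    static void my_rx_error(struct xdp_buff *xdp)
    {
            /* returns the buffer to whatever allocator the RX queue
             * registered (page_pool, page fragment, or plain pages),
             * selected by xdp->rxq->mem.type
             */
            xdp_return_buff(xdp);
    }
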
index 92d016e87816e3bc40a629691d0db2a6915d2c28..385f153fe0318a160120e98a093260a55e485317 100644 (file)
@@ -126,6 +126,16 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
                                                  DCCPF_SEQ_WMAX));
 }
 
+static void dccp_tasklet_schedule(struct sock *sk)
+{
+       struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet;
+
+       if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+               sock_hold(sk);
+               __tasklet_schedule(t);
+       }
+}
+
 static void ccid2_hc_tx_rto_expire(struct timer_list *t)
 {
        struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
@@ -166,7 +176,7 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
 
        /* if we were blocked before, we may now send cwnd=1 packet */
        if (sender_was_blocked)
-               tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+               dccp_tasklet_schedule(sk);
        /* restart backed-off timer */
        sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 out:
@@ -706,7 +716,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 done:
        /* check if incoming Acks allow pending packets to be sent */
        if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
-               tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+               dccp_tasklet_schedule(sk);
        dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
index b50a8732ff434db3d015202ddd8c06d7edfc88cf..1501a20a94ca0c6084d81fcdfa1ec6b630ad6143 100644 (file)
@@ -232,6 +232,7 @@ static void dccp_write_xmitlet(unsigned long data)
        else
                dccp_write_xmit(sk);
        bh_unlock_sock(sk);
+       sock_put(sk);
 }
 
 static void dccp_write_xmit_timer(struct timer_list *t)
@@ -240,7 +241,6 @@ static void dccp_write_xmit_timer(struct timer_list *t)
        struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
 
        dccp_write_xmitlet((unsigned long)sk);
-       sock_put(sk);
 }
 
 void dccp_init_xmit_timers(struct sock *sk)
index bbf2c82cf7b29a2fdc727b0304208f5271486d9e..4183e4ba27a50c3cf52bff9756d0552da612d619 100644 (file)
@@ -9,7 +9,7 @@ config NET_DSA
        depends on HAVE_NET_DSA && MAY_USE_DEVLINK
        depends on BRIDGE || BRIDGE=n
        select NET_SWITCHDEV
-       select PHYLIB
+       select PHYLINK
        ---help---
          Say Y if you want to enable support for the hardware switches supported
          by the Distributed Switch Architecture.
index adf50fbc4c13e7de8baa63881f2b5fc5314dc874..dc5d9af3dc80d29a8a2bc9bca37c349bd99d2e2a 100644 (file)
@@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
 static int dsa_port_setup(struct dsa_port *dp)
 {
        struct dsa_switch *ds = dp->ds;
-       int err;
+       int err = 0;
 
        memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
 
-       err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
+       if (dp->type != DSA_PORT_TYPE_UNUSED)
+               err = devlink_port_register(ds->devlink, &dp->devlink_port,
+                                           dp->index);
        if (err)
                return err;
 
@@ -270,7 +272,28 @@ static int dsa_port_setup(struct dsa_port *dp)
        case DSA_PORT_TYPE_UNUSED:
                break;
        case DSA_PORT_TYPE_CPU:
+               /* dp->index is used now as port_number. However
+                * CPU ports should have separate numbering
+                * independent from front panel port numbers.
+                */
+               devlink_port_attrs_set(&dp->devlink_port,
+                                      DEVLINK_PORT_FLAVOUR_CPU,
+                                      dp->index, false, 0);
+               err = dsa_port_link_register_of(dp);
+               if (err) {
+                       dev_err(ds->dev, "failed to setup link for port %d.%d\n",
+                               ds->index, dp->index);
+                       return err;
+               }
+               break;
        case DSA_PORT_TYPE_DSA:
+               /* dp->index is used now as port_number. However
+                * DSA ports should have separate numbering
+                * independent from front panel port numbers.
+                */
+               devlink_port_attrs_set(&dp->devlink_port,
+                                      DEVLINK_PORT_FLAVOUR_DSA,
+                                      dp->index, false, 0);
                err = dsa_port_link_register_of(dp);
                if (err) {
                        dev_err(ds->dev, "failed to setup link for port %d.%d\n",
@@ -279,6 +302,9 @@ static int dsa_port_setup(struct dsa_port *dp)
                }
                break;
        case DSA_PORT_TYPE_USER:
+               devlink_port_attrs_set(&dp->devlink_port,
+                                      DEVLINK_PORT_FLAVOUR_PHYSICAL,
+                                      dp->index, false, 0);
                err = dsa_slave_create(dp);
                if (err)
                        dev_err(ds->dev, "failed to create slave for port %d.%d\n",
@@ -293,7 +319,8 @@ static int dsa_port_setup(struct dsa_port *dp)
 
 static void dsa_port_teardown(struct dsa_port *dp)
 {
-       devlink_port_unregister(&dp->devlink_port);
+       if (dp->type != DSA_PORT_TYPE_UNUSED)
+               devlink_port_unregister(&dp->devlink_port);
 
        switch (dp->type) {
        case DSA_PORT_TYPE_UNUSED:
index 053731473c9932a8e32a12c43f743c772a661e85..3964c6f7a7c0d7c6436672c221c2d58802f7227c 100644 (file)
@@ -75,15 +75,6 @@ struct dsa_slave_priv {
        /* DSA port data, such as switch, port index, etc. */
        struct dsa_port         *dp;
 
-       /*
-        * The phylib phy_device pointer for the PHY connected
-        * to this port.
-        */
-       phy_interface_t         phy_interface;
-       int                     old_link;
-       int                     old_pause;
-       int                     old_duplex;
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
        struct netpoll          *netpoll;
 #endif
index 90e6df0351eb218a6621151960ca2109c3698580..c90ee3227deab6281c3df301d14bb6e81a6e1011 100644 (file)
@@ -22,7 +22,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev,
        int port = cpu_dp->index;
        int count = 0;
 
-       if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
+       if (ops->get_sset_count && ops->get_ethtool_stats) {
                count = ops->get_sset_count(dev, ETH_SS_STATS);
                ops->get_ethtool_stats(dev, stats, data);
        }
@@ -31,6 +31,32 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev,
                ds->ops->get_ethtool_stats(ds, port, data + count);
 }
 
+static void dsa_master_get_ethtool_phy_stats(struct net_device *dev,
+                                            struct ethtool_stats *stats,
+                                            uint64_t *data)
+{
+       struct dsa_port *cpu_dp = dev->dsa_ptr;
+       const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+       struct dsa_switch *ds = cpu_dp->ds;
+       int port = cpu_dp->index;
+       int count = 0;
+
+       if (dev->phydev && !ops->get_ethtool_phy_stats) {
+               count = phy_ethtool_get_sset_count(dev->phydev);
+               if (count >= 0)
+                       phy_ethtool_get_stats(dev->phydev, stats, data);
+       } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) {
+               count = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+               ops->get_ethtool_phy_stats(dev, stats, data);
+       }
+
+       if (count < 0)
+               count = 0;
+
+       if (ds->ops->get_ethtool_phy_stats)
+               ds->ops->get_ethtool_phy_stats(ds, port, data + count);
+}
+
 static int dsa_master_get_sset_count(struct net_device *dev, int sset)
 {
        struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -38,11 +64,17 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
        struct dsa_switch *ds = cpu_dp->ds;
        int count = 0;
 
-       if (ops && ops->get_sset_count)
-               count += ops->get_sset_count(dev, sset);
+       if (sset == ETH_SS_PHY_STATS && dev->phydev &&
+           !ops->get_ethtool_phy_stats)
+               count = phy_ethtool_get_sset_count(dev->phydev);
+       else if (ops->get_sset_count)
+               count = ops->get_sset_count(dev, sset);
+
+       if (count < 0)
+               count = 0;
 
-       if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
-               count += ds->ops->get_sset_count(ds, cpu_dp->index);
+       if (ds->ops->get_sset_count)
+               count += ds->ops->get_sset_count(ds, cpu_dp->index, sset);
 
        return count;
 }
@@ -64,19 +96,28 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
        /* We do not want to be NULL-terminated, since this is a prefix */
        pfx[sizeof(pfx) - 1] = '_';
 
-       if (ops && ops->get_sset_count && ops->get_strings) {
-               mcount = ops->get_sset_count(dev, ETH_SS_STATS);
+       if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
+           !ops->get_ethtool_phy_stats) {
+               mcount = phy_ethtool_get_sset_count(dev->phydev);
+               if (mcount < 0)
+                       mcount = 0;
+               else
+                       phy_ethtool_get_strings(dev->phydev, data);
+       } else if (ops->get_sset_count && ops->get_strings) {
+               mcount = ops->get_sset_count(dev, stringset);
+               if (mcount < 0)
+                       mcount = 0;
                ops->get_strings(dev, stringset, data);
        }
 
-       if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
+       if (ds->ops->get_strings) {
                ndata = data + mcount * len;
                /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
                 * the output after to prepend our CPU port prefix we
                 * constructed earlier
                 */
-               ds->ops->get_strings(ds, port, ndata);
-               count = ds->ops->get_sset_count(ds, port);
+               ds->ops->get_strings(ds, port, stringset, ndata);
+               count = ds->ops->get_sset_count(ds, port, stringset);
                for (i = 0; i < count; i++) {
                        memmove(ndata + (i * len + sizeof(pfx)),
                                ndata + i * len, len - sizeof(pfx));
@@ -102,6 +143,7 @@ static int dsa_master_ethtool_setup(struct net_device *dev)
        ops->get_sset_count = dsa_master_get_sset_count;
        ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
        ops->get_strings = dsa_master_get_strings;
+       ops->get_ethtool_phy_stats = dsa_master_get_ethtool_phy_stats;
 
        dev->ethtool_ops = ops;
 
index 7acc1169d75e17e1babeb302e10d640a48c4674d..2413beb995be4afa10f7afbc8d28179a518d84b4 100644 (file)
@@ -273,25 +273,38 @@ int dsa_port_vlan_del(struct dsa_port *dp,
        return 0;
 }
 
-static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
 {
-       struct device_node *port_dn = dp->dn;
        struct device_node *phy_dn;
-       struct dsa_switch *ds = dp->ds;
        struct phy_device *phydev;
-       int port = dp->index;
-       int err = 0;
 
-       phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+       phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
        if (!phy_dn)
-               return 0;
+               return NULL;
 
        phydev = of_phy_find_device(phy_dn);
        if (!phydev) {
-               err = -EPROBE_DEFER;
-               goto err_put_of;
+               of_node_put(phy_dn);
+               return ERR_PTR(-EPROBE_DEFER);
        }
 
+       return phydev;
+}
+
+static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+{
+       struct dsa_switch *ds = dp->ds;
+       struct phy_device *phydev;
+       int port = dp->index;
+       int err = 0;
+
+       phydev = dsa_port_get_phy_device(dp);
+       if (!phydev)
+               return 0;
+
+       if (IS_ERR(phydev))
+               return PTR_ERR(phydev);
+
        if (enable) {
                err = genphy_config_init(phydev);
                if (err < 0)
@@ -317,8 +330,6 @@ static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
 
 err_put_dev:
        put_device(&phydev->mdio.dev);
-err_put_of:
-       of_node_put(phy_dn);
        return err;
 }
 
@@ -372,3 +383,60 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
        else
                dsa_port_setup_phy_of(dp, false);
 }
+
+int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data)
+{
+       struct phy_device *phydev;
+       int ret = -EOPNOTSUPP;
+
+       if (of_phy_is_fixed_link(dp->dn))
+               return ret;
+
+       phydev = dsa_port_get_phy_device(dp);
+       if (IS_ERR_OR_NULL(phydev))
+               return ret;
+
+       ret = phy_ethtool_get_strings(phydev, data);
+       put_device(&phydev->mdio.dev);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings);
+
+int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data)
+{
+       struct phy_device *phydev;
+       int ret = -EOPNOTSUPP;
+
+       if (of_phy_is_fixed_link(dp->dn))
+               return ret;
+
+       phydev = dsa_port_get_phy_device(dp);
+       if (IS_ERR_OR_NULL(phydev))
+               return ret;
+
+       ret = phy_ethtool_get_stats(phydev, NULL, data);
+       put_device(&phydev->mdio.dev);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats);
+
+int dsa_port_get_phy_sset_count(struct dsa_port *dp)
+{
+       struct phy_device *phydev;
+       int ret = -EOPNOTSUPP;
+
+       if (of_phy_is_fixed_link(dp->dn))
+               return ret;
+
+       phydev = dsa_port_get_phy_device(dp);
+       if (IS_ERR_OR_NULL(phydev))
+               return ret;
+
+       ret = phy_ethtool_get_sset_count(phydev);
+       put_device(&phydev->mdio.dev);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
index 18561af7a8f1da0ec92773376ecac9a72a64751c..1e3b6a6d8a40dcf69200ead186a6ab8919e63db6 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/netdevice.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
+#include <linux/phylink.h>
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/mdio.h>
@@ -97,8 +98,7 @@ static int dsa_slave_open(struct net_device *dev)
        if (err)
                goto clear_promisc;
 
-       if (dev->phydev)
-               phy_start(dev->phydev);
+       phylink_start(dp->pl);
 
        return 0;
 
@@ -120,8 +120,7 @@ static int dsa_slave_close(struct net_device *dev)
        struct net_device *master = dsa_slave_to_master(dev);
        struct dsa_port *dp = dsa_slave_to_port(dev);
 
-       if (dev->phydev)
-               phy_stop(dev->phydev);
+       phylink_stop(dp->pl);
 
        dsa_port_disable(dp, dev->phydev);
 
@@ -272,10 +271,7 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                break;
        }
 
-       if (!dev->phydev)
-               return -ENODEV;
-
-       return phy_mii_ioctl(dev->phydev, ifr, cmd);
+       return phylink_mii_ioctl(p->dp->pl, ifr, cmd);
 }
 
 static int dsa_slave_port_attr_set(struct net_device *dev,
@@ -498,14 +494,11 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
                ds->ops->get_regs(ds, dp->index, regs, _p);
 }
 
-static u32 dsa_slave_get_link(struct net_device *dev)
+static int dsa_slave_nway_reset(struct net_device *dev)
 {
-       if (!dev->phydev)
-               return -ENODEV;
-
-       genphy_update_link(dev->phydev);
+       struct dsa_port *dp = dsa_slave_to_port(dev);
 
-       return dev->phydev->link;
+       return phylink_ethtool_nway_reset(dp->pl);
 }
 
 static int dsa_slave_get_eeprom_len(struct net_device *dev)
@@ -560,7 +553,8 @@ static void dsa_slave_get_strings(struct net_device *dev,
                strncpy(data + 2 * len, "rx_packets", len);
                strncpy(data + 3 * len, "rx_bytes", len);
                if (ds->ops->get_strings)
-                       ds->ops->get_strings(ds, dp->index, data + 4 * len);
+                       ds->ops->get_strings(ds, dp->index, stringset,
+                                            data + 4 * len);
        }
 }
 
@@ -605,7 +599,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 
                count = 4;
                if (ds->ops->get_sset_count)
-                       count += ds->ops->get_sset_count(ds, dp->index);
+                       count += ds->ops->get_sset_count(ds, dp->index, sset);
 
                return count;
        }
@@ -618,6 +612,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
        struct dsa_port *dp = dsa_slave_to_port(dev);
        struct dsa_switch *ds = dp->ds;
 
+       phylink_ethtool_get_wol(dp->pl, w);
+
        if (ds->ops->get_wol)
                ds->ops->get_wol(ds, dp->index, w);
 }
@@ -628,6 +624,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
        struct dsa_switch *ds = dp->ds;
        int ret = -EOPNOTSUPP;
 
+       phylink_ethtool_set_wol(dp->pl, w);
+
        if (ds->ops->set_wol)
                ret = ds->ops->set_wol(ds, dp->index, w);
 
@@ -651,13 +649,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
        if (ret)
                return ret;
 
-       if (e->eee_enabled) {
-               ret = phy_init_eee(dev->phydev, 0);
-               if (ret)
-                       return ret;
-       }
-
-       return phy_ethtool_set_eee(dev->phydev, e);
+       return phylink_ethtool_set_eee(dp->pl, e);
 }
 
 static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -677,7 +669,23 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
        if (ret)
                return ret;
 
-       return phy_ethtool_get_eee(dev->phydev, e);
+       return phylink_ethtool_get_eee(dp->pl, e);
+}
+
+static int dsa_slave_get_link_ksettings(struct net_device *dev,
+                                       struct ethtool_link_ksettings *cmd)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+
+       return phylink_ethtool_ksettings_get(dp->pl, cmd);
+}
+
+static int dsa_slave_set_link_ksettings(struct net_device *dev,
+                                       const struct ethtool_link_ksettings *cmd)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+
+       return phylink_ethtool_ksettings_set(dp->pl, cmd);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -980,8 +988,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
        .get_drvinfo            = dsa_slave_get_drvinfo,
        .get_regs_len           = dsa_slave_get_regs_len,
        .get_regs               = dsa_slave_get_regs,
-       .nway_reset             = phy_ethtool_nway_reset,
-       .get_link               = dsa_slave_get_link,
+       .nway_reset             = dsa_slave_nway_reset,
+       .get_link               = ethtool_op_get_link,
        .get_eeprom_len         = dsa_slave_get_eeprom_len,
        .get_eeprom             = dsa_slave_get_eeprom,
        .set_eeprom             = dsa_slave_set_eeprom,
@@ -992,8 +1000,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
        .get_wol                = dsa_slave_get_wol,
        .set_eee                = dsa_slave_set_eee,
        .get_eee                = dsa_slave_get_eee,
-       .get_link_ksettings     = phy_ethtool_get_link_ksettings,
-       .set_link_ksettings     = phy_ethtool_set_link_ksettings,
+       .get_link_ksettings     = dsa_slave_get_link_ksettings,
+       .set_link_ksettings     = dsa_slave_set_link_ksettings,
        .get_rxnfc              = dsa_slave_get_rxnfc,
        .set_rxnfc              = dsa_slave_set_rxnfc,
        .get_ts_info            = dsa_slave_get_ts_info,
@@ -1052,56 +1060,122 @@ static struct device_type dsa_type = {
        .name   = "dsa",
 };
 
-static void dsa_slave_adjust_link(struct net_device *dev)
+static void dsa_slave_phylink_validate(struct net_device *dev,
+                                      unsigned long *supported,
+                                      struct phylink_link_state *state)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
-       struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = dp->ds;
-       unsigned int status_changed = 0;
 
-       if (p->old_link != dev->phydev->link) {
-               status_changed = 1;
-               p->old_link = dev->phydev->link;
-       }
+       if (!ds->ops->phylink_validate)
+               return;
 
-       if (p->old_duplex != dev->phydev->duplex) {
-               status_changed = 1;
-               p->old_duplex = dev->phydev->duplex;
-       }
+       ds->ops->phylink_validate(ds, dp->index, supported, state);
+}
 
-       if (p->old_pause != dev->phydev->pause) {
-               status_changed = 1;
-               p->old_pause = dev->phydev->pause;
-       }
+static int dsa_slave_phylink_mac_link_state(struct net_device *dev,
+                                           struct phylink_link_state *state)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch *ds = dp->ds;
+
+       /* Only called for SGMII and 802.3z */
+       if (!ds->ops->phylink_mac_link_state)
+               return -EOPNOTSUPP;
+
+       return ds->ops->phylink_mac_link_state(ds, dp->index, state);
+}
+
+static void dsa_slave_phylink_mac_config(struct net_device *dev,
+                                        unsigned int mode,
+                                        const struct phylink_link_state *state)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch *ds = dp->ds;
+
+       if (!ds->ops->phylink_mac_config)
+               return;
+
+       ds->ops->phylink_mac_config(ds, dp->index, mode, state);
+}
 
-       if (ds->ops->adjust_link && status_changed)
-               ds->ops->adjust_link(ds, dp->index, dev->phydev);
+static void dsa_slave_phylink_mac_an_restart(struct net_device *dev)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch *ds = dp->ds;
 
-       if (status_changed)
-               phy_print_status(dev->phydev);
+       if (!ds->ops->phylink_mac_an_restart)
+               return;
+
+       ds->ops->phylink_mac_an_restart(ds, dp->index);
 }
 
-static int dsa_slave_fixed_link_update(struct net_device *dev,
-                                      struct fixed_phy_status *status)
+static void dsa_slave_phylink_mac_link_down(struct net_device *dev,
+                                           unsigned int mode,
+                                           phy_interface_t interface)
 {
-       struct dsa_switch *ds;
-       struct dsa_port *dp;
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch *ds = dp->ds;
 
-       if (dev) {
-               dp = dsa_slave_to_port(dev);
-               ds = dp->ds;
-               if (ds->ops->fixed_link_update)
-                       ds->ops->fixed_link_update(ds, dp->index, status);
+       if (!ds->ops->phylink_mac_link_down) {
+               if (ds->ops->adjust_link && dev->phydev)
+                       ds->ops->adjust_link(ds, dp->index, dev->phydev);
+               return;
        }
 
-       return 0;
+       ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
+}
+
+static void dsa_slave_phylink_mac_link_up(struct net_device *dev,
+                                         unsigned int mode,
+                                         phy_interface_t interface,
+                                         struct phy_device *phydev)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch *ds = dp->ds;
+
+       if (!ds->ops->phylink_mac_link_up) {
+               if (ds->ops->adjust_link && dev->phydev)
+                       ds->ops->adjust_link(ds, dp->index, dev->phydev);
+               return;
+       }
+
+       ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
+}
+
+static const struct phylink_mac_ops dsa_slave_phylink_mac_ops = {
+       .validate = dsa_slave_phylink_validate,
+       .mac_link_state = dsa_slave_phylink_mac_link_state,
+       .mac_config = dsa_slave_phylink_mac_config,
+       .mac_an_restart = dsa_slave_phylink_mac_an_restart,
+       .mac_link_down = dsa_slave_phylink_mac_link_down,
+       .mac_link_up = dsa_slave_phylink_mac_link_up,
+};
+
+void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up)
+{
+       const struct dsa_port *dp = dsa_to_port(ds, port);
+
+       phylink_mac_change(dp->pl, up);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change);
+
+static void dsa_slave_phylink_fixed_state(struct net_device *dev,
+                                         struct phylink_link_state *state)
+{
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch *ds = dp->ds;
+
+       /* No need to check that this operation is valid, the callback would
+        * not be called if it was not.
+        */
+       ds->ops->phylink_fixed_state(ds, dp->index, state);
 }
 
 /* slave device setup *******************************************************/
 static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
 {
        struct dsa_port *dp = dsa_slave_to_port(slave_dev);
-       struct dsa_slave_priv *p = netdev_priv(slave_dev);
        struct dsa_switch *ds = dp->ds;
 
        slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
@@ -1110,75 +1184,54 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
                return -ENODEV;
        }
 
-       /* Use already configured phy mode */
-       if (p->phy_interface == PHY_INTERFACE_MODE_NA)
-               p->phy_interface = slave_dev->phydev->interface;
-
-       return phy_connect_direct(slave_dev, slave_dev->phydev,
-                                 dsa_slave_adjust_link, p->phy_interface);
+       return phylink_connect_phy(dp->pl, slave_dev->phydev);
 }
 
 static int dsa_slave_phy_setup(struct net_device *slave_dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(slave_dev);
-       struct dsa_slave_priv *p = netdev_priv(slave_dev);
        struct device_node *port_dn = dp->dn;
        struct dsa_switch *ds = dp->ds;
-       struct device_node *phy_dn;
-       bool phy_is_fixed = false;
        u32 phy_flags = 0;
        int mode, ret;
 
        mode = of_get_phy_mode(port_dn);
        if (mode < 0)
                mode = PHY_INTERFACE_MODE_NA;
-       p->phy_interface = mode;
 
-       phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
-       if (!phy_dn && of_phy_is_fixed_link(port_dn)) {
-               /* In the case of a fixed PHY, the DT node associated
-                * to the fixed PHY is the Port DT node
-                */
-               ret = of_phy_register_fixed_link(port_dn);
-               if (ret) {
-                       netdev_err(slave_dev, "failed to register fixed PHY: %d\n", ret);
-                       return ret;
-               }
-               phy_is_fixed = true;
-               phy_dn = of_node_get(port_dn);
+       dp->pl = phylink_create(slave_dev, of_fwnode_handle(port_dn), mode,
+                               &dsa_slave_phylink_mac_ops);
+       if (IS_ERR(dp->pl)) {
+               netdev_err(slave_dev,
+                          "error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
+               return PTR_ERR(dp->pl);
        }
 
+       /* Register only if the switch provides such a callback, since this
+        * callback takes precedence over polling the link GPIO in PHYLINK
+        * (see phylink_get_fixed_state).
+        */
+       if (ds->ops->phylink_fixed_state)
+               phylink_fixed_state_cb(dp->pl, dsa_slave_phylink_fixed_state);
+
        if (ds->ops->get_phy_flags)
                phy_flags = ds->ops->get_phy_flags(ds, dp->index);
 
-       if (phy_dn) {
-               slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
-                                                  dsa_slave_adjust_link,
-                                                  phy_flags,
-                                                  p->phy_interface);
-               of_node_put(phy_dn);
-       }
-
-       if (slave_dev->phydev && phy_is_fixed)
-               fixed_phy_set_link_update(slave_dev->phydev,
-                                         dsa_slave_fixed_link_update);
-
-       /* We could not connect to a designated PHY, so use the switch internal
-        * MDIO bus instead
-        */
-       if (!slave_dev->phydev) {
+       ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags);
+       if (ret == -ENODEV) {
+               /* We could not connect to a designated PHY or SFP, so use the
+                * switch internal MDIO bus instead
+                */
                ret = dsa_slave_phy_connect(slave_dev, dp->index);
                if (ret) {
-                       netdev_err(slave_dev, "failed to connect to port %d: %d\n",
+                       netdev_err(slave_dev,
+                                  "failed to connect to port %d: %d\n",
                                   dp->index, ret);
-                       if (phy_is_fixed)
-                               of_phy_deregister_fixed_link(port_dn);
+                       phylink_destroy(dp->pl);
                        return ret;
                }
        }
 
-       phy_attached_info(slave_dev->phydev);
-
        return 0;
 }
 
@@ -1193,29 +1246,26 @@ static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
 
 int dsa_slave_suspend(struct net_device *slave_dev)
 {
-       struct dsa_slave_priv *p = netdev_priv(slave_dev);
+       struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 
        netif_device_detach(slave_dev);
 
-       if (slave_dev->phydev) {
-               phy_stop(slave_dev->phydev);
-               p->old_pause = -1;
-               p->old_link = -1;
-               p->old_duplex = -1;
-               phy_suspend(slave_dev->phydev);
-       }
+       rtnl_lock();
+       phylink_stop(dp->pl);
+       rtnl_unlock();
 
        return 0;
 }
 
 int dsa_slave_resume(struct net_device *slave_dev)
 {
+       struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+
        netif_device_attach(slave_dev);
 
-       if (slave_dev->phydev) {
-               phy_resume(slave_dev->phydev);
-               phy_start(slave_dev->phydev);
-       }
+       rtnl_lock();
+       phylink_start(dp->pl);
+       rtnl_unlock();
 
        return 0;
 }
@@ -1280,11 +1330,6 @@ int dsa_slave_create(struct dsa_port *port)
        p->dp = port;
        INIT_LIST_HEAD(&p->mall_tc_list);
        p->xmit = cpu_dp->tag_ops->xmit;
-
-       p->old_pause = -1;
-       p->old_link = -1;
-       p->old_duplex = -1;
-
        port->slave = slave_dev;
 
        netif_carrier_off(slave_dev);
@@ -1307,9 +1352,10 @@ int dsa_slave_create(struct dsa_port *port)
        return 0;
 
 out_phy:
-       phy_disconnect(slave_dev->phydev);
-       if (of_phy_is_fixed_link(port->dn))
-               of_phy_deregister_fixed_link(port->dn);
+       rtnl_lock();
+       phylink_disconnect_phy(p->dp->pl);
+       rtnl_unlock();
+       phylink_destroy(p->dp->pl);
 out_free:
        free_percpu(p->stats64);
        free_netdev(slave_dev);
@@ -1321,17 +1367,15 @@ void dsa_slave_destroy(struct net_device *slave_dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(slave_dev);
        struct dsa_slave_priv *p = netdev_priv(slave_dev);
-       struct device_node *port_dn = dp->dn;
 
        netif_carrier_off(slave_dev);
-       if (slave_dev->phydev) {
-               phy_disconnect(slave_dev->phydev);
+       rtnl_lock();
+       phylink_disconnect_phy(dp->pl);
+       rtnl_unlock();
 
-               if (of_phy_is_fixed_link(port_dn))
-                       of_phy_deregister_fixed_link(port_dn);
-       }
        dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
        unregister_netdev(slave_dev);
+       phylink_destroy(dp->pl);
        free_percpu(p->stats64);
        free_netdev(slave_dev);
 }
@@ -1394,6 +1438,9 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
        switch (switchdev_work->event) {
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
+               if (!fdb_info->added_by_user)
+                       break;
+
                err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
                if (err) {
                        netdev_dbg(dev, "fdb add failed err=%d\n", err);
@@ -1405,6 +1452,9 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
+               if (!fdb_info->added_by_user)
+                       break;
+
                err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
                if (err) {
                        netdev_dbg(dev, "fdb del failed err=%d\n", err);
@@ -1457,8 +1507,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
        switch (event) {
        case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
-               if (dsa_slave_switchdev_fdb_work_init(switchdev_work,
-                                                     ptr))
+               if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr))
                        goto err_fdb_work_init;
                dev_hold(dev);
                break;
index eaeba9b99a737c424ab9e94279f87c4045ec03dc..ee28440f57c58f4eec29e67641a49efcbd36c8cd 100644 (file)
@@ -128,15 +128,15 @@ u32 eth_get_headlen(void *data, unsigned int len)
 {
        const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
        const struct ethhdr *eth = (const struct ethhdr *)data;
-       struct flow_keys keys;
+       struct flow_keys_basic keys;
 
        /* this should never happen, but better safe than sorry */
        if (unlikely(len < sizeof(*eth)))
                return len;
 
        /* parse any remaining L2/L3 headers, check for L4 */
-       if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
-                                           sizeof(*eth), len, flags))
+       if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto,
+                                             sizeof(*eth), len, flags))
                return max_t(u32, keys.control.thoff, sizeof(*eth));
 
        /* parse for any L4 headers */
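
struct flow_keys_basic is the trimmed-down key set introduced with this change: only the control block (carrying the thoff consumed above) and the basic block are dissected, sparing this hot path a full struct flow_keys. Its assumed layout, for reference:

        struct flow_keys_basic {
                struct flow_dissector_key_control control;  /* addr_type, thoff, flags */
                struct flow_dissector_key_basic   basic;    /* n_proto, ip_proto */
        };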
index b8d95cb71c25dd69c8a88b2c886a3f0d2ce1174f..44a7e16bf3b5e14c03b99a806145203fc2a4af01 100644 (file)
@@ -20,8 +20,8 @@ typedef unsigned __bitwise lowpan_rx_result;
 struct frag_lowpan_compare_key {
        u16 tag;
        u16 d_size;
-       const struct ieee802154_addr src;
-       const struct ieee802154_addr dst;
+       struct ieee802154_addr src;
+       struct ieee802154_addr dst;
 };
 
 /* Equivalent of ipv4 struct ipq
index 1790b65944b3ee188608b1a76d4f9a42fb1479d5..2cc224106b6928bafd92460ba59ea69db0778761 100644 (file)
@@ -75,14 +75,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
 {
        struct netns_ieee802154_lowpan *ieee802154_lowpan =
                net_ieee802154_lowpan(net);
-       struct frag_lowpan_compare_key key = {
-               .tag = cb->d_tag,
-               .d_size = cb->d_size,
-               .src = *src,
-               .dst = *dst,
-       };
+       struct frag_lowpan_compare_key key = {};
        struct inet_frag_queue *q;
 
+       key.tag = cb->d_tag;
+       key.d_size = cb->d_size;
+       key.src = *src;
+       key.dst = *dst;
+
        q = inet_frag_find(&ieee802154_lowpan->frags, &key);
        if (!q)
                return NULL;
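
The move away from a designated initializer is not cosmetic: inet_frag_find() hashes the key as raw bytes, and an initializer list only guarantees values for the members, not for the padding between them, so two logically equal keys could hash differently. The empty initializer above is relied on to clear the whole object before the fields are filled in; an equivalent, more explicit form (a sketch, not from the patch):

        struct frag_lowpan_compare_key key;

        memset(&key, 0, sizeof(key));   /* clears padding bytes as well */
        key.tag = cb->d_tag;
        key.d_size = cb->d_size;
        key.src = *src;
        key.dst = *dst;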
@@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
        struct lowpan_frag_queue *fq;
        struct net *net = dev_net(skb->dev);
        struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
-       struct ieee802154_hdr hdr;
+       struct ieee802154_hdr hdr = {};
        int err;
 
        if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
index 7d1ec76e7f433a2525581782a0bf14731548d4a8..13bbf8cb6a3961d422d01cf5b16a724d16107419 100644 (file)
@@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen)
        int total_pull;
        u16 ifehdrln;
 
+       if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN))
+               return NULL;
+
        ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len);
        ifehdrln = ntohs(ifehdr->metalen);
        total_pull = skb->dev->hard_header_len + ifehdrln;
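
The added check follows the usual rule for paged skbs: skb->data is only guaranteed linear up to the headlen, so a header must be pulled before it is dereferenced. Here that happens in two steps, first enough to read the IFE header itself, and only then, with metalen known, the pre-existing pull of total_pull. The general shape (a hedged sketch; fixed_hdr and read_len_field() are illustrative):

        if (!pskb_may_pull(skb, off + sizeof(struct fixed_hdr)))
                return NULL;                    /* header not linear */
        len = read_len_field(skb->data + off);  /* now safe to read */
        if (!pskb_may_pull(skb, off + len))
                return NULL;                    /* declared payload missing */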
@@ -92,12 +95,43 @@ struct meta_tlvhdr {
        __be16 len;
 };
 
+static bool __ife_tlv_meta_valid(const unsigned char *skbdata,
+                                const unsigned char *ifehdr_end)
+{
+       const struct meta_tlvhdr *tlv;
+       u16 tlvlen;
+
+       if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end))
+               return false;
+
+       tlv = (const struct meta_tlvhdr *)skbdata;
+       tlvlen = ntohs(tlv->len);
+
+       /* the TLV length field includes the header, so enforce that minimum */
+       if (tlvlen < NLA_HDRLEN)
+               return false;
+
+       /* guard against u16 wraparound when rounding up with NLA_ALIGN */
+       if (NLA_ALIGN(tlvlen) < tlvlen)
+               return false;
+
+       if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end))
+               return false;
+
+       return true;
+}
+
 /* Caller takes care of presenting data in network order
  */
-void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen)
+void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype,
+                         u16 *dlen, u16 *totlen)
 {
-       struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata;
+       struct meta_tlvhdr *tlv;
+
+       if (!__ife_tlv_meta_valid(skbdata, ifehdr_end))
+               return NULL;
 
+       tlv = (struct meta_tlvhdr *)skbdata;
        *dlen = ntohs(tlv->len) - NLA_HDRLEN;
        *attrtype = ntohs(tlv->type);
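
__ife_tlv_meta_valid() checks, in order: room for the TLV header before ifehdr_end, a declared length that at least covers that header, no u16 wraparound when NLA-aligning it, and room for the aligned payload. A caller can therefore walk the metadata area until the decoder returns NULL; a hedged sketch (handle_meta() is illustrative, and totlen is assumed to carry the NLA-aligned TLV size):

        void *cur = ife_meta_start, *data;
        u16 attrtype, dlen, totlen;

        while ((data = ife_tlv_meta_decode(cur, ifehdr_end, &attrtype,
                                           &dlen, &totlen))) {
                handle_meta(attrtype, data, dlen);
                cur += totlen;          /* step to the next aligned TLV */
        }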
 
index 3ebf599cebaea4926decc1aad7274b12ec7e1566..b403499fdabea7367f65c588d957a30f5a6572b5 100644 (file)
@@ -994,7 +994,9 @@ const struct proto_ops inet_stream_ops = {
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = inet_sendmsg,
        .recvmsg           = inet_recvmsg,
+#ifdef CONFIG_MMU
        .mmap              = tcp_mmap,
+#endif
        .sendpage          = inet_sendpage,
        .splice_read       = tcp_splice_read,
        .read_sock         = tcp_read_sock,
index f05afaf3235c0500a9087eae6365b7001aa64663..4d622112bf95fabd9d0e87c64f1e27138a3c1556 100644 (file)
@@ -326,10 +326,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
                                 u8 tos, int oif, struct net_device *dev,
                                 int rpf, struct in_device *idev, u32 *itag)
 {
+       struct net *net = dev_net(dev);
+       struct flow_keys flkeys;
        int ret, no_addr;
        struct fib_result res;
        struct flowi4 fl4;
-       struct net *net = dev_net(dev);
        bool dev_match;
 
        fl4.flowi4_oif = 0;
@@ -347,6 +348,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
        no_addr = idev->ifa_list == NULL;
 
        fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
+       if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) {
+               fl4.flowi4_proto = 0;
+               fl4.fl4_sport = 0;
+               fl4.fl4_dport = 0;
+       }
 
        trace_fib_validate_source(dev, &fl4);
 
index 881ac6d046f2da17dc7fbbdaa36553e91eac7619..33a88e045efd937ecb39dc15ab6b838ea9846657 100644 (file)
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 
-#ifdef INET_CSK_DEBUG
-const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
-EXPORT_SYMBOL(inet_csk_timer_bug_msg);
-#endif
-
 #if IS_ENABLED(CONFIG_IPV6)
 /* match_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
  *                          only, and any IPv4 addresses if not IPv6 only
index 9c169bb2444d5990c7562692ba1c92030898bca4..2d8efeecf61976f00c0700cc7f64f749b9482a73 100644 (file)
@@ -578,6 +578,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
        int tunnel_hlen;
        int version;
        __be16 df;
+       int nhoff;
+       int thoff;
 
        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -605,6 +607,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                truncate = true;
        }
 
+       nhoff = skb_network_header(skb) - skb_mac_header(skb);
+       if (skb->protocol == htons(ETH_P_IP) &&
+           (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+               truncate = true;
+
+       thoff = skb_transport_header(skb) - skb_mac_header(skb);
+       if (skb->protocol == htons(ETH_P_IPV6) &&
+           (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
+               truncate = true;
+
        if (version == 1) {
                erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
                                    ntohl(md->u.index), truncate, true);
@@ -722,10 +734,12 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
                erspan_build_header(skb, ntohl(tunnel->parms.o_key),
                                    tunnel->index,
                                    truncate, true);
-       else
+       else if (tunnel->erspan_ver == 2)
                erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
                                       tunnel->dir, tunnel->hwid,
                                       truncate, true);
+       else
+               goto free_skb;
 
        tunnel->parms.o_flags &= ~TUNNEL_KEY;
        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
index 83c73bab2c3de90254e11e4126d7cb77ed998f03..af5a830ff6ad320ae68066ab86476962db978f79 100644 (file)
@@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk,
        struct rtable *rt = (struct rtable *)cork->dst;
        unsigned int wmem_alloc_delta = 0;
        u32 tskey = 0;
+       bool paged;
 
        skb = skb_peek_tail(queue);
 
        exthdrlen = !skb ? rt->dst.header_len : 0;
-       mtu = cork->fragsize;
+       mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+       paged = !!cork->gso_size;
+
        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
                tskey = sk->sk_tskey++;
@@ -906,8 +909,8 @@ static int __ip_append_data(struct sock *sk,
        if (transhdrlen &&
            length + fragheaderlen <= mtu &&
            rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
-           !(flags & MSG_MORE) &&
-           !exthdrlen)
+           (!(flags & MSG_MORE) || cork->gso_size) &&
+           (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
                csummode = CHECKSUM_PARTIAL;
 
        cork->length += length;
@@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
+                       unsigned int pagedlen = 0;
                        struct sk_buff *skb_prev;
 alloc_new_skb:
                        skb_prev = skb;
@@ -953,8 +957,12 @@ static int __ip_append_data(struct sock *sk,
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
-                       else
+                       else if (!paged)
                                alloclen = fraglen;
+                       else {
+                               alloclen = min_t(int, fraglen, MAX_HEADER);
+                               pagedlen = fraglen - alloclen;
+                       }
 
                        alloclen += exthdrlen;
 
@@ -998,7 +1006,7 @@ static int __ip_append_data(struct sock *sk,
                        /*
                         *      Find where to start putting bytes.
                         */
-                       data = skb_put(skb, fraglen + exthdrlen);
+                       data = skb_put(skb, fraglen + exthdrlen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
@@ -1014,7 +1022,7 @@ static int __ip_append_data(struct sock *sk,
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
 
-                       copy = datalen - transhdrlen - fraggap;
+                       copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
@@ -1022,7 +1030,7 @@ static int __ip_append_data(struct sock *sk,
                        }
 
                        offset += copy;
-                       length -= datalen - fraggap;
+                       length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;
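
The new decrement follows from the paged split: only copy payload bytes went into the linear area, where

        copy = datalen - transhdrlen - fraggap - pagedlen

so the old length -= datalen - fraggap would over-consume by exactly pagedlen. Since copy + transhdrlen = datalen - fraggap - pagedlen, the replacement leaves precisely the pagedlen bytes that still have to be appended as page frags further down the loop.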
@@ -1045,7 +1053,8 @@ static int __ip_append_data(struct sock *sk,
                if (copy > length)
                        copy = length;
 
-               if (!(rt->dst.dev->features&NETIF_F_SG)) {
+               if (!(rt->dst.dev->features&NETIF_F_SG) &&
+                   skb_tailroom(skb) >= copy) {
                        unsigned int off;
 
                        off = skb->len;
@@ -1135,6 +1144,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
        *rtp = NULL;
        cork->fragsize = ip_sk_use_pmtu(sk) ?
                         dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+
+       cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
        cork->dst = &rt->dst;
        cork->length = 0;
        cork->ttl = ipc->ttl;
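
cork->gso_size is how a UDP GSO request reaches __ip_append_data(): when set, the path above sizes against IP_MAX_MTU and keeps most of the payload in page frags so the stack can segment later. In the same series it is driven from userspace through the UDP_SEGMENT socket option; a hedged usage sketch:

        /* ask the stack to split one large send into 1400-byte datagrams */
        int gso_size = 1400;

        if (setsockopt(fd, SOL_UDP, UDP_SEGMENT,
                       &gso_size, sizeof(gso_size)) < 0)
                perror("setsockopt(UDP_SEGMENT)");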
@@ -1214,7 +1225,7 @@ ssize_t   ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
                return -EOPNOTSUPP;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
-       mtu = cork->fragsize;
+       mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
 
        fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -1470,9 +1481,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
                                        int len, int odd, struct sk_buff *skb),
                            void *from, int length, int transhdrlen,
                            struct ipcm_cookie *ipc, struct rtable **rtp,
-                           unsigned int flags)
+                           struct inet_cork *cork, unsigned int flags)
 {
-       struct inet_cork cork;
        struct sk_buff_head queue;
        int err;
 
@@ -1481,22 +1491,22 @@ struct sk_buff *ip_make_skb(struct sock *sk,
 
        __skb_queue_head_init(&queue);
 
-       cork.flags = 0;
-       cork.addr = 0;
-       cork.opt = NULL;
-       err = ip_setup_cork(sk, &cork, ipc, rtp);
+       cork->flags = 0;
+       cork->addr = 0;
+       cork->opt = NULL;
+       err = ip_setup_cork(sk, cork, ipc, rtp);
        if (err)
                return ERR_PTR(err);
 
-       err = __ip_append_data(sk, fl4, &queue, &cork,
+       err = __ip_append_data(sk, fl4, &queue, cork,
                               &current->task_frag, getfrag,
                               from, length, transhdrlen, flags);
        if (err) {
-               __ip_flush_pending_frames(sk, &queue, &cork);
+               __ip_flush_pending_frames(sk, &queue, cork);
                return ERR_PTR(err);
        }
 
-       return __ip_make_skb(sk, fl4, &queue, &cork);
+       return __ip_make_skb(sk, fl4, &queue, cork);
 }
 
 /*
@@ -1552,7 +1562,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                oif = skb->skb_iif;
 
        flowi4_init_output(&fl4, oif,
-                          IP4_REPLY_MARK(net, skb->mark),
+                          IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
                           RT_TOS(arg->tos),
                           RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
                           ip_reply_arg_flowi_flags(arg),
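
The ?: above is the GNU "elvis" operator: use the reflected reply mark when the netns policy produces one, otherwise fall back to the socket's own mark. Spelled out:

        u32 mark = IP4_REPLY_MARK(net, skb->mark);  /* 0 unless fwmark_reflect */

        if (!mark)
                mark = sk->sk_mark;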
index 2f39479be92f4602c8cb3f95ca8bc3ca56469f25..dde671e978298b5b0239f975fe134b232db9049b 100644 (file)
@@ -423,17 +423,17 @@ void __init ip_tunnel_core_init(void)
        lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
 }
 
-struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
 EXPORT_SYMBOL(ip_tunnel_metadata_cnt);
 
 void ip_tunnel_need_metadata(void)
 {
-       static_key_slow_inc(&ip_tunnel_metadata_cnt);
+       static_branch_inc(&ip_tunnel_metadata_cnt);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata);
 
 void ip_tunnel_unneed_metadata(void)
 {
-       static_key_slow_dec(&ip_tunnel_metadata_cnt);
+       static_branch_dec(&ip_tunnel_metadata_cnt);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
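
This is the newer jump-label API replacing a bare struct static_key: the key's initial state becomes part of its definition and the branch site states its expectation. The general pattern, assuming standard static-branch usage (my_feature_key and do_feature() are illustrative):

        DEFINE_STATIC_KEY_FALSE(my_feature_key);

        if (static_branch_unlikely(&my_feature_key))
                do_feature();                   /* patched in when enabled */

        static_branch_inc(&my_feature_key);     /* reference-counted enable */
        static_branch_dec(&my_feature_key);     /* disable at refcount zero */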
index 43f620feb1c45854f847010090fc43a8e810d4ef..86c9f755de3ddddc93b1bf9ee1b6532cf6d6a7fd 100644 (file)
@@ -28,6 +28,9 @@
  *
  *  Multiple Nameservers in /proc/net/pnp
  *              --  Josef Siemes <jsiemes@web.de>, Aug 2002
+ *
+ *  NTP servers in /proc/net/ipconfig/ntp_servers
+ *              --  Chris Novakovic <chris@chrisn.me.uk>, April 2018
  */
 
 #include <linux/types.h>
@@ -93,6 +96,7 @@
 #define CONF_TIMEOUT_MAX       (HZ*30) /* Maximum allowed timeout */
 #define CONF_NAMESERVERS_MAX   3       /* Maximum number of nameservers
                                           - '3' from resolv.h */
+#define CONF_NTP_SERVERS_MAX   3       /* Maximum number of NTP servers */
 
 #define NONE cpu_to_be32(INADDR_NONE)
 #define ANY cpu_to_be32(INADDR_ANY)
@@ -152,6 +156,7 @@ static int ic_proto_used;                   /* Protocol used, if any */
 #define ic_proto_used 0
 #endif
 static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
+static __be32 ic_ntp_servers[CONF_NTP_SERVERS_MAX]; /* NTP server IP addresses */
 static u8 ic_domain[64];               /* DNS (not NIS) domain name */
 
 /*
@@ -576,6 +581,15 @@ static inline void __init ic_nameservers_predef(void)
                ic_nameservers[i] = NONE;
 }
 
+/* Predefine NTP servers */
+static inline void __init ic_ntp_servers_predef(void)
+{
+       int i;
+
+       for (i = 0; i < CONF_NTP_SERVERS_MAX; i++)
+               ic_ntp_servers[i] = NONE;
+}
+
 /*
  *     DHCP/BOOTP support.
  */
@@ -671,6 +685,7 @@ ic_dhcp_init_options(u8 *options, struct ic_device *d)
                        17,     /* Boot path */
                        26,     /* MTU */
                        40,     /* NIS domain name */
+                       42,     /* NTP servers */
                };
 
                *e++ = 55;      /* Parameter request list */
@@ -721,9 +736,11 @@ static void __init ic_bootp_init_ext(u8 *e)
        *e++ = 3;               /* Default gateway request */
        *e++ = 4;
        e += 4;
-       *e++ = 5;               /* Name server request */
-       *e++ = 8;
-       e += 8;
+#if CONF_NAMESERVERS_MAX > 0
+       *e++ = 6;               /* (DNS) name server request */
+       *e++ = 4 * CONF_NAMESERVERS_MAX;
+       e += 4 * CONF_NAMESERVERS_MAX;
+#endif
        *e++ = 12;              /* Host name request */
        *e++ = 32;
        e += 32;
@@ -748,7 +765,13 @@ static void __init ic_bootp_init_ext(u8 *e)
  */
 static inline void __init ic_bootp_init(void)
 {
+       /* Re-initialise all name servers and NTP servers to NONE, in case any
+        * were set via the "ip=" or "nfsaddrs=" kernel command line parameters:
+        * any IP addresses specified there will already have been decoded but
+        * are no longer needed
+        */
        ic_nameservers_predef();
+       ic_ntp_servers_predef();
 
        dev_add_pack(&bootp_packet_type);
 }
@@ -912,6 +935,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
                ic_bootp_string(utsname()->domainname, ext+1, *ext,
                                __NEW_UTS_LEN);
                break;
+       case 42:        /* NTP servers */
+               servers = *ext / 4;
+               if (servers > CONF_NTP_SERVERS_MAX)
+                       servers = CONF_NTP_SERVERS_MAX;
+               for (i = 0; i < servers; i++) {
+                       if (ic_ntp_servers[i] == NONE)
+                               memcpy(&ic_ntp_servers[i], ext+1+4*i, 4);
+               }
+               break;
        }
 }
 
@@ -1257,7 +1289,10 @@ static int __init ic_dynamic(void)
 #endif /* IPCONFIG_DYNAMIC */
 
 #ifdef CONFIG_PROC_FS
+/* proc_dir_entry for /proc/net/ipconfig */
+static struct proc_dir_entry *ipconfig_dir;
 
+/* Name servers: */
 static int pnp_seq_show(struct seq_file *seq, void *v)
 {
        int i;
@@ -1294,6 +1329,62 @@ static const struct file_operations pnp_seq_fops = {
        .llseek         = seq_lseek,
        .release        = single_release,
 };
+
+/* Create the /proc/net/ipconfig directory */
+static int __init ipconfig_proc_net_init(void)
+{
+       ipconfig_dir = proc_net_mkdir(&init_net, "ipconfig", init_net.proc_net);
+       if (!ipconfig_dir)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* Create a new file under /proc/net/ipconfig */
+static int ipconfig_proc_net_create(const char *name,
+                                   const struct file_operations *fops)
+{
+       char *pname;
+       struct proc_dir_entry *p;
+
+       if (!ipconfig_dir)
+               return -ENOMEM;
+
+       pname = kasprintf(GFP_KERNEL, "%s%s", "ipconfig/", name);
+       if (!pname)
+               return -ENOMEM;
+
+       p = proc_create(pname, 0444, init_net.proc_net, fops);
+       kfree(pname);
+       if (!p)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */
+static int ntp_servers_seq_show(struct seq_file *seq, void *v)
+{
+       int i;
+
+       for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) {
+               if (ic_ntp_servers[i] != NONE)
+                       seq_printf(seq, "%pI4\n", &ic_ntp_servers[i]);
+       }
+       return 0;
+}
+
+static int ntp_servers_seq_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, ntp_servers_seq_show, NULL);
+}
+
+static const struct file_operations ntp_servers_seq_fops = {
+       .open           = ntp_servers_seq_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif /* CONFIG_PROC_FS */
 
 /*
@@ -1368,8 +1459,20 @@ static int __init ip_auto_config(void)
        int err;
        unsigned int i;
 
+       /* Initialise all name servers and NTP servers to NONE (but only if the
+        * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded,
+        * otherwise we'll overwrite the IP addresses specified there)
+        */
+       if (ic_set_manually == 0) {
+               ic_nameservers_predef();
+               ic_ntp_servers_predef();
+       }
+
 #ifdef CONFIG_PROC_FS
        proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
+
+       if (ipconfig_proc_net_init() == 0)
+               ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops);
 #endif /* CONFIG_PROC_FS */
 
        if (!ic_enable)
@@ -1481,16 +1584,32 @@ static int __init ip_auto_config(void)
                &ic_servaddr, &root_server_addr, root_server_path);
        if (ic_dev_mtu)
                pr_cont(", mtu=%d", ic_dev_mtu);
-       for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
+       /* Name servers (if any): */
+       for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
                if (ic_nameservers[i] != NONE) {
-                       pr_cont("     nameserver%u=%pI4",
-                               i, &ic_nameservers[i]);
-                       break;
+                       if (i == 0)
+                               pr_info("     nameserver%u=%pI4",
+                                       i, &ic_nameservers[i]);
+                       else
+                               pr_cont(", nameserver%u=%pI4",
+                                       i, &ic_nameservers[i]);
                }
-       for (i++; i < CONF_NAMESERVERS_MAX; i++)
-               if (ic_nameservers[i] != NONE)
-                       pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]);
-       pr_cont("\n");
+               if (i + 1 == CONF_NAMESERVERS_MAX)
+                       pr_cont("\n");
+       }
+       /* NTP servers (if any): */
+       for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) {
+               if (ic_ntp_servers[i] != NONE) {
+                       if (i == 0)
+                               pr_info("     ntpserver%u=%pI4",
+                                       i, &ic_ntp_servers[i]);
+                       else
+                               pr_cont(", ntpserver%u=%pI4",
+                                       i, &ic_ntp_servers[i]);
+               }
+               if (i + 1 == CONF_NTP_SERVERS_MAX)
+                       pr_cont("\n");
+       }
 #endif /* !SILENT */
 
        /*
@@ -1588,7 +1707,9 @@ static int __init ip_auto_config_setup(char *addrs)
                return 1;
        }
 
+       /* Initialise all name servers and NTP servers to NONE */
        ic_nameservers_predef();
+       ic_ntp_servers_predef();
 
        /* Parse string for static IP assignment.  */
        ip = addrs;
@@ -1647,6 +1768,13 @@ static int __init ip_auto_config_setup(char *addrs)
                                                ic_nameservers[1] = NONE;
                                }
                                break;
+                       case 9:
+                               if (CONF_NTP_SERVERS_MAX >= 1) {
+                                       ic_ntp_servers[0] = in_aton(ip);
+                                       if (ic_ntp_servers[0] == ANY)
+                                               ic_ntp_servers[0] = NONE;
+                               }
+                               break;
                        }
                }
                ip = cp;
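
Case 9 corresponds to the tenth colon-separated field of the ip= (or nfsaddrs=) parameter, extending the static form to ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:<dns0-ip>:<dns1-ip>:<ntp0-ip>. A hedged example pinning one NTP server next to two DNS servers:

        ip=10.0.0.2:10.0.0.1:10.0.0.1:255.255.255.0:client:eth0:off:10.0.0.53:10.0.0.54:10.0.0.123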
index 44b308d93ec2496899a74bcc378d6e50fbb41055..1ef37e2e267902f4b658a20262e00a75bea90b0a 100644 (file)
@@ -34,6 +34,7 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv4 packet filter");
+MODULE_ALIAS("ipt_icmp");
 
 void *ipt_alloc_initial_table(const struct xt_table *info)
 {
@@ -300,7 +301,7 @@ ipt_do_table(struct sk_buff *skb,
                counter = xt_get_this_cpu_counter(&e->counters);
                ADD_COUNTER(*counter, skb->len, 1);
 
-               t = ipt_get_target(e);
+               t = ipt_get_target_c(e);
                WARN_ON(!t->u.kernel.target);
 
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
index a03e4e7ef5f90eb55fdf8aba91ccf3b635ffb4ff..ce1512b02cb203a549529967eb602b467644a2d5 100644 (file)
@@ -47,7 +47,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 static unsigned int
 masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
        const struct nf_nat_ipv4_multi_range_compat *mr;
 
        mr = par->targinfo;
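
struct nf_nat_range2 is the extended range type behind this mechanical rename: it keeps the old layout and adds a base_proto member for shifted port mappings (NF_NAT_RANGE_PROTO_OFFSET), which is why every user is converted even where the new field stays unused. A hedged sketch of a shifted TCP range:

        struct nf_nat_range2 range;

        memset(&range, 0, sizeof(range));
        range.flags = NF_NAT_RANGE_PROTO_SPECIFIED | NF_NAT_RANGE_PROTO_OFFSET;
        range.min_proto.tcp.port  = htons(10000);   /* mapped window start */
        range.max_proto.tcp.port  = htons(10099);   /* mapped window end */
        range.base_proto.tcp.port = htons(2000);    /* base the offset is taken from */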
index fd01f13c896a153c6ec54b2df5503da6c311cf4f..12843c9ef1421d204fba6bea42a85615e2e69cc7 100644 (file)
@@ -89,10 +89,10 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
                        return true ^ invert;
        }
 
+       memset(&flow, 0, sizeof(flow));
        flow.flowi4_iif = LOOPBACK_IFINDEX;
        flow.daddr = iph->saddr;
        flow.saddr = rpfilter_get_saddr(iph->daddr);
-       flow.flowi4_oif = 0;
        flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
        flow.flowi4_tos = RT_TOS(iph->tos);
        flow.flowi4_scope = RT_SCOPE_UNIVERSE;
index 0f7255cc65ee14bc0f028e3d2bc793c33bcc57f8..529d89ec31e87c8a4528b98f81d69c8e0c9e7e41 100644 (file)
@@ -33,8 +33,7 @@ static const struct xt_table nf_nat_ipv4_table = {
 
 static unsigned int iptable_nat_do_chain(void *priv,
                                         struct sk_buff *skb,
-                                        const struct nf_hook_state *state,
-                                        struct nf_conn *ct)
+                                        const struct nf_hook_state *state)
 {
        return ipt_do_table(skb, state, state->net->ipv4.nat_table);
 }
index 0cd46bffa46914efab9f26b7d85d7612f1b41450..e1e56d7123d2c62b72e06f4e28d5a3cb3d24928b 100644 (file)
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/netfilter.h>
-#include <linux/rhashtable.h>
-#include <linux/ip.h>
-#include <linux/netdevice.h>
-#include <net/ip.h>
-#include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables.h>
-/* For layer 4 checksum field offset. */
-#include <linux/tcp.h>
-#include <linux/udp.h>
-
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
-                             __be32 addr, __be32 new_addr)
-{
-       struct tcphdr *tcph;
-
-       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
-           skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-               return -1;
-
-       tcph = (void *)(skb_network_header(skb) + thoff);
-       inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
-       return 0;
-}
-
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
-                             __be32 addr, __be32 new_addr)
-{
-       struct udphdr *udph;
-
-       if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
-           skb_try_make_writable(skb, thoff + sizeof(*udph)))
-               return -1;
-
-       udph = (void *)(skb_network_header(skb) + thoff);
-       if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-               inet_proto_csum_replace4(&udph->check, skb, addr,
-                                        new_addr, true);
-               if (!udph->check)
-                       udph->check = CSUM_MANGLED_0;
-       }
-
-       return 0;
-}
-
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
-                                 unsigned int thoff, __be32 addr,
-                                 __be32 new_addr)
-{
-       switch (iph->protocol) {
-       case IPPROTO_TCP:
-               if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
-                       return NF_DROP;
-               break;
-       case IPPROTO_UDP:
-               if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
-                       return NF_DROP;
-               break;
-       }
-
-       return 0;
-}
-
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-                          struct iphdr *iph, unsigned int thoff,
-                          enum flow_offload_tuple_dir dir)
-{
-       __be32 addr, new_addr;
-
-       switch (dir) {
-       case FLOW_OFFLOAD_DIR_ORIGINAL:
-               addr = iph->saddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
-               iph->saddr = new_addr;
-               break;
-       case FLOW_OFFLOAD_DIR_REPLY:
-               addr = iph->daddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
-               iph->daddr = new_addr;
-               break;
-       default:
-               return -1;
-       }
-       csum_replace4(&iph->check, addr, new_addr);
-
-       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
-}
-
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-                          struct iphdr *iph, unsigned int thoff,
-                          enum flow_offload_tuple_dir dir)
-{
-       __be32 addr, new_addr;
-
-       switch (dir) {
-       case FLOW_OFFLOAD_DIR_ORIGINAL:
-               addr = iph->daddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
-               iph->daddr = new_addr;
-               break;
-       case FLOW_OFFLOAD_DIR_REPLY:
-               addr = iph->saddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
-               iph->saddr = new_addr;
-               break;
-       default:
-               return -1;
-       }
-       csum_replace4(&iph->check, addr, new_addr);
-
-       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
-}
-
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-                         enum flow_offload_tuple_dir dir)
-{
-       struct iphdr *iph = ip_hdr(skb);
-       unsigned int thoff = iph->ihl * 4;
-
-       if (flow->flags & FLOW_OFFLOAD_SNAT &&
-           (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-            nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
-               return -1;
-       if (flow->flags & FLOW_OFFLOAD_DNAT &&
-           (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-            nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
-               return -1;
-
-       return 0;
-}
-
-static bool ip_has_options(unsigned int thoff)
-{
-       return thoff != sizeof(struct iphdr);
-}
-
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
-                           struct flow_offload_tuple *tuple)
-{
-       struct flow_ports *ports;
-       unsigned int thoff;
-       struct iphdr *iph;
-
-       if (!pskb_may_pull(skb, sizeof(*iph)))
-               return -1;
-
-       iph = ip_hdr(skb);
-       thoff = iph->ihl * 4;
-
-       if (ip_is_fragment(iph) ||
-           unlikely(ip_has_options(thoff)))
-               return -1;
-
-       if (iph->protocol != IPPROTO_TCP &&
-           iph->protocol != IPPROTO_UDP)
-               return -1;
-
-       thoff = iph->ihl * 4;
-       if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
-               return -1;
-
-       ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
-
-       tuple->src_v4.s_addr    = iph->saddr;
-       tuple->dst_v4.s_addr    = iph->daddr;
-       tuple->src_port         = ports->source;
-       tuple->dst_port         = ports->dest;
-       tuple->l3proto          = AF_INET;
-       tuple->l4proto          = iph->protocol;
-       tuple->iifidx           = dev->ifindex;
-
-       return 0;
-}
-
-/* Based on ip_exceeds_mtu(). */
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
-       if (skb->len <= mtu)
-               return false;
-
-       if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
-               return false;
-
-       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
-               return false;
-
-       return true;
-}
-
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
-{
-       u32 mtu;
-
-       mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
-       if (__nf_flow_exceeds_mtu(skb, mtu))
-               return true;
-
-       return false;
-}
-
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
-                       const struct nf_hook_state *state)
-{
-       struct flow_offload_tuple_rhash *tuplehash;
-       struct nf_flowtable *flow_table = priv;
-       struct flow_offload_tuple tuple = {};
-       enum flow_offload_tuple_dir dir;
-       struct flow_offload *flow;
-       struct net_device *outdev;
-       const struct rtable *rt;
-       struct iphdr *iph;
-       __be32 nexthop;
-
-       if (skb->protocol != htons(ETH_P_IP))
-               return NF_ACCEPT;
-
-       if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
-               return NF_ACCEPT;
-
-       tuplehash = flow_offload_lookup(flow_table, &tuple);
-       if (tuplehash == NULL)
-               return NF_ACCEPT;
-
-       outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
-       if (!outdev)
-               return NF_ACCEPT;
-
-       dir = tuplehash->tuple.dir;
-       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-
-       rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-       if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
-               return NF_ACCEPT;
-
-       if (skb_try_make_writable(skb, sizeof(*iph)))
-               return NF_DROP;
-
-       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-           nf_flow_nat_ip(flow, skb, dir) < 0)
-               return NF_DROP;
-
-       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
-       iph = ip_hdr(skb);
-       ip_decrease_ttl(iph);
-
-       skb->dev = outdev;
-       nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-       neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
-
-       return NF_STOLEN;
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
 static struct nf_flowtable_type flowtable_ipv4 = {
        .family         = NFPROTO_IPV4,
-       .params         = &nf_flow_offload_rhash_params,
-       .gc             = nf_flow_offload_work_gc,
+       .init           = nf_flow_table_init,
        .free           = nf_flow_table_free,
        .hook           = nf_flow_offload_ip_hook,
        .owner          = THIS_MODULE,
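
Everything removed here (tuple extraction, NAT mangling, the MTU check and the hook body) moves into the shared flow-table core, leaving the IPv4 type as glue around the common nf_flow_offload_ip_hook. What remains of the module is essentially the registration; a sketch of the assumed shape:

        static int __init nf_flow_ipv4_module_init(void)
        {
                nft_register_flowtable_type(&flowtable_ipv4);
                return 0;
        }
        module_init(nf_flow_ipv4_module_init);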
index ac8342dcb55eb51680d1f4559d57c2e99509b505..4e6b53ab6c3376a8f5ef363f9b2d9ea72cb95b55 100644 (file)
@@ -395,7 +395,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
                               struct nf_conntrack_expect *this)
 {
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        if (this->tuple.src.u3.ip != 0) {       /* Only accept calls from GK */
                nf_nat_follow_master(new, this);
@@ -497,7 +497,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_callforwarding_expect(struct nf_conn *new,
                                         struct nf_conntrack_expect *this)
 {
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        /* This must be a fresh one. */
        BUG_ON(new->status & IPS_NAT_DONE_MASK);
index f7ff6a364d7bcd556645b20f4aae74093620290b..325e02956bf59921442119818c325e5ccb10c3d9 100644 (file)
@@ -63,7 +63,7 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
 #endif /* CONFIG_XFRM */
 
 static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
-                                const struct nf_nat_range *range)
+                                const struct nf_nat_range2 *range)
 {
        return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
               ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
@@ -143,7 +143,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
-                                      struct nf_nat_range *range)
+                                      struct nf_nat_range2 *range)
 {
        if (tb[CTA_NAT_V4_MINIP]) {
                range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
@@ -246,8 +246,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
               const struct nf_hook_state *state,
               unsigned int (*do_chain)(void *priv,
                                        struct sk_buff *skb,
-                                       const struct nf_hook_state *state,
-                                       struct nf_conn *ct))
+                                       const struct nf_hook_state *state))
 {
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
@@ -285,7 +284,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
                if (!nf_nat_initialized(ct, maniptype)) {
                        unsigned int ret;
 
-                       ret = do_chain(priv, skb, state, ct);
+                       ret = do_chain(priv, skb, state);
                        if (ret != NF_ACCEPT)
                                return ret;
 
@@ -326,8 +325,7 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
               const struct nf_hook_state *state,
               unsigned int (*do_chain)(void *priv,
                                         struct sk_buff *skb,
-                                        const struct nf_hook_state *state,
-                                        struct nf_conn *ct))
+                                        const struct nf_hook_state *state))
 {
        unsigned int ret;
        __be32 daddr = ip_hdr(skb)->daddr;
@@ -346,8 +344,7 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
                const struct nf_hook_state *state,
                unsigned int (*do_chain)(void *priv,
                                          struct sk_buff *skb,
-                                         const struct nf_hook_state *state,
-                                         struct nf_conn *ct))
+                                         const struct nf_hook_state *state))
 {
 #ifdef CONFIG_XFRM
        const struct nf_conn *ct;
@@ -383,8 +380,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
                     const struct nf_hook_state *state,
                     unsigned int (*do_chain)(void *priv,
                                               struct sk_buff *skb,
-                                              const struct nf_hook_state *state,
-                                              struct nf_conn *ct))
+                                              const struct nf_hook_state *state))
 {
        const struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
index 0c366aad89cb47bcad1e553f388825a40730f6d9..f538c500154711bd7b419b5ddc9fd544d12b6755 100644 (file)
 
 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-                      const struct nf_nat_range *range,
+                      const struct nf_nat_range2 *range,
                       const struct net_device *out)
 {
        struct nf_conn *ct;
        struct nf_conn_nat *nat;
        enum ip_conntrack_info ctinfo;
-       struct nf_nat_range newrange;
+       struct nf_nat_range2 newrange;
        const struct rtable *rt;
        __be32 newsrc, nh;
 
index 8a69363b48846c628994e54c92a354ca46f71ebc..5d259a12e25facc28f642a0efe0b5a2d5ca464ea 100644 (file)
@@ -48,7 +48,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
        struct nf_conntrack_tuple t = {};
        const struct nf_ct_pptp_master *ct_pptp_info;
        const struct nf_nat_pptp *nat_pptp_info;
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
        struct nf_conn_nat *nat;
 
        nat = nf_ct_nat_ext_add(ct);
index edf05002d674eea2feb73a835577a158d9a6fa8f..00fda6331ce5a4204d6d72d198e4517cbb8aa684 100644 (file)
@@ -41,7 +41,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 static void
 gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
                 struct nf_conntrack_tuple *tuple,
-                const struct nf_nat_range *range,
+                const struct nf_nat_range2 *range,
                 enum nf_nat_manip_type maniptype,
                 const struct nf_conn *ct)
 {
index 7b98baa13edeb1e9b944df54cffaa9ba669cef77..6d7cf1d79baf3e987d628882a542b349ae2dcb3e 100644 (file)
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 static void
 icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
                  struct nf_conntrack_tuple *tuple,
-                 const struct nf_nat_range *range,
+                 const struct nf_nat_range2 *range,
                  enum nf_nat_manip_type maniptype,
                  const struct nf_conn *ct)
 {
index b5464a3f253baab1d06628fe8d039f30645c180a..285baccfbdea5f731d2c775aeb48187be9a55568 100644 (file)
@@ -28,8 +28,7 @@
 
 static unsigned int nft_nat_do_chain(void *priv,
                                      struct sk_buff *skb,
-                                     const struct nf_hook_state *state,
-                                     struct nf_conn *ct)
+                                     const struct nf_hook_state *state)
 {
        struct nft_pktinfo pkt;
 
index f18677277119305aeea043d81deb4e6ee7d20b7c..f1193e1e928aa14aaa88371d78558a46ed9c9d59 100644 (file)
@@ -21,7 +21,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
                               const struct nft_pktinfo *pkt)
 {
        struct nft_masq *priv = nft_expr_priv(expr);
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        memset(&range, 0, sizeof(range));
        range.flags = priv->flags;
index 05e47d77700993568a507d8e3b8bf30d5f843d8f..56a010622f70d08ccf541f51c83e8a601c07646a 100644 (file)
@@ -775,8 +775,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        ipc.addr = faddr = daddr;
 
        if (ipc.opt && ipc.opt->opt.srr) {
-               if (!daddr)
-                       return -EINVAL;
+               if (!daddr) {
+                       err = -EINVAL;
+                       goto out_free;
+               }
                faddr = ipc.opt->opt.faddr;
        }
        tos = get_rttos(&ipc, inet);
@@ -842,6 +844,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 out:
        ip_rt_put(rt);
+out_free:
        if (free)
                kfree(ipc.opt);
        if (!err) {
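
The early return leaked ipc.opt, which ip_cmsg_send() duplicates earlier in the function whenever control messages are present; funnelling the failure through the tail labels keeps the kfree() on every exit path. The shape of the fix in isolation:

        err = -EINVAL;
        goto out_free;  /* instead of return -EINVAL, which skipped kfree(ipc.opt) */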
index 261b71d0ccc5c17c6032bf67eb8f842006766e64..6c1ff89a60fa0a3485dcc71fafc799e798d5dc11 100644 (file)
@@ -298,6 +298,7 @@ static const struct snmp_mib snmp4_net_list[] = {
        SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
        SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
        SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
+       SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
        SNMP_MIB_SENTINEL
 };
 
index ccb25d80f67956b12b3ffd57f7ac5ddc9b2cb6c0..2cfa1b518f8d6368a563c1ae14d7dff7ce43e473 100644 (file)
@@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
                fnhe->fnhe_mtu_locked = lock;
-               fnhe->fnhe_expires = expires;
+               fnhe->fnhe_expires = max(1UL, expires);
 
                /* Exception created; mark the cached routes for the nexthop
                 * stale, so anyone caching it rechecks if this exception
@@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
        return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+       struct fnhe_hash_bucket *hash;
+       struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+       u32 hval = fnhe_hashfun(daddr);
+
+       spin_lock_bh(&fnhe_lock);
+
+       hash = rcu_dereference_protected(nh->nh_exceptions,
+                                        lockdep_is_held(&fnhe_lock));
+       hash += hval;
+
+       fnhe_p = &hash->chain;
+       fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+       while (fnhe) {
+               if (fnhe->fnhe_daddr == daddr) {
+                       rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+                               fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+                       fnhe_flush_routes(fnhe);
+                       kfree_rcu(fnhe, rcu);
+                       break;
+               }
+               fnhe_p = &fnhe->fnhe_next;
+               fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+                                                lockdep_is_held(&fnhe_lock));
+       }
+
+       spin_unlock_bh(&fnhe_lock);
+}
+
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 {
        struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 
        for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
-               if (fnhe->fnhe_daddr == daddr)
+               if (fnhe->fnhe_daddr == daddr) {
+                       if (fnhe->fnhe_expires &&
+                           time_after(jiffies, fnhe->fnhe_expires)) {
+                               ip_del_fnhe(nh, daddr);
+                               break;
+                       }
                        return fnhe;
+               }
        }
        return NULL;
 }
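
Expiry handling is centralised in find_exception() itself: a stale exception is unlinked on lookup (via ip_del_fnhe(), now defined above its only caller) instead of being handed back, which lets both the input and output route paths below drop their duplicated expiry-and-rt_cache dance. The added check in isolation:

        if (fnhe->fnhe_expires &&
            time_after(jiffies, fnhe->fnhe_expires)) {
                ip_del_fnhe(nh, daddr);         /* unlink, flush, free via RCU */
                break;                          /* behave as "no exception" */
        }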
@@ -1339,6 +1375,7 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
                        fnhe->fnhe_gw = 0;
                        fnhe->fnhe_pmtu = 0;
                        fnhe->fnhe_expires = 0;
+                       fnhe->fnhe_mtu_locked = false;
                        fnhe_flush_routes(fnhe);
                        orig = NULL;
                }
@@ -1636,36 +1673,6 @@ static void ip_handle_martian_source(struct net_device *dev,
 #endif
 }
 
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
-{
-       struct fnhe_hash_bucket *hash;
-       struct fib_nh_exception *fnhe, __rcu **fnhe_p;
-       u32 hval = fnhe_hashfun(daddr);
-
-       spin_lock_bh(&fnhe_lock);
-
-       hash = rcu_dereference_protected(nh->nh_exceptions,
-                                        lockdep_is_held(&fnhe_lock));
-       hash += hval;
-
-       fnhe_p = &hash->chain;
-       fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
-       while (fnhe) {
-               if (fnhe->fnhe_daddr == daddr) {
-                       rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
-                               fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
-                       fnhe_flush_routes(fnhe);
-                       kfree_rcu(fnhe, rcu);
-                       break;
-               }
-               fnhe_p = &fnhe->fnhe_next;
-               fnhe = rcu_dereference_protected(fnhe->fnhe_next,
-                                                lockdep_is_held(&fnhe_lock));
-       }
-
-       spin_unlock_bh(&fnhe_lock);
-}
-
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
                           const struct fib_result *res,
@@ -1719,20 +1726,10 @@ static int __mkroute_input(struct sk_buff *skb,
 
        fnhe = find_exception(&FIB_RES_NH(*res), daddr);
        if (do_cache) {
-               if (fnhe) {
+               if (fnhe)
                        rth = rcu_dereference(fnhe->fnhe_rth_input);
-                       if (rth && rth->dst.expires &&
-                           time_after(jiffies, rth->dst.expires)) {
-                               ip_del_fnhe(&FIB_RES_NH(*res), daddr);
-                               fnhe = NULL;
-                       } else {
-                               goto rt_cache;
-                       }
-               }
-
-               rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
-
-rt_cache:
+               else
+                       rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
                if (rt_cache_valid(rth)) {
                        skb_dst_set_noref(skb, &rth->dst);
                        goto out;
@@ -1964,8 +1961,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        fl4.saddr = saddr;
        fl4.flowi4_uid = sock_net_uid(net, NULL);
 
-       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
                flkeys = &_flkeys;
+       } else {
+               fl4.flowi4_proto = 0;
+               fl4.fl4_sport = 0;
+               fl4.fl4_dport = 0;
+       }
 
        err = fib_lookup(net, &fl4, res, 0);
        if (err != 0) {
@@ -2216,39 +2218,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
                 * the loopback interface and the IP_PKTINFO ipi_ifindex will
                 * be set to the loopback interface as well.
                 */
-               fi = NULL;
+               do_cache = false;
        }
 
        fnhe = NULL;
        do_cache &= fi != NULL;
-       if (do_cache) {
+       if (fi) {
                struct rtable __rcu **prth;
                struct fib_nh *nh = &FIB_RES_NH(*res);
 
                fnhe = find_exception(nh, fl4->daddr);
+               if (!do_cache)
+                       goto add;
                if (fnhe) {
                        prth = &fnhe->fnhe_rth_output;
-                       rth = rcu_dereference(*prth);
-                       if (rth && rth->dst.expires &&
-                           time_after(jiffies, rth->dst.expires)) {
-                               ip_del_fnhe(nh, fl4->daddr);
-                               fnhe = NULL;
-                       } else {
-                               goto rt_cache;
+               } else {
+                       if (unlikely(fl4->flowi4_flags &
+                                    FLOWI_FLAG_KNOWN_NH &&
+                                    !(nh->nh_gw &&
+                                      nh->nh_scope == RT_SCOPE_LINK))) {
+                               do_cache = false;
+                               goto add;
                        }
+                       prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
                }
-
-               if (unlikely(fl4->flowi4_flags &
-                            FLOWI_FLAG_KNOWN_NH &&
-                            !(nh->nh_gw &&
-                              nh->nh_scope == RT_SCOPE_LINK))) {
-                       do_cache = false;
-                       goto add;
-               }
-               prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
                rth = rcu_dereference(*prth);
-
-rt_cache:
                if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
                        return rth;
        }
index 4b195bac8ac0eefe0a224528ad854338c4f8e6e3..d2eed3ddcb0a1ad9778d96d46c685f6c60b93d8d 100644 (file)
@@ -46,6 +46,7 @@ static int tcp_syn_retries_min = 1;
 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static int comp_sack_nr_max = 255;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -1151,6 +1152,22 @@ static struct ctl_table ipv4_net_table[] = {
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &one,
        },
+       {
+               .procname       = "tcp_comp_sack_delay_ns",
+               .data           = &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
+       {
+               .procname       = "tcp_comp_sack_nr",
+               .data           = &init_net.ipv4.sysctl_tcp_comp_sack_nr,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &comp_sack_nr_max,
+       },
        {
                .procname       = "udp_rmem_min",
                .data           = &init_net.ipv4.sysctl_udp_rmem_min,
index dfd090ea54ad47112fc23c61180b5bf8edd2c736..0a2ea0bbf867271db05aedd7d48b193677664321 100644 (file)
@@ -697,7 +697,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 {
        return skb->len < size_goal &&
               sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
-              skb != tcp_write_queue_head(sk) &&
+              !tcp_rtx_queue_empty(sk) &&
               refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
 
@@ -1204,7 +1204,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
                        uarg->zerocopy = 0;
        }
 
-       if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
+       if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+           !tp->repair) {
                err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
                if (err == -EINPROGRESS && copied_syn > 0)
                        goto out;
@@ -1726,118 +1727,113 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_set_rcvlowat);
 
-/* When user wants to mmap X pages, we first need to perform the mapping
- * before freeing any skbs in receive queue, otherwise user would be unable
- * to fallback to standard recvmsg(). This happens if some data in the
- * requested block is not exactly fitting in a page.
- *
- * We only support order-0 pages for the moment.
- * mmap() on TCP is very strict, there is no point
- * trying to accommodate with pathological layouts.
- */
+#ifdef CONFIG_MMU
+static const struct vm_operations_struct tcp_vm_ops = {
+};
+
 int tcp_mmap(struct file *file, struct socket *sock,
             struct vm_area_struct *vma)
 {
-       unsigned long size = vma->vm_end - vma->vm_start;
-       unsigned int nr_pages = size >> PAGE_SHIFT;
-       struct page **pages_array = NULL;
-       u32 seq, len, offset, nr = 0;
-       struct sock *sk = sock->sk;
-       const skb_frag_t *frags;
+       if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+               return -EPERM;
+       vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+
+       /* Instruct vm_insert_page() to not down_read(mmap_sem) */
+       vma->vm_flags |= VM_MIXEDMAP;
+
+       vma->vm_ops = &tcp_vm_ops;
+       return 0;
+}
+EXPORT_SYMBOL(tcp_mmap);
+
+static int tcp_zerocopy_receive(struct sock *sk,
+                               struct tcp_zerocopy_receive *zc)
+{
+       unsigned long address = (unsigned long)zc->address;
+       const skb_frag_t *frags = NULL;
+       u32 length = 0, seq, offset;
+       struct vm_area_struct *vma;
+       struct sk_buff *skb = NULL;
        struct tcp_sock *tp;
-       struct sk_buff *skb;
        int ret;
 
-       if (vma->vm_pgoff || !nr_pages)
+       if (address & (PAGE_SIZE - 1) || address != zc->address)
                return -EINVAL;
 
-       if (vma->vm_flags & VM_WRITE)
-               return -EPERM;
-       /* TODO: Maybe the following is not needed if pages are COW */
-       vma->vm_flags &= ~VM_MAYWRITE;
-
-       lock_sock(sk);
-
-       ret = -ENOTCONN;
        if (sk->sk_state == TCP_LISTEN)
-               goto out;
+               return -ENOTCONN;
 
        sock_rps_record_flow(sk);
 
-       if (tcp_inq(sk) < size) {
-               ret = sock_flag(sk, SOCK_DONE) ? -EIO : -EAGAIN;
+       down_read(&current->mm->mmap_sem);
+
+       ret = -EINVAL;
+       vma = find_vma(current->mm, address);
+       if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops)
                goto out;
-       }
+       zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
+
        tp = tcp_sk(sk);
        seq = tp->copied_seq;
-       /* Abort if urgent data is in the area */
-       if (unlikely(tp->urg_data)) {
-               u32 urg_offset = tp->urg_seq - seq;
+       zc->length = min_t(u32, zc->length, tcp_inq(sk));
+       zc->length &= ~(PAGE_SIZE - 1);
 
-               ret = -EINVAL;
-               if (urg_offset < size)
-                       goto out;
-       }
-       ret = -ENOMEM;
-       pages_array = kvmalloc_array(nr_pages, sizeof(struct page *),
-                                    GFP_KERNEL);
-       if (!pages_array)
-               goto out;
-       skb = tcp_recv_skb(sk, seq, &offset);
-       ret = -EINVAL;
-skb_start:
-       /* We do not support anything not in page frags */
-       offset -= skb_headlen(skb);
-       if ((int)offset < 0)
-               goto out;
-       if (skb_has_frag_list(skb))
-               goto out;
-       len = skb->data_len - offset;
-       frags = skb_shinfo(skb)->frags;
-       while (offset) {
-               if (frags->size > offset)
-                       goto out;
-               offset -= frags->size;
-               frags++;
-       }
-       while (nr < nr_pages) {
-               if (len) {
-                       if (len < PAGE_SIZE)
-                               goto out;
-                       if (frags->size != PAGE_SIZE || frags->page_offset)
-                               goto out;
-                       pages_array[nr++] = skb_frag_page(frags);
-                       frags++;
-                       len -= PAGE_SIZE;
-                       seq += PAGE_SIZE;
-                       continue;
+       zap_page_range(vma, address, zc->length);
+
+       zc->recv_skip_hint = 0;
+       ret = 0;
+       while (length + PAGE_SIZE <= zc->length) {
+               if (zc->recv_skip_hint < PAGE_SIZE) {
+                       if (skb) {
+                               skb = skb->next;
+                               offset = seq - TCP_SKB_CB(skb)->seq;
+                       } else {
+                               skb = tcp_recv_skb(sk, seq, &offset);
+                       }
+
+                       zc->recv_skip_hint = skb->len - offset;
+                       offset -= skb_headlen(skb);
+                       if ((int)offset < 0 || skb_has_frag_list(skb))
+                               break;
+                       frags = skb_shinfo(skb)->frags;
+                       while (offset) {
+                               if (frags->size > offset)
+                                       goto out;
+                               offset -= frags->size;
+                               frags++;
+                       }
                }
-               skb = skb->next;
-               offset = seq - TCP_SKB_CB(skb)->seq;
-               goto skb_start;
-       }
-       /* OK, we have a full set of pages ready to be inserted into vma */
-       for (nr = 0; nr < nr_pages; nr++) {
-               ret = vm_insert_page(vma, vma->vm_start + (nr << PAGE_SHIFT),
-                                    pages_array[nr]);
+               if (frags->size != PAGE_SIZE || frags->page_offset)
+                       break;
+               ret = vm_insert_page(vma, address + length,
+                                    skb_frag_page(frags));
                if (ret)
-                       goto out;
+                       break;
+               length += PAGE_SIZE;
+               seq += PAGE_SIZE;
+               zc->recv_skip_hint -= PAGE_SIZE;
+               frags++;
        }
-       /* operation is complete, we can 'consume' all skbs */
-       tp->copied_seq = seq;
-       tcp_rcv_space_adjust(sk);
-
-       /* Clean up data we have read: This will do ACK frames. */
-       tcp_recv_skb(sk, seq, &offset);
-       tcp_cleanup_rbuf(sk, size);
-
-       ret = 0;
 out:
-       release_sock(sk);
-       kvfree(pages_array);
+       up_read(&current->mm->mmap_sem);
+       if (length) {
+               tp->copied_seq = seq;
+               tcp_rcv_space_adjust(sk);
+
+               /* Clean up data we have read: This will do ACK frames. */
+               tcp_recv_skb(sk, seq, &offset);
+               tcp_cleanup_rbuf(sk, length);
+               ret = 0;
+               if (length == zc->length)
+                       zc->recv_skip_hint = 0;
+       } else {
+               if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE))
+                       ret = -EIO;
+       }
+       zc->length = length;
        return ret;
 }
-EXPORT_SYMBOL(tcp_mmap);
+#endif
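
A sketch of the intended userspace flow for the rewritten receive path: mmap() a read-only window on the socket (tcp_mmap() above now only installs tcp_vm_ops), then pull pages in with the TCP_ZEROCOPY_RECEIVE getsockopt added further down. The struct layout mirrors the fields used above; the sockopt number comes from this series' uapi patch and is assumed here, hence the fallback definitions:

	#include <linux/types.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/mman.h>
	#include <sys/socket.h>

	#ifndef TCP_ZEROCOPY_RECEIVE
	#define TCP_ZEROCOPY_RECEIVE 35		/* assumed uapi value */
	struct tcp_zerocopy_receive {
		__u64 address;		/* in: page-aligned mmap()ed address */
		__u32 length;		/* in: bytes wanted; out: bytes mapped */
		__u32 recv_skip_hint;	/* out: bytes to drain with recvmsg() */
	};
	#endif

	static long zc_receive(int fd, void *addr, __u32 len)
	{
		struct tcp_zerocopy_receive zc = {
			.address = (__u64)(unsigned long)addr,
			.length  = len,
		};
		socklen_t zclen = sizeof(zc);

		/* addr must come from mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0) */
		if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zclen))
			return -1;
		/* payload pages are now mapped at addr; zc.recv_skip_hint bytes
		 * (linear skb heads, sub-page tails) still need recvmsg() */
		return zc.length;
	}
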
 
 static void tcp_update_recv_tstamps(struct sk_buff *skb,
                                    struct scm_timestamping *tss)
@@ -1894,6 +1890,22 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
        }
 }
 
+static int tcp_inq_hint(struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       u32 copied_seq = READ_ONCE(tp->copied_seq);
+       u32 rcv_nxt = READ_ONCE(tp->rcv_nxt);
+       int inq;
+
+       inq = rcv_nxt - copied_seq;
+       if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) {
+               lock_sock(sk);
+               inq = tp->rcv_nxt - tp->copied_seq;
+               release_sock(sk);
+       }
+       return inq;
+}
+
 /*
  *     This routine copies from a sock struct into the user buffer.
  *
@@ -1910,13 +1922,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
        u32 peek_seq;
        u32 *seq;
        unsigned long used;
-       int err;
+       int err, inq;
        int target;             /* Read at least this many bytes */
        long timeo;
        struct sk_buff *skb, *last;
        u32 urg_hole = 0;
        struct scm_timestamping tss;
        bool has_tss = false;
+       bool has_cmsg;
 
        if (unlikely(flags & MSG_ERRQUEUE))
                return inet_recv_error(sk, msg, len, addr_len);
@@ -1931,6 +1944,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
        if (sk->sk_state == TCP_LISTEN)
                goto out;
 
+       has_cmsg = tp->recvmsg_inq;
        timeo = sock_rcvtimeo(sk, nonblock);
 
        /* Urgent data needs to be handled specially. */
@@ -2117,6 +2131,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                if (TCP_SKB_CB(skb)->has_rxtstamp) {
                        tcp_update_recv_tstamps(skb, &tss);
                        has_tss = true;
+                       has_cmsg = true;
                }
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                        goto found_fin_ok;
@@ -2136,13 +2151,20 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
         * on connected socket. I was just happy when found this 8) --ANK
         */
 
-       if (has_tss)
-               tcp_recv_timestamp(msg, sk, &tss);
-
        /* Clean up data we have read: This will do ACK frames. */
        tcp_cleanup_rbuf(sk, copied);
 
        release_sock(sk);
+
+       if (has_cmsg) {
+               if (has_tss)
+                       tcp_recv_timestamp(msg, sk, &tss);
+               if (tp->recvmsg_inq) {
+                       inq = tcp_inq_hint(sk);
+                       put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+               }
+       }
+
        return copied;
 
 out:
@@ -2573,6 +2595,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        dst_release(sk->sk_rx_dst);
        sk->sk_rx_dst = NULL;
        tcp_saved_syn_free(tp);
+       tp->compressed_ack = 0;
 
        /* Clean up fastopen related fields */
        tcp_free_fastopen_req(tp);
@@ -2812,7 +2835,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
        case TCP_REPAIR_QUEUE:
                if (!tp->repair)
                        err = -EPERM;
-               else if (val < TCP_QUEUES_NR)
+               else if ((unsigned int)val < TCP_QUEUES_NR)
                        tp->repair_queue = val;
                else
                        err = -EINVAL;
@@ -3011,6 +3034,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                tp->notsent_lowat = val;
                sk->sk_write_space(sk);
                break;
+       case TCP_INQ:
+               if (val > 1 || val < 0)
+                       err = -EINVAL;
+               else
+                       tp->recvmsg_inq = val;
+               break;
        default:
                err = -ENOPROTOOPT;
                break;
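
TCP_INQ accepts only 0 or 1; once enabled (int one = 1; setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one));), every recvmsg() gains a SOL_TCP/TCP_CM_INQ cmsg carrying the tcp_inq_hint() value added above. A userspace sketch; the option numbers come from this series' uapi patch and are assumed here, hence the fallback defines:

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	#ifndef TCP_INQ
	#define TCP_INQ    36		/* assumed uapi value */
	#define TCP_CM_INQ TCP_INQ
	#endif

	static ssize_t recv_with_inq(int fd, void *buf, size_t len, int *inq)
	{
		char control[CMSG_SPACE(sizeof(int))];
		struct iovec iov = { .iov_base = buf, .iov_len = len };
		struct msghdr msg = {
			.msg_iov        = &iov,
			.msg_iovlen     = 1,
			.msg_control    = control,
			.msg_controllen = sizeof(control),
		};
		struct cmsghdr *cm;
		ssize_t ret = recvmsg(fd, &msg, 0);

		/* unread-byte hint rides along as ancillary data */
		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
			if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
				memcpy(inq, CMSG_DATA(cm), sizeof(*inq));
		return ret;
	}
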
@@ -3436,6 +3465,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
        case TCP_NOTSENT_LOWAT:
                val = tp->notsent_lowat;
                break;
+       case TCP_INQ:
+               val = tp->recvmsg_inq;
+               break;
        case TCP_SAVE_SYN:
                val = tp->save_syn;
                break;
@@ -3472,6 +3504,25 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                }
                return 0;
        }
+#ifdef CONFIG_MMU
+       case TCP_ZEROCOPY_RECEIVE: {
+               struct tcp_zerocopy_receive zc;
+               int err;
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+               if (len != sizeof(zc))
+                       return -EINVAL;
+               if (copy_from_user(&zc, optval, len))
+                       return -EFAULT;
+               lock_sock(sk);
+               err = tcp_zerocopy_receive(sk, &zc);
+               release_sock(sk);
+               if (!err && copy_to_user(optval, &zc, len))
+                       err = -EFAULT;
+               return err;
+       }
+#endif
        default:
                return -ENOPROTOOPT;
        }
index 158d105e76da1b5fcf29db4a5b87b063645a5ac1..58e2f479ffb4d523b4ccfbb859bdd186a55ab83d 100644 (file)
@@ -806,7 +806,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
                        }
                }
        }
-       bbr->idle_restart = 0;
+       /* Restart after idle ends only once we process a new S/ACK for data */
+       if (rs->delivered > 0)
+               bbr->idle_restart = 0;
 }
 
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
index 17b78582ba631299f3a01e354b846386d6e82050..aebb29ab2fdf2ceaa182cd11928f145a886149ff 100644 (file)
@@ -78,7 +78,6 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
-#include <linux/sock_diag.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -112,6 +111,25 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 #define REXMIT_LOST    1 /* retransmit packets marked lost */
 #define REXMIT_NEW     2 /* FRTO-style transmit of unsent/new packets */
 
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+static DEFINE_STATIC_KEY_FALSE(clean_acked_data_enabled);
+
+void clean_acked_data_enable(struct inet_connection_sock *icsk,
+                            void (*cad)(struct sock *sk, u32 ack_seq))
+{
+       icsk->icsk_clean_acked = cad;
+       static_branch_inc(&clean_acked_data_enabled);
+}
+EXPORT_SYMBOL_GPL(clean_acked_data_enable);
+
+void clean_acked_data_disable(struct inet_connection_sock *icsk)
+{
+       static_branch_dec(&clean_acked_data_enabled);
+       icsk->icsk_clean_acked = NULL;
+}
+EXPORT_SYMBOL_GPL(clean_acked_data_disable);
+#endif
+
 static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
                             unsigned int len)
 {
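
This pair of helpers gives TLS_DEVICE a hook to release offload state for fully acknowledged data, behind a static key so the ACK fast path (see the tcp_ack() hunk below) costs nothing when unused. A kernel-side sketch of a consumer, with the function names purely illustrative:

	/* illustrative driver-side use of the helpers above */
	static void my_tls_clean_acked(struct sock *sk, u32 acked_seq)
	{
		/* free TLS record state for bytes acked below acked_seq */
	}

	static void my_tls_offload_start(struct sock *sk)
	{
		clean_acked_data_enable(inet_csk(sk), my_tls_clean_acked);
	}

	static void my_tls_offload_stop(struct sock *sk)
	{
		clean_acked_data_disable(inet_csk(sk));
	}
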
@@ -1899,19 +1917,54 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
        tp->undo_retrans = tp->retrans_out ? : -1;
 }
 
-/* Enter Loss state. If we detect SACK reneging, forget all SACK information
+static bool tcp_is_rack(const struct sock *sk)
+{
+       return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+}
+
+/* If we detect SACK reneging, forget all SACK information
  * and reset tags completely, otherwise preserve SACKs. If receiver
  * dropped its ofo queue, we will know this due to reneging detection.
  */
+static void tcp_timeout_mark_lost(struct sock *sk)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *skb, *head;
+       bool is_reneg;                  /* is receiver reneging on SACKs? */
+
+       head = tcp_rtx_queue_head(sk);
+       is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
+       if (is_reneg) {
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
+               tp->sacked_out = 0;
+               /* Mark SACK reneging until we recover from this loss event. */
+               tp->is_sack_reneg = 1;
+       } else if (tcp_is_reno(tp)) {
+               tcp_reset_reno_sack(tp);
+       }
+
+       skb = head;
+       skb_rbtree_walk_from(skb) {
+               if (is_reneg)
+                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
+               else if (tcp_is_rack(sk) && skb != head &&
+                        tcp_rack_skb_timeout(tp, skb, 0) > 0)
+                       continue; /* Don't mark recently sent ones lost yet */
+               tcp_mark_skb_lost(sk, skb);
+       }
+       tcp_verify_left_out(tp);
+       tcp_clear_all_retrans_hints(tp);
+}
+
+/* Enter Loss state. */
 void tcp_enter_loss(struct sock *sk)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
-       struct sk_buff *skb;
        bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
-       bool is_reneg;                  /* is receiver reneging on SACKs? */
-       bool mark_lost;
+
+       tcp_timeout_mark_lost(sk);
 
        /* Reduce ssthresh if it has not yet been made inside this window. */
        if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
@@ -1923,40 +1976,10 @@ void tcp_enter_loss(struct sock *sk)
                tcp_ca_event(sk, CA_EVENT_LOSS);
                tcp_init_undo(tp);
        }
-       tp->snd_cwnd       = 1;
+       tp->snd_cwnd       = tcp_packets_in_flight(tp) + 1;
        tp->snd_cwnd_cnt   = 0;
        tp->snd_cwnd_stamp = tcp_jiffies32;
 
-       tp->retrans_out = 0;
-       tp->lost_out = 0;
-
-       if (tcp_is_reno(tp))
-               tcp_reset_reno_sack(tp);
-
-       skb = tcp_rtx_queue_head(sk);
-       is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
-       if (is_reneg) {
-               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
-               tp->sacked_out = 0;
-               /* Mark SACK reneging until we recover from this loss event. */
-               tp->is_sack_reneg = 1;
-       }
-       tcp_clear_all_retrans_hints(tp);
-
-       skb_rbtree_walk_from(skb) {
-               mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
-                            is_reneg);
-               if (mark_lost)
-                       tcp_sum_lost(tp, skb);
-               TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
-               if (mark_lost) {
-                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
-                       TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                       tp->lost_out += tcp_skb_pcount(skb);
-               }
-       }
-       tcp_verify_left_out(tp);
-
        /* Timeout in disordered state after receiving substantial DUPACKs
         * suggests that the degree of reordering is over-estimated.
         */
@@ -2123,7 +2146,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
                return true;
 
        /* Not-A-Trick#2 : Classic rule... */
-       if (tcp_dupack_heuristics(tp) > tp->reordering)
+       if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
                return true;
 
        return false;
@@ -2200,9 +2223,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (tcp_is_reno(tp)) {
-               tcp_mark_head_lost(sk, 1, 1);
-       } else {
+       if (tcp_is_sack(tp)) {
                int sacked_upto = tp->sacked_out - tp->reordering;
                if (sacked_upto >= 0)
                        tcp_mark_head_lost(sk, sacked_upto, 0);
@@ -2700,12 +2721,16 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
        return false;
 }
 
-static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
+static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       /* Use RACK to detect loss */
-       if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+       if (tcp_rtx_queue_empty(sk))
+               return;
+
+       if (unlikely(tcp_is_reno(tp))) {
+               tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
+       } else if (tcp_is_rack(sk)) {
                u32 prior_retrans = tp->retrans_out;
 
                tcp_rack_mark_lost(sk);
@@ -2801,11 +2826,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
                        tcp_try_keep_open(sk);
                        return;
                }
-               tcp_rack_identify_loss(sk, ack_flag);
+               tcp_identify_packet_loss(sk, ack_flag);
                break;
        case TCP_CA_Loss:
                tcp_process_loss(sk, flag, is_dupack, rexmit);
-               tcp_rack_identify_loss(sk, ack_flag);
+               tcp_identify_packet_loss(sk, ack_flag);
                if (!(icsk->icsk_ca_state == TCP_CA_Open ||
                      (*ack_flag & FLAG_LOST_RETRANS)))
                        return;
@@ -2822,7 +2847,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
                if (icsk->icsk_ca_state <= TCP_CA_Disorder)
                        tcp_try_undo_dsack(sk);
 
-               tcp_rack_identify_loss(sk, ack_flag);
+               tcp_identify_packet_loss(sk, ack_flag);
                if (!tcp_time_to_recover(sk, flag)) {
                        tcp_try_to_open(sk, flag);
                        return;
@@ -2844,7 +2869,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
                fast_rexmit = 1;
        }
 
-       if (do_lost)
+       if (!tcp_is_rack(sk) && do_lost)
                tcp_update_scoreboard(sk, fast_rexmit);
        *rexmit = REXMIT_LOST;
 }
@@ -3561,6 +3586,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        if (after(ack, prior_snd_una)) {
                flag |= FLAG_SND_UNA_ADVANCED;
                icsk->icsk_retransmits = 0;
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+               if (static_branch_unlikely(&clean_acked_data_enabled))
+                       if (icsk->icsk_clean_acked)
+                               icsk->icsk_clean_acked(sk, ack);
+#endif
        }
 
        prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
@@ -3890,11 +3921,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
        int length = (th->doff << 2) - sizeof(*th);
        const u8 *ptr = (const u8 *)(th + 1);
 
-       /* If the TCP option is too short, we can short cut */
-       if (length < TCPOLEN_MD5SIG)
-               return NULL;
-
-       while (length > 0) {
+       /* If not enough data remaining, we can short cut */
+       while (length >= TCPOLEN_MD5SIG) {
                int opcode = *ptr++;
                int opsize;
 
@@ -4221,6 +4249,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
         * If the sack array is full, forget about the last one.
         */
        if (this_sack >= TCP_NUM_SACKS) {
+               if (tp->compressed_ack)
+                       tcp_send_ack(sk);
                this_sack--;
                tp->rx_opt.num_sacks--;
                sp--;
@@ -4687,8 +4717,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
        if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
                goto out_of_window;
 
-       tcp_enter_quickack_mode(sk);
-
        if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
                /* Partial packet, seq < rcv_next < end_seq */
                SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
@@ -5055,6 +5083,7 @@ static inline void tcp_data_snd_check(struct sock *sk)
 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 {
        struct tcp_sock *tp = tcp_sk(sk);
+       unsigned long rtt, delay;
 
            /* More than one full frame received... */
        if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
@@ -5066,15 +5095,36 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
            (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
             __tcp_select_window(sk) >= tp->rcv_wnd)) ||
            /* We ACK each frame or... */
-           tcp_in_quickack_mode(sk) ||
-           /* We have out of order data. */
-           (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
-               /* Then ack it now */
+           tcp_in_quickack_mode(sk)) {
+send_now:
                tcp_send_ack(sk);
-       } else {
-               /* Else, send delayed ack. */
+               return;
+       }
+
+       if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                tcp_send_delayed_ack(sk);
+               return;
        }
+
+       if (!tcp_is_sack(tp) ||
+           tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+               goto send_now;
+       tp->compressed_ack++;
+
+       if (hrtimer_is_queued(&tp->compressed_ack_timer))
+               return;
+
+       /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
+
+       rtt = tp->rcv_rtt_est.rtt_us;
+       if (tp->srtt_us && tp->srtt_us < rtt)
+               rtt = tp->srtt_us;
+
+       delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+                     rtt * (NSEC_PER_USEC >> 3)/20);
+       sock_hold(sk);
+       hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
+                     HRTIMER_MODE_REL_PINNED_SOFT);
 }
 
 static inline void tcp_ack_snd_check(struct sock *sk)
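
Note the units in the delay computation above: rtt is in srtt_us format (microseconds left-shifted by 3), so rtt * (NSEC_PER_USEC >> 3) / 20 is 5% of the RTT in nanoseconds. A worked example under that assumption, with an illustrative RTT:

	#include <stdio.h>

	#define NSEC_PER_USEC 1000UL

	int main(void)
	{
		unsigned long rtt = 10000UL << 3;  /* a 10 ms RTT sample */
		unsigned long limit = 1000000UL;   /* tcp_comp_sack_delay_ns default */
		unsigned long five_pct = rtt * (NSEC_PER_USEC >> 3) / 20;
		unsigned long delay = five_pct < limit ? five_pct : limit;

		printf("%lu ns\n", delay);	/* 500000 ns, i.e. 5% of 10 ms */
		return 0;
	}
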
@@ -6191,15 +6241,10 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 #if IS_ENABLED(CONFIG_IPV6)
                ireq->pktopts = NULL;
 #endif
+               atomic64_set(&ireq->ir_cookie, 0);
                ireq->ireq_state = TCP_NEW_SYN_RECV;
                write_pnet(&ireq->ireq_net, sock_net(sk_listener));
                ireq->ireq_family = sk_listener->sk_family;
-
-               BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
-                                       offsetof(struct sock, sk_cookie));
-               BUILD_BUG_ON(offsetof(struct inet_request_sock, ireq_net) !=
-                                       offsetof(struct sock, sk_net));
-               sock_init_cookie((struct sock *)ireq);
        }
 
        return req;
index f70586b508383b50c96b3167976a56ef3a6b3c0b..adbdb503db0c983ef4185f83b138aa51bafd15bf 100644 (file)
@@ -621,6 +621,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        struct sock *sk1 = NULL;
 #endif
        struct net *net;
+       struct sock *ctl_sk;
 
        /* Never send a reset in response to a reset. */
        if (th->rst)
@@ -723,11 +724,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        arg.tos = ip_hdr(skb)->tos;
        arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+       ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
+       if (sk)
+               ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+                                  inet_twsk(sk)->tw_mark : sk->sk_mark;
+       ip_send_unicast_reply(ctl_sk,
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                              &arg, arg.iov[0].iov_len);
 
+       ctl_sk->sk_mark = 0;
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
        local_bh_enable();
@@ -759,6 +765,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
        } rep;
        struct net *net = sock_net(sk);
        struct ip_reply_arg arg;
+       struct sock *ctl_sk;
 
        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));
@@ -809,11 +816,16 @@ static void tcp_v4_send_ack(const struct sock *sk,
        arg.tos = tos;
        arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+       ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
+       if (sk)
+               ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+                                  inet_twsk(sk)->tw_mark : sk->sk_mark;
+       ip_send_unicast_reply(ctl_sk,
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                              &arg, arg.iov[0].iov_len);
 
+       ctl_sk->sk_mark = 0;
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        local_bh_enable();
 }
@@ -2560,6 +2572,8 @@ static int __net_init tcp_sk_init(struct net *net)
                       init_net.ipv4.sysctl_tcp_wmem,
                       sizeof(init_net.ipv4.sysctl_tcp_wmem));
        }
+       net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
+       net->ipv4.sysctl_tcp_comp_sack_nr = 44;
        net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
        spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
        net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
index 57b5468b5139827aa631b01d51bf7178d59ab57f..f867658b4b30fbe8785688252bb902ab015e1bc9 100644 (file)
@@ -263,6 +263,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                struct inet_sock *inet = inet_sk(sk);
 
                tw->tw_transparent      = inet->transparent;
+               tw->tw_mark             = sk->sk_mark;
                tw->tw_rcv_wscale       = tp->rx_opt.rcv_wscale;
                tcptw->tw_rcv_nxt       = tp->rcv_nxt;
                tcptw->tw_snd_nxt       = tp->snd_nxt;
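
With tw_mark carried into the timewait socket and copied onto the per-CPU control socket in the tcp_ipv4.c hunks above, RST and ACK replies generated on behalf of a marked flow keep its fwmark, so policy routing still applies in TIME_WAIT. A userspace sketch (setting SO_MARK requires CAP_NET_ADMIN; the mark value is illustrative):

	#include <sys/socket.h>

	static int set_fwmark(int fd, unsigned int mark)
	{
		return setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
	}
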
index 383cac0ff0ec059ca7dbc1a6304cc7f8183e008d..8e08b409c71e1f8e69422f1756d48b5bc55411c3 100644 (file)
@@ -162,6 +162,15 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 /* Account for an ACK we sent. */
 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (unlikely(tp->compressed_ack)) {
+               NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+                             tp->compressed_ack);
+               tp->compressed_ack = 0;
+               if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+                       __sock_put(sk);
+       }
        tcp_dec_quickack_mode(sk, pkts);
        inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -229,11 +238,9 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
                }
        }
 
-       if (mss > (1 << *rcv_wscale)) {
-               if (!init_rcv_wnd) /* Use default unless specified otherwise */
-                       init_rcv_wnd = tcp_default_init_rwnd(mss);
-               *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-       }
+       if (!init_rcv_wnd) /* Use default unless specified otherwise */
+               init_rcv_wnd = tcp_default_init_rwnd(mss);
+       *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
 
        /* Set the clamp no higher than max representable value */
        (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
@@ -585,14 +592,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
        unsigned int remaining = MAX_TCP_OPTION_SPACE;
        struct tcp_fastopen_request *fastopen = tp->fastopen_req;
 
+       *md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-       *md5 = tp->af_specific->md5_lookup(sk, sk);
-       if (*md5) {
-               opts->options |= OPTION_MD5;
-               remaining -= TCPOLEN_MD5SIG_ALIGNED;
+       if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+               *md5 = tp->af_specific->md5_lookup(sk, sk);
+               if (*md5) {
+                       opts->options |= OPTION_MD5;
+                       remaining -= TCPOLEN_MD5SIG_ALIGNED;
+               }
        }
-#else
-       *md5 = NULL;
 #endif
 
        /* We always get an MSS option.  The option bytes which will be seen in
@@ -720,14 +728,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 
        opts->options = 0;
 
+       *md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-       *md5 = tp->af_specific->md5_lookup(sk, sk);
-       if (unlikely(*md5)) {
-               opts->options |= OPTION_MD5;
-               size += TCPOLEN_MD5SIG_ALIGNED;
+       if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+               *md5 = tp->af_specific->md5_lookup(sk, sk);
+               if (*md5) {
+                       opts->options |= OPTION_MD5;
+                       size += TCPOLEN_MD5SIG_ALIGNED;
+               }
        }
-#else
-       *md5 = NULL;
 #endif
 
        if (likely(tp->rx_opt.tstamp_ok)) {
@@ -772,7 +781,7 @@ struct tsq_tasklet {
 };
 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
 
-static void tcp_tsq_handler(struct sock *sk)
+static void tcp_tsq_write(struct sock *sk)
 {
        if ((1 << sk->sk_state) &
            (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
@@ -789,6 +798,16 @@ static void tcp_tsq_handler(struct sock *sk)
                               0, GFP_ATOMIC);
        }
 }
+
+static void tcp_tsq_handler(struct sock *sk)
+{
+       bh_lock_sock(sk);
+       if (!sock_owned_by_user(sk))
+               tcp_tsq_write(sk);
+       else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+               sock_hold(sk);
+       bh_unlock_sock(sk);
+}
 /*
  * One tasklet per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
@@ -816,16 +835,7 @@ static void tcp_tasklet_func(unsigned long data)
                smp_mb__before_atomic();
                clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
 
-               if (!sk->sk_lock.owned &&
-                   test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
-                       bh_lock_sock(sk);
-                       if (!sock_owned_by_user(sk)) {
-                               clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
-                               tcp_tsq_handler(sk);
-                       }
-                       bh_unlock_sock(sk);
-               }
-
+               tcp_tsq_handler(sk);
                sk_free(sk);
        }
 }
@@ -853,9 +863,10 @@ void tcp_release_cb(struct sock *sk)
                nflags = flags & ~TCP_DEFERRED_ALL;
        } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-       if (flags & TCPF_TSQ_DEFERRED)
-               tcp_tsq_handler(sk);
-
+       if (flags & TCPF_TSQ_DEFERRED) {
+               tcp_tsq_write(sk);
+               __sock_put(sk);
+       }
        /* Here begins the tricky part :
         * We are called from release_sock() with :
         * 1) BH disabled
@@ -929,7 +940,7 @@ void tcp_wfree(struct sk_buff *skb)
                if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
                        goto out;
 
-               nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+               nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
                nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
                if (nval != oval)
                        continue;
@@ -948,37 +959,17 @@ void tcp_wfree(struct sk_buff *skb)
        sk_free(sk);
 }
 
-/* Note: Called under hard irq.
- * We can not call TCP stack right away.
+/* Note: Called under soft irq.
+ * We can call TCP stack right away, unless socket is owned by user.
  */
 enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 {
        struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
        struct sock *sk = (struct sock *)tp;
-       unsigned long nval, oval;
 
-       for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
-               struct tsq_tasklet *tsq;
-               bool empty;
-
-               if (oval & TSQF_QUEUED)
-                       break;
-
-               nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
-               nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
-               if (nval != oval)
-                       continue;
+       tcp_tsq_handler(sk);
+       sock_put(sk);
 
-               if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
-                       break;
-               /* queue this socket to tasklet queue */
-               tsq = this_cpu_ptr(&tsq_tasklet);
-               empty = list_empty(&tsq->head);
-               list_add(&tp->tsq_node, &tsq->head);
-               if (empty)
-                       tasklet_schedule(&tsq->tasklet);
-               break;
-       }
        return HRTIMER_NORESTART;
 }
 
@@ -1011,7 +1002,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
        do_div(len_ns, rate);
        hrtimer_start(&tcp_sk(sk)->pacing_timer,
                      ktime_add_ns(ktime_get(), len_ns),
-                     HRTIMER_MODE_ABS_PINNED);
+                     HRTIMER_MODE_ABS_PINNED_SOFT);
+       sock_hold(sk);
 }
 
 static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1078,7 +1070,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
        /* if no packet is in qdisc/device queue, then allow XPS to select
         * another queue. We can be called from tcp_tsq_handler()
-        * which holds one reference to sk_wmem_alloc.
+        * which holds one reference to sk.
         *
         * TODO: Ideally, in-flight pure ACK packets should not matter here.
         * One way to get this would be to set skb->truesize = 2 on them.
@@ -2185,7 +2177,7 @@ static int tcp_mtu_probe(struct sock *sk)
 static bool tcp_pacing_check(const struct sock *sk)
 {
        return tcp_needs_internal_pacing(sk) &&
-              hrtimer_active(&tcp_sk(sk)->pacing_timer);
+              hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
 }
 
 /* TCP Small Queues :
@@ -2365,8 +2357,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                                          skb, limit, mss_now, gfp)))
                        break;
 
-               if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
-                       clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
                if (tcp_small_queue_check(sk, skb, 0))
                        break;
 
@@ -2833,8 +2823,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
                return -EBUSY;
 
        if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
-               if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-                       BUG();
+               if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+                       WARN_ON_ONCE(1);
+                       return -EINVAL;
+               }
                if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
                        return -ENOMEM;
        }
@@ -3342,6 +3334,7 @@ static void tcp_connect_init(struct sock *sk)
        sock_reset_flag(sk, SOCK_DONE);
        tp->snd_wnd = 0;
        tcp_init_wl(tp, 0);
+       tcp_write_queue_purge(sk);
        tp->snd_una = tp->write_seq;
        tp->snd_sml = tp->write_seq;
        tp->snd_up = tp->write_seq;
index 3a81720ac0c40877386e37c99f4f321ab4127fa4..71593e4400abe344969ed2a6d0f3461e6f8e9bb2 100644 (file)
@@ -2,7 +2,7 @@
 #include <linux/tcp.h>
 #include <net/tcp.h>
 
-static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
+void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
@@ -21,6 +21,38 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
        return t1 > t2 || (t1 == t2 && after(seq1, seq2));
 }
 
+static u32 tcp_rack_reo_wnd(const struct sock *sk)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (!tp->rack.reord) {
+               /* If reordering has not been observed, be aggressive during
+                * the recovery or starting the recovery by DUPACK threshold.
+                */
+               if (inet_csk(sk)->icsk_ca_state >= TCP_CA_Recovery)
+                       return 0;
+
+               if (tp->sacked_out >= tp->reordering &&
+                   !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+                       return 0;
+       }
+
+       /* To be more reordering resilient, allow min_rtt/4 settling delay.
+        * Use min_rtt instead of the smoothed RTT because reordering is
+        * often a path property and less related to queuing or delayed ACKs.
+        * Upon receiving DSACKs, linearly increase the window up to the
+        * smoothed RTT.
+        */
+       return min((tcp_min_rtt(tp) >> 2) * tp->rack.reo_wnd_steps,
+                  tp->srtt_us >> 3);
+}
+
+s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd)
+{
+       return tp->rack.rtt_us + reo_wnd -
+              tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+}
+
 /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
  *
  * Marks a packet lost, if some packet sent later has been (s)acked.
@@ -44,23 +76,11 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
 static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       u32 min_rtt = tcp_min_rtt(tp);
        struct sk_buff *skb, *n;
        u32 reo_wnd;
 
        *reo_timeout = 0;
-       /* To be more reordering resilient, allow min_rtt/4 settling delay
-        * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed
-        * RTT because reordering is often a path property and less related
-        * to queuing or delayed ACKs.
-        */
-       reo_wnd = 1000;
-       if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) &&
-           min_rtt != ~0U) {
-               reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
-               reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
-       }
-
+       reo_wnd = tcp_rack_reo_wnd(sk);
        list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
                                 tcp_tsorted_anchor) {
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -78,10 +98,9 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
                /* A packet is lost if it has not been s/acked beyond
                 * the recent RTT plus the reordering window.
                 */
-               remaining = tp->rack.rtt_us + reo_wnd -
-                           tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+               remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
                if (remaining <= 0) {
-                       tcp_rack_mark_skb_lost(sk, skb);
+                       tcp_mark_skb_lost(sk, skb);
                        list_del_init(&skb->tcp_tsorted_anchor);
                } else {
                        /* Record maximum wait time */
@@ -202,3 +221,30 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
                tp->rack.reo_wnd_steps = 1;
        }
 }
+
+/* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits
+ * the next unacked packet upon receiving
+ * a) three or more DUPACKs to start the fast recovery
+ * b) an ACK acknowledging new data during the fast recovery.
+ */
+void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced)
+{
+       const u8 state = inet_csk(sk)->icsk_ca_state;
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if ((state < TCP_CA_Recovery && tp->sacked_out >= tp->reordering) ||
+           (state == TCP_CA_Recovery && snd_una_advanced)) {
+               struct sk_buff *skb = tcp_rtx_queue_head(sk);
+               u32 mss;
+
+               if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
+                       return;
+
+               mss = tcp_skb_mss(skb);
+               if (tcp_skb_pcount(skb) > 1 && skb->len > mss)
+                       tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                    mss, mss, GFP_ATOMIC);
+
+               tcp_skb_mark_lost_uncond_verify(tp, skb);
+       }
+}
index f7d944855f8ebd0a312fe73a53a56ab8d451ee44..3b3611729928f77934e0298bb248e55c7a7c5def 100644 (file)
@@ -708,11 +708,36 @@ static void tcp_keepalive_timer (struct timer_list *t)
        sock_put(sk);
 }
 
+static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
+{
+       struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
+       struct sock *sk = (struct sock *)tp;
+
+       bh_lock_sock(sk);
+       if (!sock_owned_by_user(sk)) {
+               if (tp->compressed_ack)
+                       tcp_send_ack(sk);
+       } else {
+               if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
+                                     &sk->sk_tsq_flags))
+                       sock_hold(sk);
+       }
+       bh_unlock_sock(sk);
+
+       sock_put(sk);
+
+       return HRTIMER_NORESTART;
+}
+
 void tcp_init_xmit_timers(struct sock *sk)
 {
        inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
                                  &tcp_keepalive_timer);
        hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
-                    HRTIMER_MODE_ABS_PINNED);
+                    HRTIMER_MODE_ABS_PINNED_SOFT);
        tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
+
+       hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
+                    HRTIMER_MODE_REL_PINNED_SOFT);
+       tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
 }
index 24b5c59b1c533a9793042c134f23891953527752..ff4d4ba67735fb7d829d09c6d2da1a30d52eefa6 100644 (file)
@@ -401,9 +401,9 @@ static int compute_score(struct sock *sk, struct net *net,
                bool dev_match = (sk->sk_bound_dev_if == dif ||
                                  sk->sk_bound_dev_if == sdif);
 
-               if (exact_dif && !dev_match)
+               if (!dev_match)
                        return -1;
-               if (sk->sk_bound_dev_if && dev_match)
+               if (sk->sk_bound_dev_if)
                        score += 4;
        }
 
@@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(udp_set_csum);
 
-static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
+static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+                       struct inet_cork *cork)
 {
        struct sock *sk = skb->sk;
        struct inet_sock *inet = inet_sk(sk);
@@ -777,6 +778,26 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
        uh->len = htons(len);
        uh->check = 0;
 
+       if (cork->gso_size) {
+               const int hlen = skb_network_header_len(skb) +
+                                sizeof(struct udphdr);
+
+               if (hlen + cork->gso_size > cork->fragsize)
+                       return -EINVAL;
+               if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+                       return -EINVAL;
+               if (sk->sk_no_check_tx)
+                       return -EINVAL;
+               if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+                       return -EIO;
+
+               skb_shinfo(skb)->gso_size = cork->gso_size;
+               skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+               skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(len - sizeof(uh),
+                                                        cork->gso_size);
+               goto csum_partial;
+       }
+
        if (is_udplite)                                  /*     UDP-Lite      */
                csum = udplite_csum(skb);
 
@@ -786,6 +807,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
                goto send;
 
        } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+csum_partial:
 
                udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
                goto send;
@@ -828,7 +850,7 @@ int udp_push_pending_frames(struct sock *sk)
        if (!skb)
                goto out;
 
-       err = udp_send_skb(skb, fl4);
+       err = udp_send_skb(skb, fl4, &inet->cork.base);
 
 out:
        up->len = 0;
@@ -837,6 +859,43 @@ int udp_push_pending_frames(struct sock *sk)
 }
 EXPORT_SYMBOL(udp_push_pending_frames);
 
+static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size)
+{
+       switch (cmsg->cmsg_type) {
+       case UDP_SEGMENT:
+               if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
+                       return -EINVAL;
+               *gso_size = *(__u16 *)CMSG_DATA(cmsg);
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
+int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size)
+{
+       struct cmsghdr *cmsg;
+       bool need_ip = false;
+       int err;
+
+       for_each_cmsghdr(cmsg, msg) {
+               if (!CMSG_OK(msg, cmsg))
+                       return -EINVAL;
+
+               if (cmsg->cmsg_level != SOL_UDP) {
+                       need_ip = true;
+                       continue;
+               }
+
+               err = __udp_cmsg_send(cmsg, gso_size);
+               if (err)
+                       return err;
+       }
+
+       return need_ip;
+}
+EXPORT_SYMBOL_GPL(udp_cmsg_send);
+
 int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
        struct inet_sock *inet = inet_sk(sk);
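
udp_cmsg_send() above accepts a per-call UDP_SEGMENT cmsg carrying a __u16 segment size, and returns > 0 when non-SOL_UDP entries still need the existing ip_cmsg_send() pass. A userspace sketch of one sendmsg() that the stack segments into gso_size-byte datagrams; the option numbers come from this series' uapi patch and are assumed here, hence the fallback defines:

	#include <linux/types.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	#ifndef SOL_UDP
	#define SOL_UDP 17
	#endif
	#ifndef UDP_SEGMENT
	#define UDP_SEGMENT 103		/* assumed uapi value */
	#endif

	static ssize_t send_udp_gso(int fd, const void *buf, size_t len, __u16 gso)
	{
		char control[CMSG_SPACE(sizeof(__u16))] = { 0 };
		struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
		struct msghdr msg = {
			.msg_iov        = &iov,
			.msg_iovlen     = 1,
			.msg_control    = control,
			.msg_controllen = sizeof(control),
		};
		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

		/* gso must fit the MTU minus IP/UDP headers, per the
		 * cork->fragsize check in udp_send_skb() above */
		cm->cmsg_level = SOL_UDP;
		cm->cmsg_type  = UDP_SEGMENT;
		cm->cmsg_len   = CMSG_LEN(sizeof(__u16));
		memcpy(CMSG_DATA(cm), &gso, sizeof(gso));
		return sendmsg(fd, &msg, 0);
	}
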
@@ -922,10 +981,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        ipc.sockc.tsflags = sk->sk_tsflags;
        ipc.addr = inet->inet_saddr;
        ipc.oif = sk->sk_bound_dev_if;
+       ipc.gso_size = up->gso_size;
 
        if (msg->msg_controllen) {
-               err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);
-               if (unlikely(err)) {
+               err = udp_cmsg_send(sk, msg, &ipc.gso_size);
+               if (err > 0)
+                       err = ip_cmsg_send(sk, msg, &ipc,
+                                          sk->sk_family == AF_INET6);
+               if (unlikely(err < 0)) {
                        kfree(ipc.opt);
                        return err;
                }
@@ -952,8 +1015,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
 
        if (ipc.opt && ipc.opt->opt.srr) {
-               if (!daddr)
-                       return -EINVAL;
+               if (!daddr) {
+                       err = -EINVAL;
+                       goto out_free;
+               }
                faddr = ipc.opt->opt.faddr;
                connected = 0;
        }
@@ -1030,12 +1095,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        /* Lockless fast path for the non-corking case. */
        if (!corkreq) {
+               struct inet_cork cork;
+
                skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
                                  sizeof(struct udphdr), &ipc, &rt,
-                                 msg->msg_flags);
+                                 &cork, msg->msg_flags);
                err = PTR_ERR(skb);
                if (!IS_ERR_OR_NULL(skb))
-                       err = udp_send_skb(skb, fl4);
+                       err = udp_send_skb(skb, fl4, &cork);
                goto out;
        }
 
@@ -1074,6 +1141,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 out:
        ip_rt_put(rt);
+out_free:
        if (free)
                kfree(ipc.opt);
        if (!err)
@@ -1810,10 +1878,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
        return 0;
 }
 
-static struct static_key udp_encap_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void)
 {
-       static_key_enable(&udp_encap_needed);
+       static_branch_enable(&udp_encap_needed_key);
 }
 EXPORT_SYMBOL(udp_encap_enable);
 
@@ -1837,7 +1905,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                goto drop;
        nf_reset(skb);
 
-       if (static_key_false(&udp_encap_needed) && up->encap_type) {
+       if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
                int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 
                /*
@@ -2300,7 +2368,7 @@ void udp_destroy_sock(struct sock *sk)
        bool slow = lock_sock_fast(sk);
        udp_flush_pending_frames(sk);
        unlock_sock_fast(sk, slow);
-       if (static_key_false(&udp_encap_needed) && up->encap_type) {
+       if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
                void (*encap_destroy)(struct sock *sk);
                encap_destroy = READ_ONCE(up->encap_destroy);
                if (encap_destroy)
@@ -2365,6 +2433,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
                up->no_check6_rx = valbool;
                break;
 
+       case UDP_SEGMENT:
+               if (val < 0 || val > USHRT_MAX)
+                       return -EINVAL;
+               up->gso_size = val;
+               break;
+
        /*
         *      UDP-Lite's partial checksum coverage (RFC 3828).
         */
@@ -2455,6 +2529,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
                val = up->no_check6_rx;
                break;
 
+       case UDP_SEGMENT:
+               val = up->gso_size;
+               break;
+
        /* The following two cannot be changed on UDP sockets, the return is
         * always 0 (which corresponds to the full checksum coverage of UDP). */
        case UDPLITE_SEND_CSCOV:
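
The sockopt form above sets a socket-wide default instead (val clamped to 0..USHRT_MAX, with 0 disabling GSO); the cmsg shown earlier overrides it per call. A short sketch, reusing the SOL_UDP/UDP_SEGMENT defines from the earlier example:

	/* make every subsequent send on this UDP socket use GSO */
	static int set_udp_segment(int fd, int gso_size)
	{
		return setsockopt(fd, SOL_UDP, UDP_SEGMENT,
				  &gso_size, sizeof(gso_size));
	}
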
index ea6e6e7df0eec8e79f631851846cf6957014c162..92dc9e5a7ff3d0a7509bfa2a66e9189c8341a5fa 100644 (file)
@@ -187,6 +187,102 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+                                 netdev_features_t features)
+{
+       struct sock *sk = gso_skb->sk;
+       unsigned int sum_truesize = 0;
+       struct sk_buff *segs, *seg;
+       struct udphdr *uh;
+       unsigned int mss;
+       bool copy_dtor;
+       __sum16 check;
+       __be16 newlen;
+
+       mss = skb_shinfo(gso_skb)->gso_size;
+       if (gso_skb->len <= sizeof(*uh) + mss)
+               return ERR_PTR(-EINVAL);
+
+       skb_pull(gso_skb, sizeof(*uh));
+
+       /* clear destructor to avoid skb_segment assigning it to tail */
+       copy_dtor = gso_skb->destructor == sock_wfree;
+       if (copy_dtor)
+               gso_skb->destructor = NULL;
+
+       segs = skb_segment(gso_skb, features);
+       if (unlikely(IS_ERR_OR_NULL(segs))) {
+               if (copy_dtor)
+                       gso_skb->destructor = sock_wfree;
+               return segs;
+       }
+
+       /* GSO partial and frag_list segmentation only requires splitting
+        * the frame into an MSS multiple and possibly a remainder, both
+        * cases return a GSO skb. So update the mss now.
+        */
+       if (skb_is_gso(segs))
+               mss *= skb_shinfo(segs)->gso_segs;
+
+       seg = segs;
+       uh = udp_hdr(seg);
+
+       /* compute checksum adjustment based on old length versus new */
+       newlen = htons(sizeof(*uh) + mss);
+       check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
+
+       for (;;) {
+               if (copy_dtor) {
+                       seg->destructor = sock_wfree;
+                       seg->sk = sk;
+                       sum_truesize += seg->truesize;
+               }
+
+               if (!seg->next)
+                       break;
+
+               uh->len = newlen;
+               uh->check = check;
+
+               if (seg->ip_summed == CHECKSUM_PARTIAL)
+                       gso_reset_checksum(seg, ~check);
+               else
+                       uh->check = gso_make_checksum(seg, ~check) ? :
+                                   CSUM_MANGLED_0;
+
+               seg = seg->next;
+               uh = udp_hdr(seg);
+       }
+
+       /* last packet can be partial gso_size, account for that in checksum */
+       newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
+                      seg->data_len);
+       check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
+
+       uh->len = newlen;
+       uh->check = check;
+
+       if (seg->ip_summed == CHECKSUM_PARTIAL)
+               gso_reset_checksum(seg, ~check);
+       else
+               uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
+
+       /* update refcount for the packet */
+       if (copy_dtor) {
+               int delta = sum_truesize - gso_skb->truesize;
+
+               /* In some pathological cases, delta can be negative.
+                * We need to either use refcount_add() or refcount_sub_and_test()
+                */
+               if (likely(delta >= 0))
+                       refcount_add(delta, &sk->sk_wmem_alloc);
+               else
+                       WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+       }
+       return segs;
+}
+EXPORT_SYMBOL_GPL(__udp_gso_segment);
+
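
The per-segment fix-ups above avoid recomputing full checksums: for CHECKSUM_PARTIAL, uh->check holds the pseudo-header seed, which covers the datagram length once, so subtracting the old length and adding the new one in one's-complement arithmetic (RFC 1624 style) is sufficient. A userspace sketch of the two helpers under that assumption; the kernel versions operate on big-endian (__be16) values:

	#include <stdint.h>

	/* one's-complement add, folding the carry back in */
	static uint16_t csum16_add(uint16_t csum, uint16_t addend)
	{
		uint32_t res = (uint32_t)csum + addend;

		return (uint16_t)(res + (res >> 16));
	}

	/* subtract = add the one's complement */
	static uint16_t csum16_sub(uint16_t csum, uint16_t addend)
	{
		return csum16_add(csum, (uint16_t)~addend);
	}

	/* as used above: check = csum16_add(csum16_sub(check, old_len), new_len); */
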
 static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
                                         netdev_features_t features)
 {
@@ -203,12 +299,15 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
                goto out;
        }
 
-       if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+       if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
                goto out;
 
        if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                goto out;
 
+       if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+               return __udp_gso_segment(skb, features);
+
        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;
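
For reference, the length/checksum fixup in __udp_gso_segment() above is the RFC 1141/1624 incremental-update trick: fold the old UDP length out of the stored checksum, fold the per-segment length in, and leave every other field alone. A minimal userspace sketch of that arithmetic (helper names mirror the kernel's csum16_add()/csum16_sub(); byte order and the values are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* One's-complement 16-bit add with end-around carry, mirroring the
 * kernel's csum16_add(); csum16_sub() folds a value out by adding its
 * complement. */
static uint16_t csum16_add(uint16_t csum, uint16_t addend)
{
    uint32_t res = (uint32_t)csum + addend;

    return (uint16_t)(res + (res >> 16));
}

static uint16_t csum16_sub(uint16_t csum, uint16_t addend)
{
    return csum16_add(csum, (uint16_t)~addend);
}

int main(void)
{
    uint16_t check = 0x1c46;     /* stored UDP checksum (made-up example) */
    uint16_t old_len = 1472 + 8; /* UDP length of the big GSO packet      */
    uint16_t new_len = 1200 + 8; /* UDP length of one segment             */

    /* Same shape as the segmentation path: old length out, new one in. */
    check = csum16_add(csum16_sub(check, old_len), new_len);
    printf("adjusted check: 0x%04x\n", check);
    return 0;
}
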
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 6794ddf0547cd0bec8b5ac7e4069b6f746809a49..11e4e80cf7e981826ade121bfe9c4559724c0f40 100644
@@ -34,16 +34,15 @@ config IPV6_ROUTE_INFO
        bool "IPv6: Route Information (RFC 4191) support"
        depends on IPV6_ROUTER_PREF
        ---help---
-         This is experimental support of Route Information.
+         Support for Route Information.
 
          If unsure, say N.
 
 config IPV6_OPTIMISTIC_DAD
        bool "IPv6: Enable RFC 4429 Optimistic DAD"
        ---help---
-         This is experimental support for optimistic Duplicate
-         Address Detection.  It allows for autoconfigured addresses
-         to be used more quickly.
+         Support for optimistic Duplicate Address Detection. It allows for
+         autoconfigured addresses to be used more quickly.
 
          If unsure, say N.
 
@@ -280,7 +279,7 @@ config IPV6_MROUTE
        depends on IPV6
        select IP_MROUTE_COMMON
        ---help---
-         Experimental support for IPv6 multicast forwarding.
+         Support for IPv6 multicast forwarding.
          If unsure, say N.
 
 config IPV6_MROUTE_MULTIPLE_TABLES
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7b4d7bbf2c170b82041422e32db1e7ceb7562439..fbfd71a2d9c81960ad366210b2c88d228fc882e1 100644
@@ -3622,8 +3622,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
        struct net *net = dev_net(dev);
        struct inet6_dev *idev;
        struct inet6_ifaddr *ifa, *tmp;
-       int _keep_addr;
-       bool keep_addr;
+       bool keep_addr = false;
        int state, i;
 
        ASSERT_RTNL();
@@ -3649,15 +3648,18 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
        }
 
-       /* aggregate the system setting and interface setting */
-       _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
-       if (!_keep_addr)
-               _keep_addr = idev->cnf.keep_addr_on_down;
-
        /* combine the user config with event to determine if permanent
         * addresses are to be removed from address hash table
         */
-       keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
+       if (!how && !idev->cnf.disable_ipv6) {
+               /* aggregate the system setting and interface setting */
+               int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+
+               if (!_keep_addr)
+                       _keep_addr = idev->cnf.keep_addr_on_down;
+
+               keep_addr = (_keep_addr > 0);
+       }
 
        /* Step 2: clear hash table */
        for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3707,11 +3709,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
                write_lock_bh(&idev->lock);
        }
 
-       /* re-combine the user config with event to determine if permanent
-        * addresses are to be removed from the interface list
-        */
-       keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
-
        list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
                struct fib6_info *rt = NULL;
                bool keep;
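
The rework above short-circuits the whole decision when the device is being unregistered or IPv6 is disabled, and only then consults the knobs, where the global keep_addr_on_down wins whenever it is nonzero (including negative, meaning an explicit "drop"). A standalone restatement of that precedence as a runnable sketch; the plain-int inputs are hypothetical stand-ins for the devconf fields:

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the keep_addr decision in addrconf_ifdown(): 'how' is
 * nonzero on unregister; 'global' and 'dev' mirror keep_addr_on_down
 * at the all-devices and per-interface level. */
static bool keep_addr_on_down(int how, bool disable_ipv6, int global, int dev)
{
    int keep;

    if (how || disable_ipv6)
        return false;             /* unregister, or IPv6 disabled */

    keep = global ? global : dev; /* global overrides when set    */
    return keep > 0;
}

int main(void)
{
    printf("%d\n", keep_addr_on_down(0, false, 0, 1));  /* 1: device opts in   */
    printf("%d\n", keep_addr_on_down(0, false, -1, 1)); /* 0: global vetoes it */
    return 0;
}
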
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 32b564dfd02af6cadcc3611e0bc839459b15c024..2fe754fd4f5e24c62fae96cc55cb7f5411c2868c 100644
@@ -134,8 +134,39 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
        return -EAFNOSUPPORT;
 }
 
+static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
+{
+       return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
+                              int oif, struct flowi6 *fl6, int flags)
+{
+       return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+                        int flags)
+{
+       return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
+                                  struct flowi6 *fl6, int oif,
+                                  const struct sk_buff *skb, int strict)
+{
+       return f6i;
+}
+
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
-       .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+       .ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
+       .fib6_get_table    = eafnosupport_fib6_get_table,
+       .fib6_table_lookup = eafnosupport_fib6_table_lookup,
+       .fib6_lookup       = eafnosupport_fib6_lookup,
+       .fib6_multipath_select = eafnosupport_fib6_multipath_select,
 };
 EXPORT_SYMBOL_GPL(ipv6_stub);
 
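The eafnosupport_* functions above flesh out the ipv6_stub pattern: callers always dispatch through the ops table, the stubs provide safe "address family not supported" behaviour when IPv6 is absent, and af_inet6.c (next hunk) swaps in the real implementations at init time. A minimal standalone sketch of the same pattern; all names here are hypothetical:

#include <stdio.h>

struct proto_stub {
    int (*lookup)(int key);
};

/* Safe default: fail cleanly, like the eafnosupport_* stubs. */
static int eafnosupport_lookup(int key)
{
    (void)key;
    return -97; /* stands in for -EAFNOSUPPORT */
}

static const struct proto_stub stub_default = { .lookup = eafnosupport_lookup };
static const struct proto_stub *proto_stub = &stub_default;

/* The "real" implementation, registered once its module initializes. */
static int real_lookup(int key) { return key * 2; }
static const struct proto_stub stub_impl = { .lookup = real_lookup };

int main(void)
{
    printf("before init: %d\n", proto_stub->lookup(21)); /* -97 */
    proto_stub = &stub_impl;
    printf("after init:  %d\n", proto_stub->lookup(21)); /* 42  */
    return 0;
}
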
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 36d622c477b1ed3c5d2b753938444526344a6109..50de8b0d4f70719e3d7a71e48cd47c4091de49b9 100644
@@ -578,7 +578,9 @@ const struct proto_ops inet6_stream_ops = {
        .getsockopt        = sock_common_getsockopt,    /* ok           */
        .sendmsg           = inet_sendmsg,              /* ok           */
        .recvmsg           = inet_recvmsg,              /* ok           */
+#ifdef CONFIG_MMU
        .mmap              = tcp_mmap,
+#endif
        .sendpage          = inet_sendpage,
        .sendmsg_locked    = tcp_sendmsg_locked,
        .sendpage_locked   = tcp_sendpage_locked,
@@ -887,7 +889,11 @@ static struct pernet_operations inet6_net_ops = {
 static const struct ipv6_stub ipv6_stub_impl = {
        .ipv6_sock_mc_join = ipv6_sock_mc_join,
        .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
-       .ipv6_dst_lookup = ip6_dst_lookup,
+       .ipv6_dst_lookup   = ip6_dst_lookup,
+       .fib6_get_table    = fib6_get_table,
+       .fib6_table_lookup = fib6_table_lookup,
+       .fib6_lookup       = fib6_lookup,
+       .fib6_multipath_select = fib6_multipath_select,
        .udpv6_encap_enable = udpv6_encap_enable,
        .ndisc_send_na = ndisc_send_na,
        .nd_tbl = &nd_tbl,
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index b643f5ce6c8078ff91f02555a33964acdedcded4..ae365df8abf7babfebc50ccfb1e1ea3889a5129e 100644
@@ -161,7 +161,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv);
  * if target < 0. "last header" is transport protocol header, ESP, or
  * "No next header".
  *
- * Note that *offset is used as input/output parameter. an if it is not zero,
+ * Note that *offset is used as input/output parameter, and if it is not zero,
  * then it must be a valid offset to an inner IPv6 header. This can be used
  * to explore inner IPv6 header, eg. ICMPv6 error messages.
  *
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 6547fc6491a63af0ccbab6fd4ca04c8c64c8622b..f590446595d8b879de9ee384021311deed7c943a 100644
@@ -60,6 +60,39 @@ unsigned int fib6_rules_seq_read(struct net *net)
        return fib_rules_seq_read(net, AF_INET6);
 }
 
+/* called with rcu lock held; no reference taken on fib6_info */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+                             int flags)
+{
+       struct fib6_info *f6i;
+       int err;
+
+       if (net->ipv6.fib6_has_custom_rules) {
+               struct fib_lookup_arg arg = {
+                       .lookup_ptr = fib6_table_lookup,
+                       .lookup_data = &oif,
+                       .flags = FIB_LOOKUP_NOREF,
+               };
+
+               l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+               err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
+                                      flowi6_to_flowi(fl6), flags, &arg);
+               if (err)
+                       return ERR_PTR(err);
+
+               f6i = arg.result ? : net->ipv6.fib6_null_entry;
+       } else {
+               f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
+                                       oif, fl6, flags);
+               if (!f6i || f6i == net->ipv6.fib6_null_entry)
+                       f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+                                               oif, fl6, flags);
+       }
+
+       return f6i;
+}
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
                                   const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup)
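
fib6_lookup() above has two regimes: with custom policy rules present it defers to fib_rules_lookup(), passing fib6_table_lookup through lookup_ptr; otherwise it tries the local table and falls back to the main table when nothing usable comes back. The fallback half, restated as a runnable sketch with hypothetical stand-in types:

#include <stdio.h>

struct fib6_entry { const char *name; };

static struct fib6_entry null_entry = { "null" };
static struct fib6_entry local_hit  = { "local" };
static struct fib6_entry main_hit   = { "main" };

/* Toy table lookups: a miss returns the null entry. */
static struct fib6_entry *lookup_local(int dst)
{
    return dst == 1 ? &local_hit : &null_entry;
}

static struct fib6_entry *lookup_main(int dst)
{
    (void)dst;
    return &main_hit;
}

static struct fib6_entry *fib6_lookup_sketch(int dst)
{
    struct fib6_entry *f6i = lookup_local(dst);

    /* Same shape as the kernel path: consult the main table only when
     * the local table yields nothing usable. */
    if (!f6i || f6i == &null_entry)
        f6i = lookup_main(dst);
    return f6i;
}

int main(void)
{
    printf("%s\n", fib6_lookup_sketch(1)->name); /* local */
    printf("%s\n", fib6_lookup_sketch(9)->name); /* main  */
    return 0;
}
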
@@ -96,8 +129,73 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        return &net->ipv6.ip6_null_entry->dst;
 }
 
-static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
-                           int flags, struct fib_lookup_arg *arg)
+static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
+                          struct flowi6 *flp6, const struct net_device *dev)
+{
+       struct fib6_rule *r = (struct fib6_rule *)rule;
+
+       /* If we need to find a source address for this traffic,
+        * we check whether the result meets the requirements of the rule.
+        */
+       if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+           r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+               struct in6_addr saddr;
+
+               if (ipv6_dev_get_saddr(net, dev, &flp6->daddr,
+                                      rt6_flags2srcprefs(flags), &saddr))
+                       return -EAGAIN;
+
+               if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen))
+                       return -EAGAIN;
+
+               flp6->saddr = saddr;
+       }
+
+       return 0;
+}
+
+static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
+                               int flags, struct fib_lookup_arg *arg)
+{
+       struct flowi6 *flp6 = &flp->u.ip6;
+       struct net *net = rule->fr_net;
+       struct fib6_table *table;
+       struct fib6_info *f6i;
+       int err = -EAGAIN, *oif;
+       u32 tb_id;
+
+       switch (rule->action) {
+       case FR_ACT_TO_TBL:
+               break;
+       case FR_ACT_UNREACHABLE:
+               return -ENETUNREACH;
+       case FR_ACT_PROHIBIT:
+               return -EACCES;
+       case FR_ACT_BLACKHOLE:
+       default:
+               return -EINVAL;
+       }
+
+       tb_id = fib_rule_get_table(rule, arg);
+       table = fib6_get_table(net, tb_id);
+       if (!table)
+               return -EAGAIN;
+
+       oif = (int *)arg->lookup_data;
+       f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
+       if (f6i != net->ipv6.fib6_null_entry) {
+               err = fib6_rule_saddr(net, rule, flags, flp6,
+                                     fib6_info_nh_dev(f6i));
+
+               if (likely(!err))
+                       arg->result = f6i;
+       }
+
+       return err;
+}
+
+static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+                             int flags, struct fib_lookup_arg *arg)
 {
        struct flowi6 *flp6 = &flp->u.ip6;
        struct rt6_info *rt = NULL;
@@ -134,27 +232,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 
        rt = lookup(net, table, flp6, arg->lookup_data, flags);
        if (rt != net->ipv6.ip6_null_entry) {
-               struct fib6_rule *r = (struct fib6_rule *)rule;
-
-               /*
-                * If we need to find a source address for this traffic,
-                * we check the result if it meets requirement of the rule.
-                */
-               if ((rule->flags & FIB_RULE_FIND_SADDR) &&
-                   r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
-                       struct in6_addr saddr;
-
-                       if (ipv6_dev_get_saddr(net,
-                                              ip6_dst_idev(&rt->dst)->dev,
-                                              &flp6->daddr,
-                                              rt6_flags2srcprefs(flags),
-                                              &saddr))
-                               goto again;
-                       if (!ipv6_prefix_equal(&saddr, &r->src.addr,
-                                              r->src.plen))
-                               goto again;
-                       flp6->saddr = saddr;
-               }
+               err = fib6_rule_saddr(net, rule, flags, flp6,
+                                     ip6_dst_idev(&rt->dst)->dev);
+
+               if (err == -EAGAIN)
+                       goto again;
+
                err = rt->dst.error;
                if (err != -EAGAIN)
                        goto out;
@@ -172,6 +255,15 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
        return err;
 }
 
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+                           int flags, struct fib_lookup_arg *arg)
+{
+       if (arg->lookup_ptr == fib6_table_lookup)
+               return fib6_rule_action_alt(rule, flp, flags, arg);
+
+       return __fib6_rule_action(rule, flp, flags, arg);
+}
+
 static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
 {
        struct rt6_info *rt = (struct rt6_info *) arg->result;
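
In fib6_rule_action_alt() above, the return value doubles as control flow for the rule walk: -EAGAIN means "no result from this rule, keep walking", while -ENETUNREACH, -EACCES or a successful table hit ends the walk. A simplified, runnable sketch of that convention; the walker is a hypothetical stand-in for fib_rules_lookup(), not its actual implementation:

#include <errno.h>
#include <stdio.h>

struct rule {
    int (*action)(int dst, void **result);
};

static int act_table(int dst, void **result)
{
    static int route = 42;

    if (dst != 42)
        return -EAGAIN;      /* not in this table, try the next rule */
    *result = &route;
    return 0;
}

static int act_unreachable(int dst, void **result)
{
    (void)dst; (void)result;
    return -ENETUNREACH;     /* terminal action, walk stops here */
}

static int walk_rules(const struct rule *rules, int n, int dst, void **result)
{
    int i, err = -ESRCH;

    for (i = 0; i < n; i++) {
        err = rules[i].action(dst, result);
        if (err != -EAGAIN)  /* match or hard error: stop walking */
            break;
    }
    return err == -EAGAIN ? -ESRCH : err;
}

int main(void)
{
    struct rule rules[] = { { act_table }, { act_unreachable } };
    void *res = NULL;

    printf("dst 42 -> %d\n", walk_rules(rules, 2, 42, &res)); /* 0            */
    printf("dst  7 -> %d\n", walk_rules(rules, 2, 7, &res));  /* -ENETUNREACH */
    return 0;
}
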
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6421c893466e7df6896e39c4c462c19eeacf1b65..d1dc6017f5a69d6ceb4fcade98ca931567925f84 100644
@@ -354,6 +354,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        return &rt->dst;
 }
 
+/* called with rcu lock held; no reference taken on fib6_info */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+                             int flags)
+{
+       return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+}
+
 static void __net_init fib6_tables_init(struct net *net)
 {
        fib6_link_table(net, net->ipv6.fib6_main_tbl);
@@ -945,7 +952,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
        ins = &fn->leaf;
 
        for (iter = leaf; iter;
-            iter = rcu_dereference_protected(iter->rt6_next,
+            iter = rcu_dereference_protected(iter->fib6_next,
                                lockdep_is_held(&rt->fib6_table->tb6_lock))) {
                /*
                 *      Search for duplicates
@@ -1002,7 +1009,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
                        break;
 
 next_iter:
-               ins = &iter->rt6_next;
+               ins = &iter->fib6_next;
        }
 
        if (fallback_ins && !found) {
@@ -1031,7 +1038,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
                                              &sibling->fib6_siblings);
                                break;
                        }
-                       sibling = rcu_dereference_protected(sibling->rt6_next,
+                       sibling = rcu_dereference_protected(sibling->fib6_next,
                                    lockdep_is_held(&rt->fib6_table->tb6_lock));
                }
                /* For each sibling in the list, increment the counter of
@@ -1065,7 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
                if (err)
                        return err;
 
-               rcu_assign_pointer(rt->rt6_next, iter);
+               rcu_assign_pointer(rt->fib6_next, iter);
                atomic_inc(&rt->fib6_ref);
                rcu_assign_pointer(rt->fib6_node, fn);
                rcu_assign_pointer(*ins, rt);
@@ -1096,7 +1103,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
                atomic_inc(&rt->fib6_ref);
                rcu_assign_pointer(rt->fib6_node, fn);
-               rt->rt6_next = iter->rt6_next;
+               rt->fib6_next = iter->fib6_next;
                rcu_assign_pointer(*ins, rt);
                if (!info->skip_notify)
                        inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
@@ -1113,14 +1120,14 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
                if (nsiblings) {
                        /* Replacing an ECMP route, remove all siblings */
-                       ins = &rt->rt6_next;
+                       ins = &rt->fib6_next;
                        iter = rcu_dereference_protected(*ins,
                                    lockdep_is_held(&rt->fib6_table->tb6_lock));
                        while (iter) {
                                if (iter->fib6_metric > rt->fib6_metric)
                                        break;
                                if (rt6_qualify_for_ecmp(iter)) {
-                                       *ins = iter->rt6_next;
+                                       *ins = iter->fib6_next;
                                        iter->fib6_node = NULL;
                                        fib6_purge_rt(iter, fn, info->nl_net);
                                        if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1129,7 +1136,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
                                        nsiblings--;
                                        info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
                                } else {
-                                       ins = &iter->rt6_next;
+                                       ins = &iter->fib6_next;
                                }
                                iter = rcu_dereference_protected(*ins,
                                        lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -1354,8 +1361,8 @@ struct lookup_args {
        const struct in6_addr   *addr;          /* search key                   */
 };
 
-static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
-                                      struct lookup_args *args)
+static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
+                                           struct lookup_args *args)
 {
        struct fib6_node *fn;
        __be32 dir;
@@ -1400,7 +1407,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 #ifdef CONFIG_IPV6_SUBTREES
                                if (subtree) {
                                        struct fib6_node *sfn;
-                                       sfn = fib6_lookup_1(subtree, args + 1);
+                                       sfn = fib6_node_lookup_1(subtree,
+                                                                args + 1);
                                        if (!sfn)
                                                goto backtrack;
                                        fn = sfn;
@@ -1422,8 +1430,9 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 
 /* called with rcu_read_lock() held
  */
-struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
-                             const struct in6_addr *saddr)
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+                                  const struct in6_addr *daddr,
+                                  const struct in6_addr *saddr)
 {
        struct fib6_node *fn;
        struct lookup_args args[] = {
@@ -1442,7 +1451,7 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
                }
        };
 
-       fn = fib6_lookup_1(root, daddr ? args : args + 1);
+       fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
        if (!fn || fn->fn_flags & RTN_TL_ROOT)
                fn = root;
 
@@ -1712,7 +1721,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
        RT6_TRACE("fib6_del_route\n");
 
        /* Unlink it */
-       *rtp = rt->rt6_next;
+       *rtp = rt->fib6_next;
        rt->fib6_node = NULL;
        net->ipv6.rt6_stats->fib_rt_entries--;
        net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1741,7 +1750,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
        FOR_WALKERS(net, w) {
                if (w->state == FWS_C && w->leaf == rt) {
                        RT6_TRACE("walker %p adjusted by delroute\n", w);
-                       w->leaf = rcu_dereference_protected(rt->rt6_next,
+                       w->leaf = rcu_dereference_protected(rt->fib6_next,
                                            lockdep_is_held(&table->tb6_lock));
                        if (!w->leaf)
                                w->state = FWS_U;
@@ -1795,7 +1804,7 @@ int fib6_del(struct fib6_info *rt, struct nl_info *info)
                        fib6_del_route(table, fn, rtp, info);
                        return 0;
                }
-               rtp_next = &cur->rt6_next;
+               rtp_next = &cur->fib6_next;
        }
        return -ENOENT;
 }
@@ -2279,7 +2288,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
 
        do {
                iter->w.leaf = rcu_dereference_protected(
-                               iter->w.leaf->rt6_next,
+                               iter->w.leaf->fib6_next,
                                lockdep_is_held(&iter->tbl->tb6_lock));
                iter->skip--;
                if (!iter->skip && iter->w.leaf)
@@ -2345,7 +2354,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        if (!v)
                goto iter_table;
 
-       n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next);
+       n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
        if (n) {
                ++*pos;
                return n;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 69727bc168cb027009dac95431e40b71291697da..c8cf2fdbb13b88cc1bf6b494a75407cdc16977eb 100644
@@ -71,6 +71,7 @@ struct ip6gre_net {
        struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
 
        struct ip6_tnl __rcu *collect_md_tun;
+       struct ip6_tnl __rcu *collect_md_tun_erspan;
        struct net_device *fb_tunnel_dev;
 };
 
@@ -81,6 +82,7 @@ static int ip6gre_tunnel_init(struct net_device *dev);
 static void ip6gre_tunnel_setup(struct net_device *dev);
 static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
 static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu);
 
 /* Tunnel hash table */
 
@@ -232,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
        if (cand)
                return cand;
 
-       t = rcu_dereference(ign->collect_md_tun);
+       if (gre_proto == htons(ETH_P_ERSPAN) ||
+           gre_proto == htons(ETH_P_ERSPAN2))
+               t = rcu_dereference(ign->collect_md_tun_erspan);
+       else
+               t = rcu_dereference(ign->collect_md_tun);
+
        if (t && t->dev->flags & IFF_UP)
                return t;
 
@@ -261,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
        return &ign->tunnels[prio][h];
 }
 
+static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun, t);
+}
+
+static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun_erspan, t);
+}
+
+static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun, NULL);
+}
+
+static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
+                                      struct ip6_tnl *t)
+{
+       if (t->parms.collect_md)
+               rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
+}
+
 static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
                const struct ip6_tnl *t)
 {
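
The link/unlink helpers above keep one "collect metadata" slot per tunnel flavour, published and retracted with rcu_assign_pointer() so the receive path (ip6gre_tunnel_lookup() earlier in this file) can pick the right slot by protocol. A userspace sketch of the single-slot publish/retract idea, using C11 release stores where the kernel uses RCU; types and names are hypothetical stand-ins:

#include <stdatomic.h>
#include <stdio.h>

struct tnl { const char *name; int collect_md; };

static _Atomic(struct tnl *) collect_md_tun;
static _Atomic(struct tnl *) collect_md_tun_erspan;

/* Publish with release semantics, roughly what rcu_assign_pointer()
 * guarantees in-kernel. */
static void link_md(_Atomic(struct tnl *) *slot, struct tnl *t)
{
    if (t->collect_md)
        atomic_store_explicit(slot, t, memory_order_release);
}

static void unlink_md(_Atomic(struct tnl *) *slot, struct tnl *t)
{
    if (t->collect_md)
        atomic_store_explicit(slot, NULL, memory_order_release);
}

int main(void)
{
    struct tnl gre = { "ip6gre0", 1 }, erspan = { "ip6erspan0", 1 };

    link_md(&collect_md_tun, &gre);
    link_md(&collect_md_tun_erspan, &erspan);
    /* the receive path would pick the slot by GRE protocol value */
    printf("%s\n", atomic_load(&collect_md_tun_erspan)->name);
    unlink_md(&collect_md_tun_erspan, &erspan);
    return 0;
}
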
@@ -271,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
 {
        struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
 
-       if (t->parms.collect_md)
-               rcu_assign_pointer(ign->collect_md_tun, t);
-
        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
        rcu_assign_pointer(*tp, t);
 }
@@ -283,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
        struct ip6_tnl __rcu **tp;
        struct ip6_tnl *iter;
 
-       if (t->parms.collect_md)
-               rcu_assign_pointer(ign->collect_md_tun, NULL);
-
        for (tp = ip6gre_bucket(ign, t);
             (iter = rtnl_dereference(*tp)) != NULL;
             tp = &iter->next) {
@@ -374,11 +400,23 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
        return NULL;
 }
 
+static void ip6erspan_tunnel_uninit(struct net_device *dev)
+{
+       struct ip6_tnl *t = netdev_priv(dev);
+       struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+
+       ip6erspan_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       dst_cache_reset(&t->dst_cache);
+       dev_put(dev);
+}
+
 static void ip6gre_tunnel_uninit(struct net_device *dev)
 {
        struct ip6_tnl *t = netdev_priv(dev);
        struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
 
+       ip6gre_tunnel_unlink_md(ign, t);
        ip6gre_tunnel_unlink(ign, t);
        dst_cache_reset(&t->dst_cache);
        dev_put(dev);
@@ -698,6 +736,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
        else
                fl6->daddr = tunnel->parms.raddr;
 
+       if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+               return -ENOMEM;
+
        /* Push GRE header. */
        protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
@@ -807,7 +848,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 }
 
 /**
- * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6gre_tnl_addr_conflict - compare packet addresses to tunnel's own
  *   @t: the outgoing tunnel device
  *   @hdr: IPv6 header from the incoming packet
  *
@@ -896,6 +937,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
        struct flowi6 fl6;
        int err = -EINVAL;
        __u32 mtu;
+       int nhoff;
+       int thoff;
 
        if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
                goto tx_err;
@@ -908,7 +951,17 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                truncate = true;
        }
 
-       if (skb_cow_head(skb, dev->needed_headroom))
+       nhoff = skb_network_header(skb) - skb_mac_header(skb);
+       if (skb->protocol == htons(ETH_P_IP) &&
+           (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+               truncate = true;
+
+       thoff = skb_transport_header(skb) - skb_mac_header(skb);
+       if (skb->protocol == htons(ETH_P_IPV6) &&
+           (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
+               truncate = true;
+
+       if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
                goto tx_err;
 
        t->parms.o_flags &= ~TUNNEL_KEY;
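
The two checks added above catch frames whose inner IP header claims more payload than the skb actually carries, so the ERSPAN header can set its truncated bit. The arithmetic as a worked example (all numbers illustrative):

#include <stdio.h>

int main(void)
{
    int skb_len = 1400; /* bytes actually present, from MAC header on */
    int nhoff   = 14;   /* network header offset (Ethernet header)    */
    int tot_len = 1500; /* length the inner IPv4 header claims        */

    /* mirrors: ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff */
    int truncate = tot_len > skb_len - nhoff;

    printf("present=%d claimed=%d -> truncate=%d\n",
           skb_len - nhoff, tot_len, truncate);
    return 0;
}
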
@@ -979,11 +1032,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                        erspan_build_header(skb, ntohl(t->parms.o_key),
                                            t->parms.index,
                                            truncate, false);
-               else
+               else if (t->parms.erspan_ver == 2)
                        erspan_build_header_v2(skb, ntohl(t->parms.o_key),
                                               t->parms.dir,
                                               t->parms.hwid,
                                               truncate, false);
+               else
+                       goto tx_err;
+
                fl6.daddr = t->parms.raddr;
        }
 
@@ -1019,12 +1075,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
        return NETDEV_TX_OK;
 }
 
-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
 {
        struct net_device *dev = t->dev;
        struct __ip6_tnl_parm *p = &t->parms;
        struct flowi6 *fl6 = &t->fl.u.ip6;
-       int t_hlen;
 
        if (dev->type != ARPHRD_ETHER) {
                memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1051,12 +1106,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
                dev->flags |= IFF_POINTOPOINT;
        else
                dev->flags &= ~IFF_POINTOPOINT;
+}
 
-       t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
-
-       t->hlen = t->encap_hlen + t->tun_hlen;
-
-       t_hlen = t->hlen + sizeof(struct ipv6hdr);
+static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
+                                        int t_hlen)
+{
+       const struct __ip6_tnl_parm *p = &t->parms;
+       struct net_device *dev = t->dev;
 
        if (p->flags & IP6_TNL_F_CAP_XMIT) {
                int strict = (ipv6_addr_type(&p->raddr) &
@@ -1088,8 +1144,26 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
        }
 }
 
-static int ip6gre_tnl_change(struct ip6_tnl *t,
-       const struct __ip6_tnl_parm *p, int set_mtu)
+static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
+{
+       int t_hlen;
+
+       tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+       tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       return t_hlen;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+       ip6gre_tnl_link_config_common(t);
+       ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t));
+}
+
+static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
+                                    const struct __ip6_tnl_parm *p)
 {
        t->parms.laddr = p->laddr;
        t->parms.raddr = p->raddr;
@@ -1105,6 +1179,12 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
        t->parms.o_flags = p->o_flags;
        t->parms.fwmark = p->fwmark;
        dst_cache_reset(&t->dst_cache);
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
+                            int set_mtu)
+{
+       ip6gre_tnl_copy_tnl_parm(t, p);
        ip6gre_tnl_link_config(t, set_mtu);
        return 0;
 }
@@ -1308,6 +1388,7 @@ static void ip6gre_dev_free(struct net_device *dev)
 {
        struct ip6_tnl *t = netdev_priv(dev);
 
+       gro_cells_destroy(&t->gro_cells);
        dst_cache_destroy(&t->dst_cache);
        free_percpu(dev->tstats);
 }
@@ -1375,17 +1456,14 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
                return -ENOMEM;
 
        ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
-       if (ret) {
-               free_percpu(dev->tstats);
-               dev->tstats = NULL;
-               return ret;
-       }
+       if (ret)
+               goto cleanup_alloc_pcpu_stats;
 
-       tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
-       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
-       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+       ret = gro_cells_init(&tunnel->gro_cells, dev);
+       if (ret)
+               goto cleanup_dst_cache_init;
 
-       dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       t_hlen = ip6gre_calc_hlen(tunnel);
        dev->mtu = ETH_DATA_LEN - t_hlen;
        if (dev->type == ARPHRD_ETHER)
                dev->mtu -= ETH_HLEN;
@@ -1399,6 +1477,13 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
        ip6gre_tnl_init_features(dev);
 
        return 0;
+
+cleanup_dst_cache_init:
+       dst_cache_destroy(&tunnel->dst_cache);
+cleanup_alloc_pcpu_stats:
+       free_percpu(dev->tstats);
+       dev->tstats = NULL;
+       return ret;
 }
 
 static int ip6gre_tunnel_init(struct net_device *dev)
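
The init-path rework above (repeated for ip6erspan_tap_init() further down) converts inline error handling into the kernel's usual goto-unwind style, so the new gro_cells_init() call costs one extra label instead of duplicated cleanup code. The idiom in miniature, with placeholder resources standing in for tstats, the dst cache and the gro cells:

#include <stdlib.h>

struct dev { void *stats, *cache, *cells; };

/* Each acquisition adds one label; failures unwind in exactly the
 * reverse order of setup. */
static int dev_init(struct dev *d)
{
    d->stats = malloc(64);
    if (!d->stats)
        return -1;

    d->cache = malloc(64);
    if (!d->cache)
        goto cleanup_stats;

    d->cells = malloc(64);
    if (!d->cells)
        goto cleanup_cache;

    return 0;

cleanup_cache:
    free(d->cache);
cleanup_stats:
    free(d->stats);
    d->stats = NULL;
    return -1;
}

int main(void)
{
    struct dev d = { 0 };

    return dev_init(&d) ? 1 : 0;
}
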
@@ -1728,6 +1813,19 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
        .ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
+static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel)
+{
+       int t_hlen;
+
+       tunnel->tun_hlen = 8;
+       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+                      erspan_hdr_len(tunnel->parms.erspan_ver);
+
+       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+       tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       return t_hlen;
+}
+
 static int ip6erspan_tap_init(struct net_device *dev)
 {
        struct ip6_tnl *tunnel;
@@ -1745,18 +1843,14 @@ static int ip6erspan_tap_init(struct net_device *dev)
                return -ENOMEM;
 
        ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
-       if (ret) {
-               free_percpu(dev->tstats);
-               dev->tstats = NULL;
-               return ret;
-       }
+       if (ret)
+               goto cleanup_alloc_pcpu_stats;
 
-       tunnel->tun_hlen = 8;
-       tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
-                      erspan_hdr_len(tunnel->parms.erspan_ver);
-       t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+       ret = gro_cells_init(&tunnel->gro_cells, dev);
+       if (ret)
+               goto cleanup_dst_cache_init;
 
-       dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       t_hlen = ip6erspan_calc_hlen(tunnel);
        dev->mtu = ETH_DATA_LEN - t_hlen;
        if (dev->type == ARPHRD_ETHER)
                dev->mtu -= ETH_HLEN;
@@ -1764,14 +1858,21 @@ static int ip6erspan_tap_init(struct net_device *dev)
                dev->mtu -= 8;
 
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-       ip6gre_tnl_link_config(tunnel, 1);
+       ip6erspan_tnl_link_config(tunnel, 1);
 
        return 0;
+
+cleanup_dst_cache_init:
+       dst_cache_destroy(&tunnel->dst_cache);
+cleanup_alloc_pcpu_stats:
+       free_percpu(dev->tstats);
+       dev->tstats = NULL;
+       return ret;
 }
 
 static const struct net_device_ops ip6erspan_netdev_ops = {
        .ndo_init =             ip6erspan_tap_init,
-       .ndo_uninit =           ip6gre_tunnel_uninit,
+       .ndo_uninit =           ip6erspan_tunnel_uninit,
        .ndo_start_xmit =       ip6erspan_tunnel_xmit,
        .ndo_set_mac_address =  eth_mac_addr,
        .ndo_validate_addr =    eth_validate_addr,
@@ -1835,13 +1936,11 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
        return ret;
 }
 
-static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
-                         struct nlattr *tb[], struct nlattr *data[],
-                         struct netlink_ext_ack *extack)
+static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
+                                struct nlattr *tb[], struct nlattr *data[],
+                                struct netlink_ext_ack *extack)
 {
        struct ip6_tnl *nt;
-       struct net *net = dev_net(dev);
-       struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
        struct ip_tunnel_encap ipencap;
        int err;
 
@@ -1854,16 +1953,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
                        return err;
        }
 
-       ip6gre_netlink_parms(data, &nt->parms);
-
-       if (nt->parms.collect_md) {
-               if (rtnl_dereference(ign->collect_md_tun))
-                       return -EEXIST;
-       } else {
-               if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
-                       return -EEXIST;
-       }
-
        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                eth_hw_addr_random(dev);
 
@@ -1874,51 +1963,94 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
        if (err)
                goto out;
 
-       ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
-
        if (tb[IFLA_MTU])
                ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
 
        dev_hold(dev);
-       ip6gre_tunnel_link(ign, nt);
 
 out:
        return err;
 }
 
-static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
-                            struct nlattr *data[],
-                            struct netlink_ext_ack *extack)
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+                         struct nlattr *tb[], struct nlattr *data[],
+                         struct netlink_ext_ack *extack)
+{
+       struct ip6_tnl *nt = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct ip6gre_net *ign;
+       int err;
+
+       ip6gre_netlink_parms(data, &nt->parms);
+       ign = net_generic(net, ip6gre_net_id);
+
+       if (nt->parms.collect_md) {
+               if (rtnl_dereference(ign->collect_md_tun))
+                       return -EEXIST;
+       } else {
+               if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+                       return -EEXIST;
+       }
+
+       err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       if (!err) {
+               ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+               ip6gre_tunnel_link_md(ign, nt);
+               ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+       }
+       return err;
+}
+
+static struct ip6_tnl *
+ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
+                        struct nlattr *data[], struct __ip6_tnl_parm *p_p,
+                        struct netlink_ext_ack *extack)
 {
        struct ip6_tnl *t, *nt = netdev_priv(dev);
        struct net *net = nt->net;
        struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-       struct __ip6_tnl_parm p;
        struct ip_tunnel_encap ipencap;
 
        if (dev == ign->fb_tunnel_dev)
-               return -EINVAL;
+               return ERR_PTR(-EINVAL);
 
        if (ip6gre_netlink_encap_parms(data, &ipencap)) {
                int err = ip6_tnl_encap_setup(nt, &ipencap);
 
                if (err < 0)
-                       return err;
+                       return ERR_PTR(err);
        }
 
-       ip6gre_netlink_parms(data, &p);
+       ip6gre_netlink_parms(data, p_p);
 
-       t = ip6gre_tunnel_locate(net, &p, 0);
+       t = ip6gre_tunnel_locate(net, p_p, 0);
 
        if (t) {
                if (t->dev != dev)
-                       return -EEXIST;
+                       return ERR_PTR(-EEXIST);
        } else {
                t = nt;
        }
 
+       return t;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+                            struct nlattr *data[],
+                            struct netlink_ext_ack *extack)
+{
+       struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+       struct __ip6_tnl_parm p;
+       struct ip6_tnl *t;
+
+       t = ip6gre_changelink_common(dev, tb, data, &p, extack);
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+
+       ip6gre_tunnel_unlink_md(ign, t);
        ip6gre_tunnel_unlink(ign, t);
        ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+       ip6gre_tunnel_link_md(ign, t);
        ip6gre_tunnel_link(ign, t);
        return 0;
 }
@@ -2068,6 +2200,69 @@ static void ip6erspan_tap_setup(struct net_device *dev)
        netif_keep_dst(dev);
 }
 
+static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
+                            struct nlattr *tb[], struct nlattr *data[],
+                            struct netlink_ext_ack *extack)
+{
+       struct ip6_tnl *nt = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct ip6gre_net *ign;
+       int err;
+
+       ip6gre_netlink_parms(data, &nt->parms);
+       ign = net_generic(net, ip6gre_net_id);
+
+       if (nt->parms.collect_md) {
+               if (rtnl_dereference(ign->collect_md_tun_erspan))
+                       return -EEXIST;
+       } else {
+               if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+                       return -EEXIST;
+       }
+
+       err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       if (!err) {
+               ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
+               ip6erspan_tunnel_link_md(ign, nt);
+               ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+       }
+       return err;
+}
+
+static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+       ip6gre_tnl_link_config_common(t);
+       ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t));
+}
+
+static int ip6erspan_tnl_change(struct ip6_tnl *t,
+                               const struct __ip6_tnl_parm *p, int set_mtu)
+{
+       ip6gre_tnl_copy_tnl_parm(t, p);
+       ip6erspan_tnl_link_config(t, set_mtu);
+       return 0;
+}
+
+static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
+                               struct nlattr *data[],
+                               struct netlink_ext_ack *extack)
+{
+       struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+       struct __ip6_tnl_parm p;
+       struct ip6_tnl *t;
+
+       t = ip6gre_changelink_common(dev, tb, data, &p, extack);
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+
+       ip6gre_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
+       ip6erspan_tunnel_link_md(ign, t);
+       ip6gre_tunnel_link(ign, t);
+       return 0;
+}
+
 static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
        .kind           = "ip6gre",
        .maxtype        = IFLA_GRE_MAX,
@@ -2104,8 +2299,8 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
        .priv_size      = sizeof(struct ip6_tnl),
        .setup          = ip6erspan_tap_setup,
        .validate       = ip6erspan_tap_validate,
-       .newlink        = ip6gre_newlink,
-       .changelink     = ip6gre_changelink,
+       .newlink        = ip6erspan_newlink,
+       .changelink     = ip6erspan_changelink,
        .get_size       = ip6gre_get_size,
        .fill_info      = ip6gre_fill_info,
        .get_link_net   = ip6_tnl_get_link_net,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 4a87f9428ca519b475f8feaceaaa3a225bcfe6d2..5b3f2f89ef41c3276ef4b478683bd9ab04a1d3da 100644
@@ -88,9 +88,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 
        if (skb->encapsulation &&
            skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6))
-               udpfrag = proto == IPPROTO_UDP && encap;
+               udpfrag = proto == IPPROTO_UDP && encap &&
+                         (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
        else
-               udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+               udpfrag = proto == IPPROTO_UDP && !skb->encapsulation &&
+                         (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
 
        ops = rcu_dereference(inet6_offloads[proto]);
        if (likely(ops && ops->callbacks.gso_segment)) {
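
The tightened condition above separates the two things "UDP GSO" can mean: legacy SKB_GSO_UDP (UFO) still takes the IP-fragmentation path, while the new SKB_GSO_UDP_L4 must fall through to the protocol's gso_segment callback so the payload is split into MSS-sized datagrams. A toy dispatcher showing the distinction; the flag values are illustrative, not the kernel's:

#include <stdio.h>

#define GSO_UDP    (1u << 0) /* UFO: fragment at the IP layer   */
#define GSO_UDP_L4 (1u << 1) /* GSO: segment into UDP datagrams */

static const char *segment_path(unsigned int gso_type, int is_udp)
{
    if (is_udp && (gso_type & GSO_UDP))
        return "ip fragmentation";
    return "protocol gso_segment callback";
}

int main(void)
{
    printf("%s\n", segment_path(GSO_UDP, 1));    /* ip fragmentation */
    printf("%s\n", segment_path(GSO_UDP_L4, 1)); /* callback         */
    return 0;
}
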
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6a477d54f8c72e5b502c202c7af0a04a552ee26e..60b0d16524489df4e06a559a7ad7016094f7a458 100644
@@ -383,28 +383,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
        return dst_output(net, sk, skb);
 }
 
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
-{
-       unsigned int mtu;
-       struct inet6_dev *idev;
-
-       if (dst_metric_locked(dst, RTAX_MTU)) {
-               mtu = dst_metric_raw(dst, RTAX_MTU);
-               if (mtu)
-                       return mtu;
-       }
-
-       mtu = IPV6_MIN_MTU;
-       rcu_read_lock();
-       idev = __in6_dev_get(dst->dev);
-       if (idev)
-               mtu = idev->cnf.mtu6;
-       rcu_read_unlock();
-
-       return mtu;
-}
-EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
-
 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 {
        if (skb->len <= mtu)
@@ -1240,6 +1218,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
+       cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
+
        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;
@@ -1274,6 +1254,7 @@ static int __ip6_append_data(struct sock *sk,
        int csummode = CHECKSUM_NONE;
        unsigned int maxnonfragsize, headersize;
        unsigned int wmem_alloc_delta = 0;
+       bool paged;
 
        skb = skb_peek_tail(queue);
        if (!skb) {
@@ -1281,7 +1262,8 @@ static int __ip6_append_data(struct sock *sk,
                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        }
 
-       mtu = cork->fragsize;
+       paged = !!cork->gso_size;
+       mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
        orig_mtu = mtu;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1329,7 +1311,7 @@ static int __ip6_append_data(struct sock *sk,
        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            headersize == sizeof(struct ipv6hdr) &&
            length <= mtu - headersize &&
-           !(flags & MSG_MORE) &&
+           (!(flags & MSG_MORE) || cork->gso_size) &&
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;
 
@@ -1372,6 +1354,7 @@ static int __ip6_append_data(struct sock *sk,
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
+                       unsigned int pagedlen = 0;
 alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
@@ -1394,11 +1377,17 @@ static int __ip6_append_data(struct sock *sk,
 
                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
+                       fraglen = datalen + fragheaderlen;
+
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
-                       else
-                               alloclen = datalen + fragheaderlen;
+                       else if (!paged)
+                               alloclen = fraglen;
+                       else {
+                               alloclen = min_t(int, fraglen, MAX_HEADER);
+                               pagedlen = fraglen - alloclen;
+                       }
 
                        alloclen += dst_exthdrlen;
 
@@ -1420,7 +1409,7 @@ static int __ip6_append_data(struct sock *sk,
                         */
                        alloclen += sizeof(struct frag_hdr);
 
-                       copy = datalen - transhdrlen - fraggap;
+                       copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
@@ -1459,7 +1448,7 @@ static int __ip6_append_data(struct sock *sk,
                        /*
                         *      Find where to start putting bytes
                         */
-                       data = skb_put(skb, fraglen);
+                       data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
@@ -1482,7 +1471,7 @@ static int __ip6_append_data(struct sock *sk,
                        }
 
                        offset += copy;
-                       length -= datalen - fraggap;
+                       length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;
@@ -1505,7 +1494,8 @@ static int __ip6_append_data(struct sock *sk,
                if (copy > length)
                        copy = length;
 
-               if (!(rt->dst.dev->features&NETIF_F_SG)) {
+               if (!(rt->dst.dev->features&NETIF_F_SG) &&
+                   skb_tailroom(skb) >= copy) {
                        unsigned int off;
 
                        off = skb->len;
@@ -1755,9 +1745,9 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
+                            struct inet_cork_full *cork,
                             const struct sockcm_cookie *sockc)
 {
-       struct inet_cork_full cork;
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
@@ -1768,27 +1758,27 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 
        __skb_queue_head_init(&queue);
 
-       cork.base.flags = 0;
-       cork.base.addr = 0;
-       cork.base.opt = NULL;
-       cork.base.dst = NULL;
+       cork->base.flags = 0;
+       cork->base.addr = 0;
+       cork->base.opt = NULL;
+       cork->base.dst = NULL;
        v6_cork.opt = NULL;
-       err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
+       err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
-               ip6_cork_release(&cork, &v6_cork);
+               ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
        }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
 
-       err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
+       err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6, sockc);
        if (err) {
-               __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
+               __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
        }
 
-       return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
+       return __ip6_make_skb(sk, &queue, cork, &v6_cork);
 }
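
Moving the cork from ip6_make_skb()'s stack to a caller-supplied inet_cork_full means the caller can read back what cork setup decided, notably base.gso_size as filled in by ip6_setup_cork() above, when it finishes and sends the skb. A minimal sketch of that inversion of ownership, with hypothetical stand-in types:

#include <stdio.h>

struct cork { int gso_size; };

/* Setup fills the caller-owned cork, as ip6_setup_cork() now does. */
static int make_skb(struct cork *cork, int requested_gso)
{
    cork->gso_size = requested_gso;
    return 0;
}

int main(void)
{
    struct cork cork = { 0 };

    make_skb(&cork, 1400);
    /* the caller, not the helper, now acts on the recorded gso_size */
    printf("send with gso_size=%d\n", cork.gso_size);
    return 0;
}
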
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c214ffec02f06f6dccfb9769fc8640e5e56da618..b7f28deddaeaf8aa35f23da3256f10440d84af85 100644
@@ -669,7 +669,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
        else
                mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr);
 
-       dev->mtu = max_t(int, mtu, IPV6_MIN_MTU);
+       dev->mtu = max_t(int, mtu, IPV4_MIN_MTU);
 }
 
 /**
@@ -743,7 +743,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
 }
 
 /**
- * vti6_tnl_ioctl - configure vti6 tunnels from userspace
+ * vti6_ioctl - configure vti6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
  *   @ifr: parameters passed from userspace
  *   @cmd: command to be performed
@@ -881,7 +881,7 @@ static void vti6_dev_setup(struct net_device *dev)
        dev->priv_destructor = vti6_dev_free;
 
        dev->type = ARPHRD_TUNNEL6;
-       dev->min_mtu = IPV6_MIN_MTU;
+       dev->min_mtu = IPV4_MIN_MTU;
        dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
        dev->flags |= IFF_NOARP;
        dev->addr_len = sizeof(struct in6_addr);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ccbfa83e4bb076ce086ff1e4e79c130857c7a1a5..ce77bcc2490c2783604d69778887127d7f405239 100644
@@ -48,6 +48,34 @@ config NFT_CHAIN_ROUTE_IPV6
          fields such as the source, destination, flowlabel, hop-limit and
          the packet mark.
 
+if NF_NAT_IPV6
+
+config NFT_CHAIN_NAT_IPV6
+       tristate "IPv6 nf_tables nat chain support"
+       help
+         This option enables the "nat" chain for IPv6 in nf_tables. This
+         chain type is used to perform Network Address Translation (NAT)
+         packet transformations such as the source, destination address and
+         source and destination ports.
+
+config NFT_MASQ_IPV6
+       tristate "IPv6 masquerade support for nf_tables"
+       depends on NFT_MASQ
+       select NF_NAT_MASQUERADE_IPV6
+       help
+         This is the expression that provides IPv6 masquerading support for
+         nf_tables.
+
+config NFT_REDIR_IPV6
+       tristate "IPv6 redirect support for nf_tables"
+       depends on NFT_REDIR
+       select NF_NAT_REDIRECT
+       help
+         This is the expression that provides IPv6 redirect support for
+         nf_tables.
+
+endif # NF_NAT_IPV6
+
 config NFT_REJECT_IPV6
        select NF_REJECT_IPV6
        default NFT_REJECT
@@ -107,39 +135,12 @@ config NF_NAT_IPV6
 
 if NF_NAT_IPV6
 
-config NFT_CHAIN_NAT_IPV6
-       depends on NF_TABLES_IPV6
-       tristate "IPv6 nf_tables nat chain support"
-       help
-         This option enables the "nat" chain for IPv6 in nf_tables. This
-         chain type is used to perform Network Address Translation (NAT)
-         packet transformations such as the source, destination address and
-         source and destination ports.
-
 config NF_NAT_MASQUERADE_IPV6
        tristate "IPv6 masquerade support"
        help
          This is the kernel functionality to provide NAT in the masquerade
          flavour (automatic source address selection) for IPv6.
 
-config NFT_MASQ_IPV6
-       tristate "IPv6 masquerade support for nf_tables"
-       depends on NF_TABLES_IPV6
-       depends on NFT_MASQ
-       select NF_NAT_MASQUERADE_IPV6
-       help
-         This is the expression that provides IPv4 masquerading support for
-         nf_tables.
-
-config NFT_REDIR_IPV6
-       tristate "IPv6 redirect support for nf_tables"
-       depends on NF_TABLES_IPV6
-       depends on NFT_REDIR
-       select NF_NAT_REDIRECT
-       help
-         This is the expression that provides IPv4 redirect support for
-         nf_tables.
-
 endif # NF_NAT_IPV6
 
 config IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 65c9e1a583056aa07b014ad4f701a2a234b9a26e..3dc9af775ce2162e3f12918a7e4a62f4d98254f6 100644
@@ -38,6 +38,7 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv6 packet filter");
+MODULE_ALIAS("ip6t_icmp6");
 
 void *ip6t_alloc_initial_table(const struct xt_table *info)
 {
@@ -528,7 +529,6 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
                .family    = NFPROTO_IPV6,
        };
 
-       t = ip6t_get_target(e);
        return xt_check_target(&par, t->u.target_size - sizeof(*t),
                               e->ipv6.proto,
                               e->ipv6.invflags & IP6T_INV_PROTO);
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 92c0047e7e33dc5925054c41143fe200db06f125..491f808e356a68046f553785836d220e287c318a 100644
@@ -29,7 +29,7 @@ masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
 {
-       const struct nf_nat_range *range = par->targinfo;
+       const struct nf_nat_range2 *range = par->targinfo;
 
        if (range->flags & NF_NAT_RANGE_MAP_IPS)
                return -EINVAL;
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 33719d5560c8ab4ee565ede2b03a107c06551e51..1059894a6f4c3f009a92b30fb257e6b35f3a4a26 100644
@@ -117,6 +117,130 @@ static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
        return true;
 }
 
+static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       int hdrlen, psidoff, nsidoff, lsidoff, srhoff = 0;
+       const struct ip6t_srh1 *srhinfo = par->matchinfo;
+       struct in6_addr *psid, *nsid, *lsid;
+       struct in6_addr _psid, _nsid, _lsid;
+       struct ipv6_sr_hdr *srh;
+       struct ipv6_sr_hdr _srh;
+
+       if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+               return false;
+       srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
+       if (!srh)
+               return false;
+
+       hdrlen = ipv6_optlen(srh);
+       if (skb->len - srhoff < hdrlen)
+               return false;
+
+       if (srh->type != IPV6_SRCRT_TYPE_4)
+               return false;
+
+       if (srh->segments_left > srh->first_segment)
+               return false;
+
+       /* Next Header matching */
+       if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
+                               !(srh->nexthdr == srhinfo->next_hdr)))
+                       return false;
+
+       /* Header Extension Length matching */
+       if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
+                               !(srh->hdrlen == srhinfo->hdr_len)))
+                       return false;
+       if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
+                               !(srh->hdrlen > srhinfo->hdr_len)))
+                       return false;
+       if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
+                               !(srh->hdrlen < srhinfo->hdr_len)))
+                       return false;
+
+       /* Segments Left matching */
+       if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
+                               !(srh->segments_left == srhinfo->segs_left)))
+                       return false;
+       if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
+                               !(srh->segments_left > srhinfo->segs_left)))
+                       return false;
+       if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
+                               !(srh->segments_left < srhinfo->segs_left)))
+                       return false;
+
+       /* Last Entry matching
+        * Last_Entry field was introduced in revision 6 of the SRH draft;
+        * it was called First_Segment in the previous revision.
+        */
+       if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
+                               !(srh->first_segment == srhinfo->last_entry)))
+                       return false;
+       if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
+                               !(srh->first_segment > srhinfo->last_entry)))
+                       return false;
+       if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
+                               !(srh->first_segment < srhinfo->last_entry)))
+                       return false;
+
+       /* Tag matching
+        * Tag field was introduced in revision 6 of the SRH draft.
+        */
+       if (srhinfo->mt_flags & IP6T_SRH_TAG)
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
+                               !(srh->tag == srhinfo->tag)))
+                       return false;
+
+       /* Previous SID matching */
+       if (srhinfo->mt_flags & IP6T_SRH_PSID) {
+               if (srh->segments_left == srh->first_segment)
+                       return false;
+               psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
+                         ((srh->segments_left + 1) * sizeof(struct in6_addr));
+               psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
+                               ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
+                                                    &srhinfo->psid_addr)))
+                       return false;
+       }
+
+       /* Next SID matching */
+       if (srhinfo->mt_flags & IP6T_SRH_NSID) {
+               if (srh->segments_left == 0)
+                       return false;
+               nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
+                         ((srh->segments_left - 1) * sizeof(struct in6_addr));
+               nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
+                               ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
+                                                    &srhinfo->nsid_addr)))
+                       return false;
+       }
+
+       /* Last SID matching */
+       if (srhinfo->mt_flags & IP6T_SRH_LSID) {
+               lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
+               lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
+               if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
+                               ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
+                                                    &srhinfo->lsid_addr)))
+                       return false;
+       }
+       return true;
+}
+
 static int srh_mt6_check(const struct xt_mtchk_param *par)
 {
        const struct ip6t_srh *srhinfo = par->matchinfo;
@@ -136,23 +260,54 @@ static int srh_mt6_check(const struct xt_mtchk_param *par)
        return 0;
 }
 
-static struct xt_match srh_mt6_reg __read_mostly = {
-       .name           = "srh",
-       .family         = NFPROTO_IPV6,
-       .match          = srh_mt6,
-       .matchsize      = sizeof(struct ip6t_srh),
-       .checkentry     = srh_mt6_check,
-       .me             = THIS_MODULE,
+static int srh1_mt6_check(const struct xt_mtchk_param *par)
+{
+       const struct ip6t_srh1 *srhinfo = par->matchinfo;
+
+       if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
+               pr_info_ratelimited("unknown srh match flags %X\n",
+                                   srhinfo->mt_flags);
+               return -EINVAL;
+       }
+
+       if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
+               pr_info_ratelimited("unknown srh invflags %X\n",
+                                   srhinfo->mt_invflags);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static struct xt_match srh_mt6_reg[] __read_mostly = {
+       {
+               .name           = "srh",
+               .revision       = 0,
+               .family         = NFPROTO_IPV6,
+               .match          = srh_mt6,
+               .matchsize      = sizeof(struct ip6t_srh),
+               .checkentry     = srh_mt6_check,
+               .me             = THIS_MODULE,
+       },
+       {
+               .name           = "srh",
+               .revision       = 1,
+               .family         = NFPROTO_IPV6,
+               .match          = srh1_mt6,
+               .matchsize      = sizeof(struct ip6t_srh1),
+               .checkentry     = srh1_mt6_check,
+               .me             = THIS_MODULE,
+       }
 };
 
 static int __init srh_mt6_init(void)
 {
-       return xt_register_match(&srh_mt6_reg);
+       return xt_register_matches(srh_mt6_reg, ARRAY_SIZE(srh_mt6_reg));
 }
 
 static void __exit srh_mt6_exit(void)
 {
-       xt_unregister_match(&srh_mt6_reg);
+       xt_unregister_matches(srh_mt6_reg, ARRAY_SIZE(srh_mt6_reg));
 }
 
 module_init(srh_mt6_init);
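
Registering the match twice under the same name with different revisions keeps old ip6tables binaries (struct ip6t_srh, revision 0) working while new userspace can request the SID-matching layout (struct ip6t_srh1, revision 1). The sketch below is a hypothetical userspace illustration of that selection rule, not kernel or libxtables code; the match_desc type and the sizes are made up for the demo.

/* Hypothetical userspace sketch of xtables-style revision selection:
 * the kernel registers several revisions under one match name and the
 * highest revision both sides understand wins. Names and sizes here
 * are illustrative, not kernel API. */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct match_desc {                     /* stand-in for struct xt_match */
	const char *name;
	unsigned int revision;
	unsigned int matchsize;
};

static const struct match_desc matches[] = {
	{ "srh", 0, 40 },               /* old struct ip6t_srh layout */
	{ "srh", 1, 160 },              /* struct ip6t_srh1 with SID fields */
};

/* Highest revision of `name` not newer than `max_rev`, or NULL. */
static const struct match_desc *find_match(const char *name,
					   unsigned int max_rev)
{
	const struct match_desc *best = NULL;
	size_t i;

	for (i = 0; i < sizeof(matches) / sizeof(matches[0]); i++) {
		if (strcmp(matches[i].name, name))
			continue;
		if (matches[i].revision <= max_rev &&
		    (!best || matches[i].revision > best->revision))
			best = &matches[i];
	}
	return best;
}

int main(void)
{
	const struct match_desc *m = find_match("srh", 1);

	if (m)
		printf("using %s revision %u (matchsize %u)\n",
		       m->name, m->revision, m->matchsize);
	return 0;
}
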
index 47306e45a80abf5225aab9c1209258302c63c161..2bf554e18af86ca217787db410b9d8bb5bb7346e 100644 (file)
@@ -35,8 +35,7 @@ static const struct xt_table nf_nat_ipv6_table = {
 
 static unsigned int ip6table_nat_do_chain(void *priv,
                                          struct sk_buff *skb,
-                                         const struct nf_hook_state *state,
-                                         struct nf_conn *ct)
+                                         const struct nf_hook_state *state)
 {
        return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
 }
index 207cb35569b1c3c382560088aeb6ecdd75a236b2..c511d206bf9bd4876a0480b44834db2ba9d8e633 100644 (file)
 #include <linux/module.h>
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
-#include <linux/ipv6.h>
-#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables.h>
-/* For layer 4 checksum field offset. */
-#include <linux/tcp.h>
-#include <linux/udp.h>
-
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
-                               struct in6_addr *addr,
-                               struct in6_addr *new_addr)
-{
-       struct tcphdr *tcph;
-
-       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
-           skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-               return -1;
-
-       tcph = (void *)(skb_network_header(skb) + thoff);
-       inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
-                                 new_addr->s6_addr32, true);
-
-       return 0;
-}
-
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
-                               struct in6_addr *addr,
-                               struct in6_addr *new_addr)
-{
-       struct udphdr *udph;
-
-       if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
-           skb_try_make_writable(skb, thoff + sizeof(*udph)))
-               return -1;
-
-       udph = (void *)(skb_network_header(skb) + thoff);
-       if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-               inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
-                                         new_addr->s6_addr32, true);
-               if (!udph->check)
-                       udph->check = CSUM_MANGLED_0;
-       }
-
-       return 0;
-}
-
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
-                                   unsigned int thoff, struct in6_addr *addr,
-                                   struct in6_addr *new_addr)
-{
-       switch (ip6h->nexthdr) {
-       case IPPROTO_TCP:
-               if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
-                       return NF_DROP;
-               break;
-       case IPPROTO_UDP:
-               if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
-                       return NF_DROP;
-               break;
-       }
-
-       return 0;
-}
-
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
-                            struct sk_buff *skb, struct ipv6hdr *ip6h,
-                            unsigned int thoff,
-                            enum flow_offload_tuple_dir dir)
-{
-       struct in6_addr addr, new_addr;
-
-       switch (dir) {
-       case FLOW_OFFLOAD_DIR_ORIGINAL:
-               addr = ip6h->saddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
-               ip6h->saddr = new_addr;
-               break;
-       case FLOW_OFFLOAD_DIR_REPLY:
-               addr = ip6h->daddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
-               ip6h->daddr = new_addr;
-               break;
-       default:
-               return -1;
-       }
-
-       return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-}
-
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
-                            struct sk_buff *skb, struct ipv6hdr *ip6h,
-                            unsigned int thoff,
-                            enum flow_offload_tuple_dir dir)
-{
-       struct in6_addr addr, new_addr;
-
-       switch (dir) {
-       case FLOW_OFFLOAD_DIR_ORIGINAL:
-               addr = ip6h->daddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
-               ip6h->daddr = new_addr;
-               break;
-       case FLOW_OFFLOAD_DIR_REPLY:
-               addr = ip6h->saddr;
-               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
-               ip6h->saddr = new_addr;
-               break;
-       default:
-               return -1;
-       }
-
-       return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-}
-
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
-                           struct sk_buff *skb,
-                           enum flow_offload_tuple_dir dir)
-{
-       struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       unsigned int thoff = sizeof(*ip6h);
-
-       if (flow->flags & FLOW_OFFLOAD_SNAT &&
-           (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-            nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
-               return -1;
-       if (flow->flags & FLOW_OFFLOAD_DNAT &&
-           (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-            nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
-               return -1;
-
-       return 0;
-}
-
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
-                             struct flow_offload_tuple *tuple)
-{
-       struct flow_ports *ports;
-       struct ipv6hdr *ip6h;
-       unsigned int thoff;
-
-       if (!pskb_may_pull(skb, sizeof(*ip6h)))
-               return -1;
-
-       ip6h = ipv6_hdr(skb);
-
-       if (ip6h->nexthdr != IPPROTO_TCP &&
-           ip6h->nexthdr != IPPROTO_UDP)
-               return -1;
-
-       thoff = sizeof(*ip6h);
-       if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
-               return -1;
-
-       ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
-
-       tuple->src_v6           = ip6h->saddr;
-       tuple->dst_v6           = ip6h->daddr;
-       tuple->src_port         = ports->source;
-       tuple->dst_port         = ports->dest;
-       tuple->l3proto          = AF_INET6;
-       tuple->l4proto          = ip6h->nexthdr;
-       tuple->iifidx           = dev->ifindex;
-
-       return 0;
-}
-
-/* Based on ip_exceeds_mtu(). */
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
-       if (skb->len <= mtu)
-               return false;
-
-       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
-               return false;
-
-       return true;
-}
-
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
-{
-       u32 mtu;
-
-       mtu = ip6_dst_mtu_forward(&rt->dst);
-       if (__nf_flow_exceeds_mtu(skb, mtu))
-               return true;
-
-       return false;
-}
-
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
-                         const struct nf_hook_state *state)
-{
-       struct flow_offload_tuple_rhash *tuplehash;
-       struct nf_flowtable *flow_table = priv;
-       struct flow_offload_tuple tuple = {};
-       enum flow_offload_tuple_dir dir;
-       struct flow_offload *flow;
-       struct net_device *outdev;
-       struct in6_addr *nexthop;
-       struct ipv6hdr *ip6h;
-       struct rt6_info *rt;
-
-       if (skb->protocol != htons(ETH_P_IPV6))
-               return NF_ACCEPT;
-
-       if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
-               return NF_ACCEPT;
-
-       tuplehash = flow_offload_lookup(flow_table, &tuple);
-       if (tuplehash == NULL)
-               return NF_ACCEPT;
-
-       outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
-       if (!outdev)
-               return NF_ACCEPT;
-
-       dir = tuplehash->tuple.dir;
-       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-
-       rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-       if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
-               return NF_ACCEPT;
-
-       if (skb_try_make_writable(skb, sizeof(*ip6h)))
-               return NF_DROP;
-
-       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-           nf_flow_nat_ipv6(flow, skb, dir) < 0)
-               return NF_DROP;
-
-       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
-       ip6h = ipv6_hdr(skb);
-       ip6h->hop_limit--;
-
-       skb->dev = outdev;
-       nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-       neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
-
-       return NF_STOLEN;
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
 
 static struct nf_flowtable_type flowtable_ipv6 = {
        .family         = NFPROTO_IPV6,
-       .params         = &nf_flow_offload_rhash_params,
-       .gc             = nf_flow_offload_work_gc,
+       .init           = nf_flow_table_init,
        .free           = nf_flow_table_free,
        .hook           = nf_flow_offload_ipv6_hook,
        .owner          = THIS_MODULE,
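
The IPv6-specific tuple extraction, NAT rewriting, and MTU handling removed here move into the generic flow table core, so the per-family type shrinks to init/free plus the hook. A sketch of the module glue this plausibly leaves behind, assuming the nft_register_flowtable_type()/nft_unregister_flowtable_type() API used by this series; the authoritative file is in the tree.

/* Sketch of the remaining module glue in nf_flow_table_ipv6.c after
 * the consolidation; assumes the nft_register_flowtable_type() API
 * of this series, not an authoritative copy of the file. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/netfilter/nf_flow_table.h>

static int __init nf_flow_ipv6_module_init(void)
{
	nft_register_flowtable_type(&flowtable_ipv6);
	return 0;
}

static void __exit nf_flow_ipv6_module_exit(void)
{
	nft_unregister_flowtable_type(&flowtable_ipv6);
}

module_init(nf_flow_ipv6_module_init);
module_exit(nf_flow_ipv6_module_exit);

MODULE_LICENSE("GPL");
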
index 6b7f075f811f2b317f09f11998a80d87d0616e15..f1582b6f958844a157039fcfd1c14db7456d4948 100644 (file)
@@ -62,7 +62,7 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
 #endif
 
 static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
-                                const struct nf_nat_range *range)
+                                const struct nf_nat_range2 *range)
 {
        return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
               ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
@@ -151,7 +151,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
-                                      struct nf_nat_range *range)
+                                      struct nf_nat_range2 *range)
 {
        if (tb[CTA_NAT_V6_MINIP]) {
                nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
@@ -257,8 +257,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
               const struct nf_hook_state *state,
               unsigned int (*do_chain)(void *priv,
                                        struct sk_buff *skb,
-                                       const struct nf_hook_state *state,
-                                       struct nf_conn *ct))
+                                       const struct nf_hook_state *state))
 {
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
@@ -303,7 +302,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
                if (!nf_nat_initialized(ct, maniptype)) {
                        unsigned int ret;
 
-                       ret = do_chain(priv, skb, state, ct);
+                       ret = do_chain(priv, skb, state);
                        if (ret != NF_ACCEPT)
                                return ret;
 
@@ -343,8 +342,7 @@ nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
               const struct nf_hook_state *state,
               unsigned int (*do_chain)(void *priv,
                                        struct sk_buff *skb,
-                                       const struct nf_hook_state *state,
-                                       struct nf_conn *ct))
+                                       const struct nf_hook_state *state))
 {
        unsigned int ret;
        struct in6_addr daddr = ipv6_hdr(skb)->daddr;
@@ -363,8 +361,7 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
                const struct nf_hook_state *state,
                unsigned int (*do_chain)(void *priv,
                                         struct sk_buff *skb,
-                                        const struct nf_hook_state *state,
-                                        struct nf_conn *ct))
+                                        const struct nf_hook_state *state))
 {
 #ifdef CONFIG_XFRM
        const struct nf_conn *ct;
@@ -400,8 +397,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
                     const struct nf_hook_state *state,
                     unsigned int (*do_chain)(void *priv,
                                              struct sk_buff *skb,
-                                             const struct nf_hook_state *state,
-                                             struct nf_conn *ct))
+                                             const struct nf_hook_state *state))
 {
        const struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
index 98f61fcb91088cfebd1e77d998f3b445816e0ef2..9dfc2b90c3622ce2ff8ab0bef4b00c8faa96f164 100644 (file)
 static atomic_t v6_worker_count;
 
 unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                       const struct net_device *out)
 {
        enum ip_conntrack_info ctinfo;
        struct nf_conn_nat *nat;
        struct in6_addr src;
        struct nf_conn *ct;
-       struct nf_nat_range newrange;
+       struct nf_nat_range2 newrange;
 
        ct = nf_ct_get(skb, &ctinfo);
        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
index 57593b00c5b4327164b79567be619f0f7561f5a2..d9bf42ba44fa134902332b974793740fb4dfb8da 100644 (file)
@@ -32,7 +32,7 @@ icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
 static void
 icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
                    struct nf_conntrack_tuple *tuple,
-                   const struct nf_nat_range *range,
+                   const struct nf_nat_range2 *range,
                    enum nf_nat_manip_type maniptype,
                    const struct nf_conn *ct)
 {
index 3557b114446c93181c09c3384d1e28488b42225c..100a6bd1046ab6f9ccb389535fb2da72ec501ee2 100644 (file)
@@ -26,8 +26,7 @@
 
 static unsigned int nft_nat_do_chain(void *priv,
                                     struct sk_buff *skb,
-                                    const struct nf_hook_state *state,
-                                    struct nf_conn *ct)
+                                    const struct nf_hook_state *state)
 {
        struct nft_pktinfo pkt;
 
index 4146536e9c1517fc5e2e0ad066a8e87154446dda..dd0122f3cffea92f587f0c8a598281e77aa5c98b 100644 (file)
@@ -22,7 +22,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
                               const struct nft_pktinfo *pkt)
 {
        struct nft_masq *priv = nft_expr_priv(expr);
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        memset(&range, 0, sizeof(range));
        range.flags = priv->flags;
index a27e424f690d699fafc5f2a7135637f36fb66388..74269865acc842892affd2067e170b00998ce17c 100644 (file)
@@ -22,7 +22,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
                                const struct nft_pktinfo *pkt)
 {
        struct nft_redir *priv = nft_expr_priv(expr);
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        memset(&range, 0, sizeof(range));
        if (priv->sreg_proto_min) {
index ac3e51631c659b5c5c8a93c17011cb7f3ad266e2..cc24ed3bc3349d3116ae87b11adc7bb2ec80b3a9 100644 (file)
@@ -419,11 +419,11 @@ static bool rt6_check_expired(const struct rt6_info *rt)
        return false;
 }
 
-static struct fib6_info *rt6_multipath_select(const struct net *net,
-                                             struct fib6_info *match,
-                                            struct flowi6 *fl6, int oif,
-                                            const struct sk_buff *skb,
-                                            int strict)
+struct fib6_info *fib6_multipath_select(const struct net *net,
+                                       struct fib6_info *match,
+                                       struct flowi6 *fl6, int oif,
+                                       const struct sk_buff *skb,
+                                       int strict)
 {
        struct fib6_info *sibling, *next_sibling;
 
@@ -468,7 +468,7 @@ static inline struct fib6_info *rt6_device_match(struct net *net,
            !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
                return rt;
 
-       for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
+       for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
                const struct net_device *dev = sprt->fib6_nh.nh_dev;
 
                if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
@@ -696,7 +696,7 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
 
        match = NULL;
        cont = NULL;
-       for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
+       for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
                if (rt->fib6_metric != metric) {
                        cont = rt;
                        break;
@@ -706,7 +706,7 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
        }
 
        for (rt = leaf; rt && rt != rr_head;
-            rt = rcu_dereference(rt->rt6_next)) {
+            rt = rcu_dereference(rt->fib6_next)) {
                if (rt->fib6_metric != metric) {
                        cont = rt;
                        break;
@@ -718,7 +718,7 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
        if (match || !cont)
                return match;
 
-       for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
+       for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 
        return match;
@@ -756,7 +756,7 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
                             &do_rr);
 
        if (do_rr) {
-               struct fib6_info *next = rcu_dereference(rt0->rt6_next);
+               struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 
                /* no entries matched; do round-robin */
                if (!next || next->fib6_metric != rt0->fib6_metric)
@@ -1006,7 +1006,7 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
                pn = rcu_dereference(fn->parent);
                sn = FIB6_SUBTREE(pn);
                if (sn && sn != fn)
-                       fn = fib6_lookup(sn, NULL, saddr);
+                       fn = fib6_node_lookup(sn, NULL, saddr);
                else
                        fn = pn;
                if (fn->fn_flags & RTN_RTINFO)
@@ -1059,7 +1059,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                flags &= ~RT6_LOOKUP_F_IFACE;
 
        rcu_read_lock();
-       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
        f6i = rcu_dereference(fn->leaf);
        if (!f6i) {
@@ -1068,8 +1068,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                f6i = rt6_device_match(net, f6i, &fl6->saddr,
                                      fl6->flowi6_oif, flags);
                if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
-                       f6i = rt6_multipath_select(net, f6i, fl6,
-                                                  fl6->flowi6_oif, skb, flags);
+                       f6i = fib6_multipath_select(net, f6i, fl6,
+                                                   fl6->flowi6_oif, skb,
+                                                   flags);
        }
        if (f6i == net->ipv6.fib6_null_entry) {
                fn = fib6_backtrack(fn, &fl6->saddr);
@@ -1077,6 +1078,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                        goto restart;
        }
 
+       trace_fib6_table_lookup(net, f6i, table, fl6);
+
        /* Search through exception table */
        rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
        if (rt) {
@@ -1095,8 +1098,6 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 
        rcu_read_unlock();
 
-       trace_fib6_table_lookup(net, rt, table, fl6);
-
        return rt;
 }
 
@@ -1546,8 +1547,7 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
        struct fib6_info *from;
        int err;
 
-       from = rcu_dereference_protected(rt->from,
-                                        lockdep_is_held(&rt6_exception_lock));
+       from = rcu_dereference(rt->from);
        if (!from ||
            !(rt->rt6i_flags & RTF_CACHE))
                return -EINVAL;
@@ -1800,23 +1800,14 @@ void rt6_age_exceptions(struct fib6_info *rt,
        rcu_read_unlock_bh();
 }
 
-struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-                              int oif, struct flowi6 *fl6,
-                              const struct sk_buff *skb, int flags)
+/* Must be called with rcu_read_lock held */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+                                   int oif, struct flowi6 *fl6, int strict)
 {
        struct fib6_node *fn, *saved_fn;
        struct fib6_info *f6i;
-       struct rt6_info *rt;
-       int strict = 0;
-
-       strict |= flags & RT6_LOOKUP_F_IFACE;
-       strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
-       if (net->ipv6.devconf_all->forwarding == 0)
-               strict |= RT6_LOOKUP_F_REACHABLE;
-
-       rcu_read_lock();
 
-       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
        saved_fn = fn;
 
        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
@@ -1824,8 +1815,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 redo_rt6_select:
        f6i = rt6_select(net, fn, oif, strict);
-       if (f6i->fib6_nsiblings)
-               f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
        if (f6i == net->ipv6.fib6_null_entry) {
                fn = fib6_backtrack(fn, &fl6->saddr);
                if (fn)
@@ -1838,11 +1827,34 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
                }
        }
 
+       trace_fib6_table_lookup(net, f6i, table, fl6);
+
+       return f6i;
+}
+
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+                              int oif, struct flowi6 *fl6,
+                              const struct sk_buff *skb, int flags)
+{
+       struct fib6_info *f6i;
+       struct rt6_info *rt;
+       int strict = 0;
+
+       strict |= flags & RT6_LOOKUP_F_IFACE;
+       strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
+       if (net->ipv6.devconf_all->forwarding == 0)
+               strict |= RT6_LOOKUP_F_REACHABLE;
+
+       rcu_read_lock();
+
+       f6i = fib6_table_lookup(net, table, oif, fl6, strict);
+       if (f6i->fib6_nsiblings)
+               f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
+
        if (f6i == net->ipv6.fib6_null_entry) {
                rt = net->ipv6.ip6_null_entry;
                rcu_read_unlock();
                dst_hold(&rt->dst);
-               trace_fib6_table_lookup(net, rt, table, fl6);
                return rt;
        }
 
@@ -1853,7 +1865,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
                        dst_use_noref(&rt->dst, jiffies);
 
                rcu_read_unlock();
-               trace_fib6_table_lookup(net, rt, table, fl6);
                return rt;
        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
                            !(f6i->fib6_flags & RTF_GATEWAY))) {
@@ -1879,9 +1890,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
                        dst_hold(&uncached_rt->dst);
                }
 
-               trace_fib6_table_lookup(net, uncached_rt, table, fl6);
                return uncached_rt;
-
        } else {
                /* Get a percpu copy */
 
@@ -1895,7 +1904,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
                local_bh_enable();
                rcu_read_unlock();
-               trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
+
                return pcpu_rt;
        }
 }
@@ -1933,11 +1942,16 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
        const struct ipv6hdr *inner_iph;
        const struct icmp6hdr *icmph;
        struct ipv6hdr _inner_iph;
+       struct icmp6hdr _icmph;
 
        if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
                goto out;
 
-       icmph = icmp6_hdr(skb);
+       icmph = skb_header_pointer(skb, skb_transport_offset(skb),
+                                  sizeof(_icmph), &_icmph);
+       if (!icmph)
+               goto out;
+
        if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
            icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
            icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
@@ -2421,7 +2435,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
         */
 
        rcu_read_lock();
-       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
        for_each_fib6_node_rt_rcu(fn) {
                if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
@@ -2475,7 +2489,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 
        rcu_read_unlock();
 
-       trace_fib6_table_lookup(net, ret, table, fl6);
+       trace_fib6_table_lookup(net, rt, table, fl6);
        return ret;
 };
 
@@ -3220,8 +3234,10 @@ static int ip6_route_del(struct fib6_config *cfg,
                                                              &cfg->fc_src);
                                if (rt_cache) {
                                        rc = ip6_del_cached_rt(rt_cache, cfg);
-                                       if (rc != -ESRCH)
+                                       if (rc != -ESRCH) {
+                                               rcu_read_unlock();
                                                return rc;
+                                       }
                                }
                                continue;
                        }
@@ -3777,7 +3793,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
                if (iter->fib6_metric == rt->fib6_metric &&
                    rt6_qualify_for_ecmp(iter))
                        return iter;
-               iter = rcu_dereference_protected(iter->rt6_next,
+               iter = rcu_dereference_protected(iter->fib6_next,
                                lockdep_is_held(&rt->fib6_table->tb6_lock));
        }
 
@@ -4054,6 +4070,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
 
 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
+       [RTA_PREFSRC]           = { .len = sizeof(struct in6_addr) },
        [RTA_OIF]               = { .type = NLA_U32 },
        [RTA_IIF]               = { .type = NLA_U32 },
        [RTA_PRIORITY]          = { .type = NLA_U32 },
@@ -4065,6 +4082,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_EXPIRES]           = { .type = NLA_U32 },
        [RTA_UID]               = { .type = NLA_U32 },
        [RTA_MARK]              = { .type = NLA_U32 },
+       [RTA_TABLE]             = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
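
Among the route.c changes above, ip6_multipath_l3_keys() previously dereferenced icmp6_hdr(skb) directly; on a non-linear skb the ICMPv6 header may sit outside the linear area, so the fix copies it out with skb_header_pointer() and falls back to the outer flow keys on truncation. Below, a userspace analogue of that pattern; all names are illustrative, not kernel API.

/* Userspace analogue of the skb_header_pointer() pattern adopted by
 * ip6_multipath_l3_keys(): never dereference a header that may extend
 * past the data actually held; copy it out under a bounds check. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy `len` bytes at offset `off` into `buf`, or return NULL. */
static const void *header_pointer(const uint8_t *pkt, size_t pktlen,
				  size_t off, size_t len, void *buf)
{
	if (off + len < off || off + len > pktlen)  /* overflow + bounds */
		return NULL;
	memcpy(buf, pkt + off, len);
	return buf;
}

struct icmp6_min {                 /* first four bytes of ICMPv6 header */
	uint8_t type;
	uint8_t code;
	uint16_t cksum;
};

int main(void)
{
	uint8_t pkt[2] = { 1, 0 };         /* deliberately truncated */
	struct icmp6_min h;

	if (!header_pointer(pkt, sizeof(pkt), 0, sizeof(h), &h))
		puts("truncated ICMPv6 header, keep the outer flow keys");
	return 0;
}
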
index f343e6f0fc95a89fa5f386430dc161600367338c..eab39bd91548882abe19b16dff3c686409b5e856 100644 (file)
@@ -91,6 +91,24 @@ static void set_tun_src(struct net *net, struct net_device *dev,
        rcu_read_unlock();
 }
 
+/* Compute flowlabel for outer IPv6 header */
+static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
+                                 struct ipv6hdr *inner_hdr)
+{
+       int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
+       __be32 flowlabel = 0;
+       u32 hash;
+
+       if (do_flowlabel > 0) {
+               hash = skb_get_hash(skb);
+               hash = rol32(hash, 16);
+               flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+       } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
+               flowlabel = ip6_flowlabel(inner_hdr);
+       }
+       return flowlabel;
+}
+
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 {
@@ -99,6 +117,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
        struct ipv6hdr *hdr, *inner_hdr;
        struct ipv6_sr_hdr *isrh;
        int hdrlen, tot_len, err;
+       __be32 flowlabel;
 
        hdrlen = (osrh->hdrlen + 1) << 3;
        tot_len = hdrlen + sizeof(*hdr);
@@ -108,6 +127,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
                return err;
 
        inner_hdr = ipv6_hdr(skb);
+       flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
 
        skb_push(skb, tot_len);
        skb_reset_network_header(skb);
@@ -121,10 +141,10 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 
        if (skb->protocol == htons(ETH_P_IPV6)) {
                ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
-                            ip6_flowlabel(inner_hdr));
+                            flowlabel);
                hdr->hop_limit = inner_hdr->hop_limit;
        } else {
-               ip6_flow_hdr(hdr, 0, 0);
+               ip6_flow_hdr(hdr, 0, flowlabel);
                hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
        }
 
@@ -136,7 +156,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
        isrh->nexthdr = proto;
 
        hdr->daddr = isrh->segments[isrh->first_segment];
-       set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
+       set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
 
 #ifdef CONFIG_IPV6_SEG6_HMAC
        if (sr_has_hmac(isrh)) {
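
seg6_make_flowlabel() above hashes the inner flow and folds the hash into the 20-bit flowlabel when the sysctl is positive, copies the inner label when it is zero and the payload is IPv6, and leaves it zero otherwise; note that the rol32() result must be assigned back (fixed above). A userspace sketch of the computation, staying in host byte order where the kernel uses a big-endian mask and skb_get_hash():

/* Userspace sketch of the outer-flowlabel computation in
 * seg6_make_flowlabel(); the hash input stands in for skb_get_hash(). */
#include <stdint.h>
#include <stdio.h>

#define FLOWLABEL_MASK 0x000fffffu     /* host-order stand-in */

static uint32_t rol32(uint32_t v, unsigned int n)
{
	return (v << n) | (v >> (32 - n));
}

static uint32_t make_flowlabel(uint32_t flow_hash)
{
	flow_hash = rol32(flow_hash, 16);  /* note: result must be kept */
	return flow_hash & FLOWLABEL_MASK;
}

int main(void)
{
	printf("flowlabel=0x%05x\n", make_flowlabel(0xdeadbeefu));
	return 0;
}
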
index 6fbdef63015299380b3fbfd606d42176adad353d..e15cd37024fd9786bc675754514f03f5a8c919c2 100644 (file)
@@ -152,6 +152,13 @@ static struct ctl_table ipv6_table_template[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
+       {
+               .procname       = "seg6_flowlabel",
+               .data           = &init_net.ipv6.sysctl.seg6_flowlabel,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
        { }
 };
 
@@ -217,6 +224,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
        ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
        ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
        ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy;
+       ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel;
 
        ipv6_route_table = ipv6_route_sysctl_init(net);
        if (!ipv6_route_table)
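
The new knob surfaces as net.ipv6.seg6_flowlabel, controlling the encap behaviour shown above. A minimal sketch of flipping it from C, assuming the conventional /proc/sys mapping of that sysctl (root or CAP_NET_ADMIN required):

/* Minimal sketch: enable hash-derived SRv6 outer flowlabels via the
 * new sysctl; the /proc/sys path is the conventional mapping. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/net/ipv6/seg6_flowlabel", O_WRONLY);

	if (fd < 0) {
		perror("open");        /* kernel too old, or no privilege */
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}
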
index 6d664d83cd16a26cd96d65e2894468eb4ea38661..7d47c2b550a94f94f6647ed42f0ba05cc67d699c 100644 (file)
@@ -803,6 +803,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
        unsigned int tot_len = sizeof(struct tcphdr);
        struct dst_entry *dst;
        __be32 *topt;
+       __u32 mark = 0;
 
        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -871,7 +872,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
                fl6.flowi6_oif = oif;
        }
 
-       fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
+       if (sk)
+               mark = (sk->sk_state == TCP_TIME_WAIT) ?
+                       inet_twsk(sk)->tw_mark : sk->sk_mark;
+       fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
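
With the change above, resets and timewait ACKs that the kernel generates on a socket's behalf reuse the originating socket's mark (tw_mark once the connection is in timewait) whenever no reply mark is configured via IP6_REPLY_MARK, so policy routing keyed on SO_MARK also steers these replies. A sketch of setting the mark; SO_MARK is a long-standing option requiring CAP_NET_ADMIN.

/* Sketch: mark a TCP socket with SO_MARK (CAP_NET_ADMIN required).
 * Kernel-generated RSTs/ACKs for this socket now carry the same mark. */
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	unsigned int mark = 0x42;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_MARK,
				 &mark, sizeof(mark)) < 0) {
		perror("SO_MARK");
		return 1;
	}
	puts("socket marked; kernel-generated replies inherit it");
	return 0;
}
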
index 4ec76a87aeb869050022d71c8095d840526cef70..2839c1bd1e584f8fbc3cb7f999f741f3f41b4051 100644 (file)
@@ -148,9 +148,9 @@ static int compute_score(struct sock *sk, struct net *net,
                bool dev_match = (sk->sk_bound_dev_if == dif ||
                                  sk->sk_bound_dev_if == sdif);
 
-               if (exact_dif && !dev_match)
+               if (!dev_match)
                        return -1;
-               if (sk->sk_bound_dev_if && dev_match)
+               if (sk->sk_bound_dev_if)
                        score++;
        }
 
@@ -546,10 +546,10 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
        __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
 
-static struct static_key udpv6_encap_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
 void udpv6_encap_enable(void)
 {
-       static_key_enable(&udpv6_encap_needed);
+       static_branch_enable(&udpv6_encap_needed_key);
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
 
@@ -561,7 +561,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
                goto drop;
 
-       if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+       if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
                int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 
                /*
@@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
  *     Sending
  */
 
-static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
+static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+                          struct inet_cork *cork)
 {
        struct sock *sk = skb->sk;
        struct udphdr *uh;
@@ -1042,12 +1043,31 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
        uh->len = htons(len);
        uh->check = 0;
 
+       if (cork->gso_size) {
+               const int hlen = skb_network_header_len(skb) +
+                                sizeof(struct udphdr);
+
+               if (hlen + cork->gso_size > cork->fragsize)
+                       return -EINVAL;
+               if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+                       return -EINVAL;
+               if (udp_sk(sk)->no_check6_tx)
+                       return -EINVAL;
+               if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+                       return -EIO;
+
+               skb_shinfo(skb)->gso_size = cork->gso_size;
+               skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+               goto csum_partial;
+       }
+
        if (is_udplite)
                csum = udplite_csum(skb);
        else if (udp_sk(sk)->no_check6_tx) {   /* UDP csum disabled */
                skb->ip_summed = CHECKSUM_NONE;
                goto send;
        } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+csum_partial:
                udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
                goto send;
        } else
@@ -1093,7 +1113,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
        if (!skb)
                goto out;
 
-       err = udp_v6_send_skb(skb, &fl6);
+       err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
 
 out:
        up->len = 0;
@@ -1127,6 +1147,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        ipc6.hlimit = -1;
        ipc6.tclass = -1;
        ipc6.dontfrag = -1;
+       ipc6.gso_size = up->gso_size;
        sockc.tsflags = sk->sk_tsflags;
 
        /* destination address check */
@@ -1259,7 +1280,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                opt->tot_len = sizeof(*opt);
                ipc6.opt = opt;
 
-               err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+               err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
+               if (err > 0)
+                       err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
+                                                   &ipc6, &sockc);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
@@ -1324,15 +1348,16 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        /* Lockless fast path for the non-corking case */
        if (!corkreq) {
+               struct inet_cork_full cork;
                struct sk_buff *skb;
 
                skb = ip6_make_skb(sk, getfrag, msg, ulen,
                                   sizeof(struct udphdr), &ipc6,
                                   &fl6, (struct rt6_info *)dst,
-                                  msg->msg_flags, &sockc);
+                                  msg->msg_flags, &cork, &sockc);
                err = PTR_ERR(skb);
                if (!IS_ERR_OR_NULL(skb))
-                       err = udp_v6_send_skb(skb, &fl6);
+                       err = udp_v6_send_skb(skb, &fl6, &cork.base);
                goto out;
        }
 
@@ -1402,7 +1427,7 @@ void udpv6_destroy_sock(struct sock *sk)
        udp_v6_flush_pending_frames(sk);
        release_sock(sk);
 
-       if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+       if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
                void (*encap_destroy)(struct sock *sk);
                encap_destroy = READ_ONCE(up->encap_destroy);
                if (encap_destroy)
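
This is the IPv6 half of UDP segmentation offload: userspace hands the kernel one large datagram plus a segment size, and udp_v6_send_skb() rejects it unless each segment fits the path budget (hlen + gso_size <= fragsize), the total stays within UDP_MAX_SEGMENTS segments, and hardware checksumming is available (no UDP-Lite, no no_check6_tx). A sketch using the per-call ancillary message, assuming the UDP_SEGMENT interface (value 103) introduced alongside this series:

/* Sketch: send one 4000-byte buffer as ~1400-byte datagrams over IPv6
 * via the UDP_SEGMENT cmsg; the constant is defined locally in case
 * libc headers predate it. */
#include <netinet/in.h>
#include <netinet/udp.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif

int main(void)
{
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
				    .sin6_port = htons(9),
				    .sin6_addr = IN6ADDR_LOOPBACK_INIT };
	char payload[4000] = { 0 };
	char cbuf[CMSG_SPACE(sizeof(uint16_t))] = { 0 };
	struct iovec iov = { .iov_base = payload,
			     .iov_len = sizeof(payload) };
	struct msghdr msg = { .msg_name = &dst,
			      .msg_namelen = sizeof(dst),
			      .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	uint16_t gso_size = 1400;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	cm->cmsg_level = SOL_UDP;
	cm->cmsg_type = UDP_SEGMENT;
	cm->cmsg_len = CMSG_LEN(sizeof(gso_size));
	memcpy(CMSG_DATA(cm), &gso_size, sizeof(gso_size));

	if (fd < 0 || sendmsg(fd, &msg, 0) < 0)
		perror("sendmsg");
	return 0;
}
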
index 2a04dc9c781b5f236fbe6dc3804f5eeed174ee44..03a2ff3fe1e697e752e2aa9f13703b6feaff0453 100644 (file)
@@ -42,12 +42,15 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
                const struct ipv6hdr *ipv6h;
                struct udphdr *uh;
 
-               if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+               if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
                        goto out;
 
                if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                        goto out;
 
+               if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+                       return __udp_gso_segment(skb, features);
+
                /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
                 * do checksum of UDP packets sent as multiple IP fragments.
                 */
index 16f434791763f01de79cc0bf1d74715384034392..5bdca3d5d6b73b7a9ddd8b155cabaf832d3da30c 100644 (file)
@@ -60,11 +60,9 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
 static int
 __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
 {
-       int i;
+       int count[XFRM_MAX_DEPTH] = { };
        int class[XFRM_MAX_DEPTH];
-       int count[maxclass];
-
-       memset(count, 0, sizeof(count));
+       int i;
 
        for (i = 0; i < n; i++) {
                int c;
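
The `int count[maxclass]` removed above was a variable-length array; since maxclass never exceeds XFRM_MAX_DEPTH, a fixed, zero-initialized bound replaces it as part of the kernel-wide VLA removal. The function itself is a stable counting sort by class; a self-contained sketch of that algorithm under the same fixed bound:

/* Userspace sketch of the stable counting sort in __xfrm6_sort():
 * count items per class, turn counts into offsets, then place items. */
#include <stdio.h>

#define MAX_DEPTH 6                    /* stand-in for XFRM_MAX_DEPTH */

static void sort_by_class(int *dst, const int *src, int n,
			  int (*classify)(int), int maxclass)
{
	int count[MAX_DEPTH] = { 0 };  /* fixed size, no VLA */
	int class[MAX_DEPTH];
	int i;

	/* caller guarantees n <= MAX_DEPTH and maxclass <= MAX_DEPTH */
	for (i = 0; i < n; i++) {
		class[i] = classify(src[i]);
		count[class[i]]++;
	}
	for (i = 1; i < maxclass; i++)   /* prefix sums: end offsets */
		count[i] += count[i - 1];
	for (i = n - 1; i >= 0; i--)     /* backwards keeps it stable */
		dst[--count[class[i]]] = src[i];
}

static int parity(int v) { return v & 1; }   /* classes: even, odd */

int main(void)
{
	int src[] = { 5, 2, 7, 4, 1 }, dst[5];
	int i;

	sort_by_class(dst, src, 5, parity, 2);
	for (i = 0; i < 5; i++)
		printf("%d ", dst[i]);       /* 2 4 5 7 1 */
	putchar('\n');
	return 0;
}
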
index f85f0d7480acf48074a7d53557c3c50ca59973cf..4a46df8441c9fabd96c1a10e1b74e8821760c6e4 100644 (file)
@@ -341,6 +341,9 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
        struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
        unsigned int i;
 
+       xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
+       xfrm_flush_gc();
+
        for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
                WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
 
index 7e2e7188e7f4a28aa45c26848364ab0c297161a2..e62e52e8f1415f711133a97c357ae3da570ab781 100644 (file)
@@ -437,6 +437,24 @@ static int verify_address_len(const void *p)
        return 0;
 }
 
+static inline int sadb_key_len(const struct sadb_key *key)
+{
+       int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8);
+
+       return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes,
+                           sizeof(uint64_t));
+}
+
+static int verify_key_len(const void *p)
+{
+       const struct sadb_key *key = p;
+
+       if (sadb_key_len(key) > key->sadb_key_len)
+               return -EINVAL;
+
+       return 0;
+}
+
 static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
 {
        return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
@@ -533,16 +551,25 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
                                return -EINVAL;
                        if (ext_hdrs[ext_type-1] != NULL)
                                return -EINVAL;
-                       if (ext_type == SADB_EXT_ADDRESS_SRC ||
-                           ext_type == SADB_EXT_ADDRESS_DST ||
-                           ext_type == SADB_EXT_ADDRESS_PROXY ||
-                           ext_type == SADB_X_EXT_NAT_T_OA) {
+                       switch (ext_type) {
+                       case SADB_EXT_ADDRESS_SRC:
+                       case SADB_EXT_ADDRESS_DST:
+                       case SADB_EXT_ADDRESS_PROXY:
+                       case SADB_X_EXT_NAT_T_OA:
                                if (verify_address_len(p))
                                        return -EINVAL;
-                       }
-                       if (ext_type == SADB_X_EXT_SEC_CTX) {
+                               break;
+                       case SADB_X_EXT_SEC_CTX:
                                if (verify_sec_ctx_len(p))
                                        return -EINVAL;
+                               break;
+                       case SADB_EXT_KEY_AUTH:
+                       case SADB_EXT_KEY_ENCRYPT:
+                               if (verify_key_len(p))
+                                       return -EINVAL;
+                               break;
+                       default:
+                               break;
                        }
                        ext_hdrs[ext_type-1] = (void *) p;
                }
@@ -1104,14 +1131,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
        key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
        if (key != NULL &&
            sa->sadb_sa_auth != SADB_X_AALG_NULL &&
-           ((key->sadb_key_bits+7) / 8 == 0 ||
-            (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+           key->sadb_key_bits == 0)
                return ERR_PTR(-EINVAL);
        key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
        if (key != NULL &&
            sa->sadb_sa_encrypt != SADB_EALG_NULL &&
-           ((key->sadb_key_bits+7) / 8 == 0 ||
-            (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+           key->sadb_key_bits == 0)
                return ERR_PTR(-EINVAL);
 
        x = xfrm_state_alloc(net);
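
The parse_exthdrs() change validates key extensions up front: the payload implied by sadb_key_bits, rounded up to whole 64-bit words together with the header, must fit inside the claimed sadb_key_len, which lets pfkey_msg2xfrm_state() reduce its own check to rejecting zero-length keys. The same arithmetic in a standalone sketch, with the header layout abbreviated for the demo:

/* Userspace sketch of verify_key_len(): reject a PF_KEY key extension
 * whose bit count implies more payload than its claimed length
 * (counted in 8-byte units). */
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

struct sadb_key_hdr {              /* leading fields of struct sadb_key */
	uint16_t sadb_key_len;     /* extension length, 64-bit words */
	uint16_t sadb_key_exttype;
	uint16_t sadb_key_bits;    /* key material length, bits */
	uint16_t sadb_key_reserved;
};

static int verify_key_len(const struct sadb_key_hdr *key)
{
	unsigned int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8);
	unsigned int need = DIV_ROUND_UP(sizeof(*key) + key_bytes,
					 sizeof(uint64_t));

	return need > key->sadb_key_len ? -1 : 0;
}

int main(void)
{
	struct sadb_key_hdr bad = { .sadb_key_len = 1,
				    .sadb_key_bits = 256 };

	/* 8 header + 32 key bytes need 5 words, only 1 claimed: reject */
	printf("verify: %d\n", verify_key_len(&bad));
	return 0;
}
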
index b8f9d45bfeb14c18824c3e1deff2375feeea2567..e87686f7d63ca7d98f9bcb60e72d36a684324d21 100644 (file)
@@ -57,6 +57,10 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
 
 static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
 {
+       /* Drop reference taken during previous invocation */
+       if (pd->session)
+               l2tp_session_dec_refcount(pd->session);
+
        pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
        pd->session_idx++;
 
@@ -105,9 +109,17 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
        if (!pd || pd == SEQ_START_TOKEN)
                return;
 
-       /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */
-       if (pd->tunnel)
+       /* Drop reference taken by last invocation of l2tp_dfs_next_session()
+        * or l2tp_dfs_next_tunnel().
+        */
+       if (pd->session) {
+               l2tp_session_dec_refcount(pd->session);
+               pd->session = NULL;
+       }
+       if (pd->tunnel) {
                l2tp_tunnel_dec_refcount(pd->tunnel);
+               pd->tunnel = NULL;
+       }
 }
 
 static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
@@ -247,13 +259,10 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
                goto out;
        }
 
-       /* Show the tunnel or session context */
-       if (!pd->session) {
+       if (!pd->session)
                l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
-       } else {
+       else
                l2tp_dfs_seq_session_show(m, pd->session);
-               l2tp_session_dec_refcount(pd->session);
-       }
 
 out:
        return 0;
index 7d0c963680e64f6b0ec1fdac53e44260f0264996..f951c768dcf2491455ffa8ce406b9c4834d69c31 100644 (file)
@@ -619,6 +619,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
        lock_sock(sk);
 
        error = -EINVAL;
+
+       if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) &&
+           sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) &&
+           sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) &&
+           sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6))
+               goto end;
+
        if (sp->sa_protocol != PX_PROTO_OL2TP)
                goto end;
 
@@ -1569,6 +1576,10 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
 
 static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
 {
+       /* Drop reference taken during previous invocation */
+       if (pd->session)
+               l2tp_session_dec_refcount(pd->session);
+
        pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
        pd->session_idx++;
 
@@ -1617,9 +1628,17 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
        if (!pd || pd == SEQ_START_TOKEN)
                return;
 
-       /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */
-       if (pd->tunnel)
+       /* Drop reference taken by last invocation of pppol2tp_next_session()
+        * or pppol2tp_next_tunnel().
+        */
+       if (pd->session) {
+               l2tp_session_dec_refcount(pd->session);
+               pd->session = NULL;
+       }
+       if (pd->tunnel) {
                l2tp_tunnel_dec_refcount(pd->tunnel);
+               pd->tunnel = NULL;
+       }
 }
 
 static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
@@ -1713,14 +1732,10 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
                goto out;
        }
 
-       /* Show the tunnel or session context.
-        */
-       if (!pd->session) {
+       if (!pd->session)
                pppol2tp_seq_tunnel_show(m, pd->tunnel);
-       } else {
+       else
                pppol2tp_seq_session_show(m, pd->session);
-               l2tp_session_dec_refcount(pd->session);
-       }
 
 out:
        return 0;
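
Both the debugfs and pppol2tp /proc walkers had the same leak: l2tp_session_get_nth() takes a reference, but it was only dropped inside the show() callback's session branch, so interrupting the walk leaked a session. The fix moves the put into next() (drop the previous element) and stop() (drop whatever is still held). A minimal sketch of that iterator discipline; the types are illustrative, not kernel API.

/* Sketch of the refcounted-iterator discipline both l2tp walkers now
 * follow: next() drops the previous reference, stop() drops whatever
 * is still held, show() leaves refcounts alone. */
#include <stdio.h>

struct obj { int refs; int id; };

static struct obj pool[3] = { { 0, 0 }, { 0, 1 }, { 0, 2 } };

static void get(struct obj *o) { if (o) o->refs++; }
static void put(struct obj *o) { if (o) o->refs--; }

struct iter { struct obj *cur; int idx; };

static void next(struct iter *it)
{
	put(it->cur);                       /* drop the previous element */
	it->cur = it->idx < 3 ? &pool[it->idx++] : NULL;
	get(it->cur);
}

static void stop(struct iter *it)
{
	put(it->cur);                       /* drop whatever is left */
	it->cur = NULL;
}

int main(void)
{
	struct iter it = { NULL, 0 };
	int i;

	next(&it);                          /* visit session 0 */
	next(&it);                          /* visit session 1, drops 0 */
	stop(&it);                          /* early stop, drops 1 */
	for (i = 0; i < 3; i++)
		printf("obj %d refs=%d\n", pool[i].id, pool[i].refs);
	return 0;
}
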
index 6d29b2b94e8482c507a619eddde29230ea83c3a4..1beeea9549fa6ec1f7b0e5f9af8ff3250a316f59 100644 (file)
@@ -189,7 +189,6 @@ static int llc_ui_release(struct socket *sock)
 {
        struct sock *sk = sock->sk;
        struct llc_sock *llc;
-       struct llc_sap *sap;
 
        if (unlikely(sk == NULL))
                goto out;
@@ -200,15 +199,19 @@ static int llc_ui_release(struct socket *sock)
                llc->laddr.lsap, llc->daddr.lsap);
        if (!llc_send_disc(sk))
                llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
-       sap = llc->sap;
-       /* Hold this for release_sock(), so that llc_backlog_rcv() could still
-        * use it.
-        */
-       llc_sap_hold(sap);
-       if (!sock_flag(sk, SOCK_ZAPPED))
+       if (!sock_flag(sk, SOCK_ZAPPED)) {
+               struct llc_sap *sap = llc->sap;
+
+               /* Hold this for release_sock(), so that llc_backlog_rcv()
+                * could still use it.
+                */
+               llc_sap_hold(sap);
                llc_sap_remove_socket(llc->sap, sk);
-       release_sock(sk);
-       llc_sap_put(sap);
+               release_sock(sk);
+               llc_sap_put(sap);
+       } else {
+               release_sock(sk);
+       }
        if (llc->dev)
                dev_put(llc->dev);
        sock_put(sk);
@@ -927,6 +930,9 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        if (size > llc->dev->mtu)
                size = llc->dev->mtu;
        copied = size - hdrlen;
+       rc = -EINVAL;
+       if (copied < 0)
+               goto release;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
index 163121192acae1c88f6a81d7a594c189da3678da..4d78375f9872d1a6962aa0e7cda7b913a6e0a7f3 100644 (file)
@@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb)
 
 int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb)
 {
-       struct llc_sock *llc = llc_sk(sk);
-
-       del_timer(&llc->pf_cycle_timer.timer);
-       del_timer(&llc->ack_timer.timer);
-       del_timer(&llc->rej_sent_timer.timer);
-       del_timer(&llc->busy_state_timer.timer);
-       llc->ack_must_be_send = 0;
-       llc->ack_pf = 0;
+       llc_sk_stop_all_timers(sk, false);
        return 0;
 }
 
index 110e32bcb39976c9e319329f6ca9ba5809614aa8..c0ac522b48a1404c6b06c1ac1d85dae463316768 100644 (file)
@@ -961,6 +961,26 @@ struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct pr
        return sk;
 }
 
+void llc_sk_stop_all_timers(struct sock *sk, bool sync)
+{
+       struct llc_sock *llc = llc_sk(sk);
+
+       if (sync) {
+               del_timer_sync(&llc->pf_cycle_timer.timer);
+               del_timer_sync(&llc->ack_timer.timer);
+               del_timer_sync(&llc->rej_sent_timer.timer);
+               del_timer_sync(&llc->busy_state_timer.timer);
+       } else {
+               del_timer(&llc->pf_cycle_timer.timer);
+               del_timer(&llc->ack_timer.timer);
+               del_timer(&llc->rej_sent_timer.timer);
+               del_timer(&llc->busy_state_timer.timer);
+       }
+
+       llc->ack_must_be_send = 0;
+       llc->ack_pf = 0;
+}
+
 /**
  *     llc_sk_free - Frees a LLC socket
 *     @sk: socket to free
@@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk)
 
        llc->state = LLC_CONN_OUT_OF_SVC;
        /* Stop all (possibly) running timers */
-       llc_conn_ac_stop_all_timers(sk, NULL);
+       llc_sk_stop_all_timers(sk, true);
 #ifdef DEBUG_LLC_CONN_ALLOC
        printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__,
                skb_queue_len(&llc->pdu_unack_q),
index 595c662a61e87bd7621c9336df8a81b5080c1d1f..ac4295296514365ad1972ddc22754be1cdb8384a 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -970,6 +971,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 
                sta->ampdu_mlme.addba_req_num[tid] = 0;
 
+               tid_tx->timeout =
+                       le16_to_cpu(mgmt->u.action.u.addba_resp.timeout);
+
                if (tid_tx->timeout) {
                        mod_timer(&tid_tx->session_timer,
                                  TU_TO_EXP_TIME(tid_tx->timeout));
index 69449db7e283316197a4d900f6cd9159e3185ea5..233068756502b733520190835bff56b5f869be10 100644 (file)
@@ -36,6 +36,7 @@
 #define IEEE80211_AUTH_TIMEOUT         (HZ / 5)
 #define IEEE80211_AUTH_TIMEOUT_LONG    (HZ / 2)
 #define IEEE80211_AUTH_TIMEOUT_SHORT   (HZ / 10)
+#define IEEE80211_AUTH_TIMEOUT_SAE     (HZ * 2)
 #define IEEE80211_AUTH_MAX_TRIES       3
 #define IEEE80211_AUTH_WAIT_ASSOC      (HZ * 5)
 #define IEEE80211_ASSOC_TIMEOUT                (HZ / 5)
@@ -1787,7 +1788,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
                params[ac].acm = acm;
                params[ac].uapsd = uapsd;
 
-               if (params->cw_min == 0 ||
+               if (params[ac].cw_min == 0 ||
                    params[ac].cw_min > params[ac].cw_max) {
                        sdata_info(sdata,
                                   "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
@@ -3814,16 +3815,19 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
                            tx_flags);
 
        if (tx_flags == 0) {
-               auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
-               auth_data->timeout_started = true;
-               run_again(sdata, auth_data->timeout);
+               if (auth_data->algorithm == WLAN_AUTH_SAE)
+                       auth_data->timeout = jiffies +
+                               IEEE80211_AUTH_TIMEOUT_SAE;
+               else
+                       auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
        } else {
                auth_data->timeout =
                        round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG);
-               auth_data->timeout_started = true;
-               run_again(sdata, auth_data->timeout);
        }
 
+       auth_data->timeout_started = true;
+       run_again(sdata, auth_data->timeout);
+
        return 0;
 }
 
@@ -3894,8 +3898,15 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
                ifmgd->status_received = false;
                if (ifmgd->auth_data && ieee80211_is_auth(fc)) {
                        if (status_acked) {
-                               ifmgd->auth_data->timeout =
-                                       jiffies + IEEE80211_AUTH_TIMEOUT_SHORT;
+                               if (ifmgd->auth_data->algorithm ==
+                                   WLAN_AUTH_SAE)
+                                       ifmgd->auth_data->timeout =
+                                               jiffies +
+                                               IEEE80211_AUTH_TIMEOUT_SAE;
+                               else
+                                       ifmgd->auth_data->timeout =
+                                               jiffies +
+                                               IEEE80211_AUTH_TIMEOUT_SHORT;
                                run_again(sdata, ifmgd->auth_data->timeout);
                        } else {
                                ifmgd->auth_data->timeout = jiffies - 1;
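
Both mlme.c hunks above make the same substitution: SAE authentication
needs user space to derive keys and exchange additional frames, so the
patch waits IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2, i.e. two seconds) instead
of the usual HZ / 5 or HZ / 10 (200 ms or 100 ms). A hypothetical helper
capturing that selection; a sketch, not part of the patch:

/* "fallback" is whichever IEEE80211_AUTH_TIMEOUT* constant the call
 * site used before this change. */
static unsigned long demo_auth_timeout(int algorithm, unsigned long fallback)
{
	if (algorithm == WLAN_AUTH_SAE)
		return jiffies + IEEE80211_AUTH_TIMEOUT_SAE;	/* 2 s */
	return jiffies + fallback;
}
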
index 535de3161a781f90ea9a91e97fdc4aa46db0eac6..05a265cd573d4b5d9c5263dd4f8659ad83d327fb 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
  * Copyright 2007      Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1135,7 +1136,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
        }
 
        /* reset session timer */
-       if (reset_agg_timer && tid_tx->timeout)
+       if (reset_agg_timer)
                tid_tx->last_tx = jiffies;
 
        return queued;
index ce9497966ebe2eda3f046698bc2c3716a94e1eb6..a6b7c7d5c829694a2c253eaee55e4e006c948f40 100644 (file)
@@ -347,7 +347,7 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
 
        cmd = (struct ncsi_cmd_svf_pkt *)skb_network_header(nr->cmd);
        ncf = &nc->vlan_filter;
-       if (cmd->index > ncf->n_vids)
+       if (cmd->index == 0 || cmd->index > ncf->n_vids)
                return -ERANGE;
 
        /* Add or remove the VLAN filter. Remember HW indexes from 1 */
@@ -445,7 +445,8 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
        ncf = &nc->mac_filter;
        bitmap = &ncf->bitmap;
 
-       if (cmd->index > ncf->n_uc + ncf->n_mc + ncf->n_mixed)
+       if (cmd->index == 0 ||
+           cmd->index > ncf->n_uc + ncf->n_mc + ncf->n_mixed)
                return -ERANGE;
 
        index = (cmd->index - 1) * ETH_ALEN;
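
Both checks above close the same hole: NC-SI filter indexes are 1-based
(note the "Remember HW indexes from 1" comment), so an index of 0 is as
invalid as one past the filter count and must be rejected before the code
subtracts 1 to form a 0-based offset. A minimal sketch of the validation,
with hypothetical names:

#include <linux/types.h>

/* Validate a 1-based hardware filter index and translate it to the
 * 0-based offset used for table accesses; the valid range is 1..n. */
static bool demo_hw_index_to_offset(unsigned int index,
				    unsigned int n_entries,
				    unsigned int *offset)
{
	if (index == 0 || index > n_entries)
		return false;
	*offset = index - 1;	/* cannot underflow after the check */
	return true;
}
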
index 704b3832dbad33688d8298518ec34c19cb57d847..e57c9d4795035410bb904797f53471d3e0c3c05b 100644 (file)
@@ -444,6 +444,9 @@ config NETFILTER_SYNPROXY
 
 endif # NF_CONNTRACK
 
+config NF_OSF
+       tristate 'Passive OS fingerprint infrastructure'
+
 config NF_TABLES
        select NETFILTER_NETLINK
        tristate "Netfilter nf_tables support"
@@ -474,24 +477,6 @@ config NF_TABLES_NETDEV
        help
          This option enables support for the "netdev" table.
 
-config NFT_EXTHDR
-       tristate "Netfilter nf_tables exthdr module"
-       help
-         This option adds the "exthdr" expression that you can use to match
-         IPv6 extension headers and tcp options.
-
-config NFT_META
-       tristate "Netfilter nf_tables meta module"
-       help
-         This option adds the "meta" expression that you can use to match and
-         to set packet metainformation such as the packet mark.
-
-config NFT_RT
-       tristate "Netfilter nf_tables routing module"
-       help
-         This option adds the "rt" expression that you can use to match
-         packet routing information such as the packet nexthop.
-
 config NFT_NUMGEN
        tristate "Netfilter nf_tables number generator module"
        help
@@ -594,6 +579,7 @@ config NFT_QUOTA
 config NFT_REJECT
        default m if NETFILTER_ADVANCED=n
        tristate "Netfilter nf_tables reject support"
+       depends on !NF_TABLES_INET || (IPV6!=m || m)
        help
          This option adds the "reject" expression that you can use to
          explicitly deny and notify via TCP reset/ICMP informational errors
@@ -666,8 +652,7 @@ endif # NF_TABLES
 
 config NF_FLOW_TABLE_INET
        tristate "Netfilter flow table mixed IPv4/IPv6 module"
-       depends on NF_FLOW_TABLE_IPV4
-       depends on NF_FLOW_TABLE_IPV6
+       depends on NF_FLOW_TABLE
        help
           This option adds the flow table mixed IPv4/IPv6 support.
 
@@ -1377,6 +1362,7 @@ config NETFILTER_XT_MATCH_NFACCT
 config NETFILTER_XT_MATCH_OSF
        tristate '"osf" Passive OS fingerprint match'
        depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
+       select NF_OSF
        help
          This option selects the Passive OS Fingerprinting match module
          that allows to passively match the remote operating system by
index fd32bd2c95215800e374998c0d6d88de5076bd44..1aa710b5d384ecc76a49be9011d64851bb427ff6 100644 (file)
@@ -76,13 +76,10 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
 nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
                  nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
                  nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
-                 nft_dynset.o
+                 nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
 
 obj-$(CONFIG_NF_TABLES)                += nf_tables.o
 obj-$(CONFIG_NFT_COMPAT)       += nft_compat.o
-obj-$(CONFIG_NFT_EXTHDR)       += nft_exthdr.o
-obj-$(CONFIG_NFT_META)         += nft_meta.o
-obj-$(CONFIG_NFT_RT)           += nft_rt.o
 obj-$(CONFIG_NFT_NUMGEN)       += nft_numgen.o
 obj-$(CONFIG_NFT_CT)           += nft_ct.o
 obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
@@ -104,6 +101,7 @@ obj-$(CONFIG_NFT_HASH)              += nft_hash.o
 obj-$(CONFIG_NFT_FIB)          += nft_fib.o
 obj-$(CONFIG_NFT_FIB_INET)     += nft_fib_inet.o
 obj-$(CONFIG_NFT_FIB_NETDEV)   += nft_fib_netdev.o
+obj-$(CONFIG_NF_OSF)           += nf_osf.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)   += nft_dup_netdev.o
@@ -111,6 +109,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV)        += nft_fwd_netdev.o
 
 # flow table infrastructure
 obj-$(CONFIG_NF_FLOW_TABLE)    += nf_flow_table.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+
 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
 
 # generic X tables 
index 0f6b8172fb9ab1bed02439b130b306b42e22adf6..206fb2c4c319da69c6016920955f1509ee5cff27 100644 (file)
@@ -585,7 +585,8 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(nf_nat_decode_session_hook);
 #endif
 
-static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
+static void __net_init
+__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
 {
        int h;
 
index b32fb0dbe237dcffb0a979b8c270c18d9f56e449..05dc1b77e466f4d556037b8bb6464c151ccb89a6 100644 (file)
@@ -225,6 +225,25 @@ config     IP_VS_SH
          If you want to compile it in kernel, say Y. To compile it as a
          module, choose M here. If unsure, say N.
 
+config IP_VS_MH
+       tristate "maglev hashing scheduling"
+       ---help---
+         The maglev consistent hashing scheduling algorithm implements
+         Google's Maglev hashing algorithm as an IPVS scheduler. It
+         assigns network connections to servers by looking up a
+         statically assigned hash table called the lookup table. Maglev
+         hashing assigns a preference list of all the lookup table
+         positions to each destination.
+
+         Through this operation, Maglev hashing gives an almost equal
+         share of the lookup table to each of the destinations and
+         provides minimal disruption: when the set of destinations
+         changes, a connection will likely be sent to the same
+         destination as it was before.
+
+         If you want to compile it in kernel, say Y. To compile it as a
+         module, choose M here. If unsure, say N.
+
 config IP_VS_SED
        tristate "shortest expected delay scheduling"
        ---help---
@@ -266,6 +285,24 @@ config IP_VS_SH_TAB_BITS
          needs to be large enough to effectively fit all the destinations
          multiplied by their respective weights.
 
+comment 'IPVS MH scheduler'
+
+config IP_VS_MH_TAB_INDEX
+       int "IPVS maglev hashing table index of size (the prime numbers)"
+       range 8 17
+       default 12
+       ---help---
+         The maglev hashing scheduler maps source IPs to destinations
+         stored in a hash table. The table is filled by walking each
+         destination's preference list of positions until all slots
+         are taken. The index selects the prime used as the table
+         size: 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+         65521 or 131071. When using weights to allow destinations to
+         receive more connections, the table is assigned an amount of
+         slots proportional to the weights specified. The table needs
+         to be large enough to effectively fit all the destinations
+         multiplied by their respective weights.
+
 comment 'IPVS application helper'
 
 config IP_VS_FTP
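
The index-to-size mapping described in the IP_VS_MH_TAB_INDEX help text is
implemented in ip_vs_mh.c further down: the configured index (8..17) is
offset by 8 into a table of primes, so the default of 12 selects a
4093-slot lookup table. A worked sketch, with the constants copied from
that file:

/* Values copied from ip_vs_mh.c below; illustration only. */
static const int demo_primes[] = {251, 509, 1021, 2039, 4093,
				  8191, 16381, 32749, 65521, 131071};

static int demo_mh_tab_size(int config_index)	/* config_index: 8..17 */
{
	/* default 12 -> demo_primes[4] == 4093 */
	return demo_primes[config_index - 8];
}
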
index c552993fa4b9181e68e7d9680babec0462288719..bfce2677fda2610b681effae44d27b465b64795a 100644 (file)
@@ -33,6 +33,7 @@ obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
 obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
 obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
 obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
 obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
 obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
 
index 370abbf6f4217362ffe53e766964015776cc976f..75de46576f5100d40ce9edf5ddb3ce7818447a53 100644 (file)
@@ -232,7 +232,10 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
 {
        unsigned int hash;
-       bool ret;
+       bool ret = false;
+
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               return refcount_dec_if_one(&cp->refcnt);
 
        hash = ip_vs_conn_hashkey_conn(cp);
 
@@ -240,15 +243,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
        spin_lock(&cp->lock);
 
        if (cp->flags & IP_VS_CONN_F_HASHED) {
-               ret = false;
                /* Decrease refcnt and unlink conn only if we are last user */
                if (refcount_dec_if_one(&cp->refcnt)) {
                        hlist_del_rcu(&cp->c_list);
                        cp->flags &= ~IP_VS_CONN_F_HASHED;
                        ret = true;
                }
-       } else
-               ret = refcount_read(&cp->refcnt) ? false : true;
+       }
 
        spin_unlock(&cp->lock);
        ct_write_unlock_bh(hash);
@@ -454,12 +455,6 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
 }
 EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
 
-static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
-{
-       __ip_vs_conn_put(cp);
-       ip_vs_conn_expire(&cp->timer);
-}
-
 /*
  *      Put back the conn and restart its timer with its timeout
  */
@@ -478,7 +473,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
            (refcount_read(&cp->refcnt) == 1) &&
            !timer_pending(&cp->timer))
                /* expire connection immediately */
-               __ip_vs_conn_put_notimer(cp);
+               ip_vs_conn_expire(&cp->timer);
        else
                __ip_vs_conn_put_timer(cp);
 }
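
The early return added above works because one-packet connections are
never hashed, so ip_vs_conn_unlink() can skip the bucket lock entirely and
just attempt the final reference drop. refcount_dec_if_one() decrements
only when the count is exactly one, which is the same "we are the last
user" test the hashed path performs under cp->lock. A minimal sketch of
the semantics, with a hypothetical object:

#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_obj {
	refcount_t refcnt;
	/* ... payload ... */
};

/* refcount_dec_if_one() atomically drops 1 -> 0 and returns true only
 * when the caller holds the last reference; otherwise the count is
 * left untouched and false is returned, so the object must not be
 * freed in that case. */
static bool demo_try_final_free(struct demo_obj *obj)
{
	if (!refcount_dec_if_one(&obj->refcnt))
		return false;	/* another holder still exists */
	kfree(obj);
	return true;
}
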
index 5f6f73cf2174d1494a685d73ca94ea124da83de5..0679dd101e72af062b0a167f817ae43904eb3da8 100644 (file)
@@ -119,6 +119,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                struct ip_vs_cpu_stats *s;
                struct ip_vs_service *svc;
 
+               local_bh_disable();
+
                s = this_cpu_ptr(dest->stats.cpustats);
                u64_stats_update_begin(&s->syncp);
                s->cnt.inpkts++;
@@ -137,6 +139,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                s->cnt.inpkts++;
                s->cnt.inbytes += skb->len;
                u64_stats_update_end(&s->syncp);
+
+               local_bh_enable();
        }
 }
 
@@ -151,6 +155,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                struct ip_vs_cpu_stats *s;
                struct ip_vs_service *svc;
 
+               local_bh_disable();
+
                s = this_cpu_ptr(dest->stats.cpustats);
                u64_stats_update_begin(&s->syncp);
                s->cnt.outpkts++;
@@ -169,6 +175,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                s->cnt.outpkts++;
                s->cnt.outbytes += skb->len;
                u64_stats_update_end(&s->syncp);
+
+               local_bh_enable();
        }
 }
 
@@ -179,6 +187,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
        struct netns_ipvs *ipvs = svc->ipvs;
        struct ip_vs_cpu_stats *s;
 
+       local_bh_disable();
+
        s = this_cpu_ptr(cp->dest->stats.cpustats);
        u64_stats_update_begin(&s->syncp);
        s->cnt.conns++;
@@ -193,6 +203,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
        u64_stats_update_begin(&s->syncp);
        s->cnt.conns++;
        u64_stats_update_end(&s->syncp);
+
+       local_bh_enable();
 }
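
The local_bh_disable()/local_bh_enable() pairs added above protect the
per-CPU u64_stats sequence counter: for traffic from local client
processes these counters are updated in process context, and without BH
protection a softirq interrupting the writer on the same CPU could
re-enter u64_stats_update_begin() on the same syncp. A minimal sketch of
the pattern, with a hypothetical stats structure:

#include <linux/bottom_half.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct demo_stats {
	u64			packets;
	struct u64_stats_sync	syncp;
};

static void demo_stats_inc(struct demo_stats __percpu *stats)
{
	struct demo_stats *s;

	local_bh_disable();	/* keep softirqs off this CPU's syncp */
	s = this_cpu_ptr(stats);
	u64_stats_update_begin(&s->syncp);
	s->packets++;
	u64_stats_update_end(&s->syncp);
	local_bh_enable();
}
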
 
 
index 5ebde4b15810c5c219ad64c1bb0b6378a38d468c..d4f68d0f7df7cbf5c81c49177d355f6afbfd44fd 100644 (file)
@@ -821,6 +821,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
        if (add && udest->af != svc->af)
                ipvs->mixed_address_family_dests++;
 
+       /* keep last_weight holding the latest non-zero weight */
+       if (add || udest->weight != 0)
+               atomic_set(&dest->last_weight, udest->weight);
+
        /* set the weight and the flags */
        atomic_set(&dest->weight, udest->weight);
        conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
@@ -2384,11 +2388,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                        strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
                                sizeof(cfg.mcast_ifn));
                        cfg.syncid = dm->syncid;
-                       rtnl_lock();
-                       mutex_lock(&ipvs->sync_mutex);
                        ret = start_sync_thread(ipvs, &cfg, dm->state);
-                       mutex_unlock(&ipvs->sync_mutex);
-                       rtnl_unlock();
                } else {
                        mutex_lock(&ipvs->sync_mutex);
                        ret = stop_sync_thread(ipvs, dm->state);
@@ -3481,12 +3481,8 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
        if (ipvs->mixed_address_family_dests > 0)
                return -EINVAL;
 
-       rtnl_lock();
-       mutex_lock(&ipvs->sync_mutex);
        ret = start_sync_thread(ipvs, &c,
                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
-       mutex_unlock(&ipvs->sync_mutex);
-       rtnl_unlock();
        return ret;
 }
 
index 75f798f8e83b706701787706c6fad2facad35155..07459e71d9072387531d760dbbde7ab23fc5f145 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <linux/hash.h>
 
 #include <net/ip_vs.h>
 
@@ -81,7 +82,7 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
-       return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK;
+       return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
 }
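
The replaced expression multiplied by the golden-ratio prime 2654435761
(0x9E3779B1) and then masked off all but the low bits, which are the
least-mixed bits of such a product. hash_32() from <linux/hash.h> also
does a golden-ratio multiply but returns the high bits instead, which
disperse far better. A userspace sketch of the difference; the constants
are shown for illustration (0x61C88647 is the kernel's GOLDEN_RATIO_32):

#include <stdint.h>

static inline uint32_t demo_hash_32(uint32_t val, unsigned int bits)
{
	return (val * 0x61C88647u) >> (32 - bits);	/* keep high bits */
}

static inline uint32_t demo_old_hash(uint32_t val, uint32_t mask)
{
	return (val * 2654435761u) & mask;		/* keeps low bits */
}
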
 
 
index 3057e453bf317c500cc77865ec4ed6d2a4eb479a..b9f375e6dc937d2979074365118ecd11d662c282 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/jiffies.h>
+#include <linux/hash.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -160,7 +161,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
-       return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
+       return hash_32(ntohl(addr_fold), IP_VS_LBLC_TAB_BITS);
 }
 
 
@@ -371,6 +372,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
        tbl->counter = 1;
        tbl->dead = false;
        tbl->svc = svc;
+       atomic_set(&tbl->entries, 0);
 
        /*
         *    Hook periodic timer for garbage collection
index 92adc04557ed954c769e309e9e51d945ac474620..542c4949937ab284ad656a558a8f9c24ccfff37c 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/jiffies.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -323,7 +324,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
-       return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
+       return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
 }
 
 
@@ -534,6 +535,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
        tbl->counter = 1;
        tbl->dead = false;
        tbl->svc = svc;
+       atomic_set(&tbl->entries, 0);
 
        /*
         *    Hook periodic timer for garbage collection
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
new file mode 100644 (file)
index 0000000..0f795b1
--- /dev/null
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+/* IPVS:       Maglev Hashing scheduling module
+ *
+ * Authors:    Inju Song <inju.song@navercorp.com>
+ *
+ */
+
+/* The mh algorithm assigns a preference list of all the lookup
+ * table positions to each destination and populates the table with
+ * each destination's most-preferred positions. It then selects a
+ * destination by hashing the source IP address and looking it up
+ * in the lookup table.
+ *
+ * The algorithm is detailed in:
+ * [3.4 Consistent Hashing]
+https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+#include <linux/siphash.h>
+#include <linux/bitops.h>
+#include <linux/gcd.h>
+
+#define IP_VS_SVC_F_SCHED_MH_FALLBACK  IP_VS_SVC_F_SCHED1 /* MH fallback */
+#define IP_VS_SVC_F_SCHED_MH_PORT      IP_VS_SVC_F_SCHED2 /* MH use port */
+
+struct ip_vs_mh_lookup {
+       struct ip_vs_dest __rcu *dest;  /* real server (cache) */
+};
+
+struct ip_vs_mh_dest_setup {
+       unsigned int    offset; /* starting offset */
+       unsigned int    skip;   /* skip */
+       unsigned int    perm;   /* next_offset */
+       int             turns;  /* weight / gcd() and rshift */
+};
+
+/* Available prime numbers for MH table */
+static int primes[] = {251, 509, 1021, 2039, 4093,
+                      8191, 16381, 32749, 65521, 131071};
+
+/* For IPVS MH entry hash table */
+#ifndef CONFIG_IP_VS_MH_TAB_INDEX
+#define CONFIG_IP_VS_MH_TAB_INDEX      12
+#endif
+#define IP_VS_MH_TAB_BITS              (CONFIG_IP_VS_MH_TAB_INDEX / 2)
+#define IP_VS_MH_TAB_INDEX             (CONFIG_IP_VS_MH_TAB_INDEX - 8)
+#define IP_VS_MH_TAB_SIZE               primes[IP_VS_MH_TAB_INDEX]
+
+struct ip_vs_mh_state {
+       struct rcu_head                 rcu_head;
+       struct ip_vs_mh_lookup          *lookup;
+       struct ip_vs_mh_dest_setup      *dest_setup;
+       hsiphash_key_t                  hash1, hash2;
+       int                             gcd;
+       int                             rshift;
+};
+
+static inline void generate_hash_secret(hsiphash_key_t *hash1,
+                                       hsiphash_key_t *hash2)
+{
+       hash1->key[0] = 2654435761UL;
+       hash1->key[1] = 2654435761UL;
+
+       hash2->key[0] = 2654446892UL;
+       hash2->key[1] = 2654446892UL;
+}
+
+/* Helper function to determine if server is unavailable */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+       return atomic_read(&dest->weight) <= 0 ||
+              dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+/* Returns hash value for IPVS MH entry */
+static inline unsigned int
+ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr,
+                __be16 port, hsiphash_key_t *key, unsigned int offset)
+{
+       unsigned int v;
+       __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               addr_fold = addr->ip6[0] ^ addr->ip6[1] ^
+                           addr->ip6[2] ^ addr->ip6[3];
+#endif
+       v = (offset + ntohs(port) + ntohl(addr_fold));
+       return hsiphash(&v, sizeof(v), key);
+}
+
+/* Reset all the hash buckets of the specified table. */
+static void ip_vs_mh_reset(struct ip_vs_mh_state *s)
+{
+       int i;
+       struct ip_vs_mh_lookup *l;
+       struct ip_vs_dest *dest;
+
+       l = &s->lookup[0];
+       for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
+               dest = rcu_dereference_protected(l->dest, 1);
+               if (dest) {
+                       ip_vs_dest_put(dest);
+                       RCU_INIT_POINTER(l->dest, NULL);
+               }
+               l++;
+       }
+}
+
+static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
+                             struct ip_vs_service *svc)
+{
+       struct list_head *p;
+       struct ip_vs_mh_dest_setup *ds;
+       struct ip_vs_dest *dest;
+       int lw;
+
+       /* If gcd is smaller than 1, there are no dests or
+        * all dests have a last_weight of zero. So, skip
+        * the permutation for the dests.
+        */
+       if (s->gcd < 1)
+               return 0;
+
+       /* Set dest_setup for the dests permutation */
+       p = &svc->destinations;
+       ds = &s->dest_setup[0];
+       while ((p = p->next) != &svc->destinations) {
+               dest = list_entry(p, struct ip_vs_dest, n_list);
+
+               ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
+                                             dest->port, &s->hash1, 0) %
+                                             IP_VS_MH_TAB_SIZE;
+               ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
+                                           dest->port, &s->hash2, 0) %
+                                           (IP_VS_MH_TAB_SIZE - 1) + 1;
+               ds->perm = ds->offset;
+
+               lw = atomic_read(&dest->last_weight);
+               ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
+               ds++;
+       }
+
+       return 0;
+}
+
+static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
+                            struct ip_vs_service *svc)
+{
+       int n, c, dt_count;
+       unsigned long *table;
+       struct list_head *p;
+       struct ip_vs_mh_dest_setup *ds;
+       struct ip_vs_dest *dest, *new_dest;
+
+       /* If gcd is smaller than 1, there are no dests or
+        * all dests have a last_weight of zero. So, skip
+        * the population for the dests and reset the lookup table.
+        */
+       if (s->gcd < 1) {
+               ip_vs_mh_reset(s);
+               return 0;
+       }
+
+       table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
+                        sizeof(unsigned long), GFP_KERNEL);
+       if (!table)
+               return -ENOMEM;
+
+       p = &svc->destinations;
+       n = 0;
+       dt_count = 0;
+       while (n < IP_VS_MH_TAB_SIZE) {
+               if (p == &svc->destinations)
+                       p = p->next;
+
+               ds = &s->dest_setup[0];
+               while (p != &svc->destinations) {
+                       /* Ignore added server with zero weight */
+                       if (ds->turns < 1) {
+                               p = p->next;
+                               ds++;
+                               continue;
+                       }
+
+                       c = ds->perm;
+                       while (test_bit(c, table)) {
+                               /* Add skip, mod IP_VS_MH_TAB_SIZE */
+                               ds->perm += ds->skip;
+                               if (ds->perm >= IP_VS_MH_TAB_SIZE)
+                                       ds->perm -= IP_VS_MH_TAB_SIZE;
+                               c = ds->perm;
+                       }
+
+                       __set_bit(c, table);
+
+                       dest = rcu_dereference_protected(s->lookup[c].dest, 1);
+                       new_dest = list_entry(p, struct ip_vs_dest, n_list);
+                       if (dest != new_dest) {
+                               if (dest)
+                                       ip_vs_dest_put(dest);
+                               ip_vs_dest_hold(new_dest);
+                               RCU_INIT_POINTER(s->lookup[c].dest, new_dest);
+                       }
+
+                       if (++n == IP_VS_MH_TAB_SIZE)
+                               goto out;
+
+                       if (++dt_count >= ds->turns) {
+                               dt_count = 0;
+                               p = p->next;
+                               ds++;
+                       }
+               }
+       }
+
+out:
+       kfree(table);
+       return 0;
+}
+
+/* Get ip_vs_dest associated with supplied parameters. */
+static inline struct ip_vs_dest *
+ip_vs_mh_get(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
+            const union nf_inet_addr *addr, __be16 port)
+{
+       unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0)
+                                            % IP_VS_MH_TAB_SIZE;
+       struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest);
+
+       return (!dest || is_unavailable(dest)) ? NULL : dest;
+}
+
+/* As ip_vs_mh_get, but with fallback if selected server is unavailable */
+static inline struct ip_vs_dest *
+ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
+                     const union nf_inet_addr *addr, __be16 port)
+{
+       unsigned int offset, roffset;
+       unsigned int hash, ihash;
+       struct ip_vs_dest *dest;
+
+       /* First try the dest it's supposed to go to */
+       ihash = ip_vs_mh_hashkey(svc->af, addr, port,
+                                &s->hash1, 0) % IP_VS_MH_TAB_SIZE;
+       dest = rcu_dereference(s->lookup[ihash].dest);
+       if (!dest)
+               return NULL;
+       if (!is_unavailable(dest))
+               return dest;
+
+       IP_VS_DBG_BUF(6, "MH: selected unavailable server %s:%u, reselecting",
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
+
+       /* If the original dest is unavailable, loop around the table
+        * starting from ihash to find a new dest
+        */
+       for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
+               roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
+               hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
+                                       roffset) % IP_VS_MH_TAB_SIZE;
+               dest = rcu_dereference(s->lookup[hash].dest);
+               if (!dest)
+                       break;
+               if (!is_unavailable(dest))
+                       return dest;
+               IP_VS_DBG_BUF(6,
+                             "MH: selected unavailable server %s:%u (offset %u), reselecting",
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                             ntohs(dest->port), roffset);
+       }
+
+       return NULL;
+}
+
+/* Assign all the hash buckets of the specified table with the service. */
+static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
+                            struct ip_vs_service *svc)
+{
+       int ret;
+
+       if (svc->num_dests > IP_VS_MH_TAB_SIZE)
+               return -EINVAL;
+
+       if (svc->num_dests >= 1) {
+               s->dest_setup = kcalloc(svc->num_dests,
+                                       sizeof(struct ip_vs_mh_dest_setup),
+                                       GFP_KERNEL);
+               if (!s->dest_setup)
+                       return -ENOMEM;
+       }
+
+       ip_vs_mh_permutate(s, svc);
+
+       ret = ip_vs_mh_populate(s, svc);
+       if (ret < 0)
+               goto out;
+
+       IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%u\n",
+                     IP_VS_DBG_ADDR(svc->af, &svc->addr),
+                     ntohs(svc->port));
+
+out:
+       if (svc->num_dests >= 1) {
+               kfree(s->dest_setup);
+               s->dest_setup = NULL;
+       }
+       return ret;
+}
+
+static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
+{
+       struct ip_vs_dest *dest;
+       int weight;
+       int g = 0;
+
+       list_for_each_entry(dest, &svc->destinations, n_list) {
+               weight = atomic_read(&dest->last_weight);
+               if (weight > 0) {
+                       if (g > 0)
+                               g = gcd(weight, g);
+                       else
+                               g = weight;
+               }
+       }
+       return g;
+}
+
+/* To avoid assigning huge weights to the MH table,
+ * calculate a shift value from the gcd.
+ */
+static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
+{
+       struct ip_vs_dest *dest;
+       int new_weight, weight = 0;
+       int mw, shift;
+
+       /* If gcd is smaller than 1, there are no dests or
+        * all dests have a last_weight of zero. So, return
+        * a shift value of zero.
+        */
+       if (gcd < 1)
+               return 0;
+
+       list_for_each_entry(dest, &svc->destinations, n_list) {
+               new_weight = atomic_read(&dest->last_weight);
+               if (new_weight > weight)
+                       weight = new_weight;
+       }
+
+       /* Because gcd is greater than zero,
+        * the maximum weight and gcd are always greater than zero
+        */
+       mw = weight / gcd;
+
+       /* shift = occupied bits of weight/gcd - MH highest bits */
+       shift = fls(mw) - IP_VS_MH_TAB_BITS;
+       return (shift >= 0) ? shift : 0;
+}
+
+static void ip_vs_mh_state_free(struct rcu_head *head)
+{
+       struct ip_vs_mh_state *s;
+
+       s = container_of(head, struct ip_vs_mh_state, rcu_head);
+       kfree(s->lookup);
+       kfree(s);
+}
+
+static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
+{
+       int ret;
+       struct ip_vs_mh_state *s;
+
+       /* Allocate the MH table for this service */
+       s = kzalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+
+       s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup),
+                           GFP_KERNEL);
+       if (!s->lookup) {
+               kfree(s);
+               return -ENOMEM;
+       }
+
+       generate_hash_secret(&s->hash1, &s->hash2);
+       s->gcd = ip_vs_mh_gcd_weight(svc);
+       s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
+
+       IP_VS_DBG(6,
+                 "MH lookup table (memory=%zdbytes) allocated for current service\n",
+                 sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
+
+       /* Assign the lookup table with current dests */
+       ret = ip_vs_mh_reassign(s, svc);
+       if (ret < 0) {
+               ip_vs_mh_reset(s);
+               ip_vs_mh_state_free(&s->rcu_head);
+               return ret;
+       }
+
+       /* No more failures, attach state */
+       svc->sched_data = s;
+       return 0;
+}
+
+static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
+{
+       struct ip_vs_mh_state *s = svc->sched_data;
+
+       /* Got to clean up lookup entry here */
+       ip_vs_mh_reset(s);
+
+       call_rcu(&s->rcu_head, ip_vs_mh_state_free);
+       IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
+                 sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
+}
+
+static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
+                                struct ip_vs_dest *dest)
+{
+       struct ip_vs_mh_state *s = svc->sched_data;
+
+       s->gcd = ip_vs_mh_gcd_weight(svc);
+       s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
+
+       /* Assign the lookup table with the updated service */
+       return ip_vs_mh_reassign(s, svc);
+}
+
+/* Helper function to get port number */
+static inline __be16
+ip_vs_mh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
+{
+       __be16 _ports[2], *ports;
+
+       /* At this point we know that we have a valid packet of some kind.
+        * Because ICMP packets are only guaranteed to have the first 8
+        * bytes, let's just grab the ports.  Fortunately they're in the
+        * same position for all three of the protocols we care about.
+        */
+       switch (iph->protocol) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+       case IPPROTO_SCTP:
+               ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+                                          &_ports);
+               if (unlikely(!ports))
+                       return 0;
+
+               if (likely(!ip_vs_iph_inverse(iph)))
+                       return ports[0];
+               else
+                       return ports[1];
+       default:
+               return 0;
+       }
+}
+
+/* Maglev Hashing scheduling */
+static struct ip_vs_dest *
+ip_vs_mh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
+{
+       struct ip_vs_dest *dest;
+       struct ip_vs_mh_state *s;
+       __be16 port = 0;
+       const union nf_inet_addr *hash_addr;
+
+       hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
+
+       IP_VS_DBG(6, "%s : Scheduling...\n", __func__);
+
+       if (svc->flags & IP_VS_SVC_F_SCHED_MH_PORT)
+               port = ip_vs_mh_get_port(skb, iph);
+
+       s = (struct ip_vs_mh_state *)svc->sched_data;
+
+       if (svc->flags & IP_VS_SVC_F_SCHED_MH_FALLBACK)
+               dest = ip_vs_mh_get_fallback(svc, s, hash_addr, port);
+       else
+               dest = ip_vs_mh_get(svc, s, hash_addr, port);
+
+       if (!dest) {
+               ip_vs_scheduler_err(svc, "no destination available");
+               return NULL;
+       }
+
+       IP_VS_DBG_BUF(6, "MH: source IP address %s:%u --> server %s:%u\n",
+                     IP_VS_DBG_ADDR(svc->af, hash_addr),
+                     ntohs(port),
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                     ntohs(dest->port));
+
+       return dest;
+}
+
+/* IPVS MH Scheduler structure */
+static struct ip_vs_scheduler ip_vs_mh_scheduler = {
+       .name =                 "mh",
+       .refcnt =               ATOMIC_INIT(0),
+       .module =               THIS_MODULE,
+       .n_list  =              LIST_HEAD_INIT(ip_vs_mh_scheduler.n_list),
+       .init_service =         ip_vs_mh_init_svc,
+       .done_service =         ip_vs_mh_done_svc,
+       .add_dest =             ip_vs_mh_dest_changed,
+       .del_dest =             ip_vs_mh_dest_changed,
+       .upd_dest =             ip_vs_mh_dest_changed,
+       .schedule =             ip_vs_mh_schedule,
+};
+
+static int __init ip_vs_mh_init(void)
+{
+       return register_ip_vs_scheduler(&ip_vs_mh_scheduler);
+}
+
+static void __exit ip_vs_mh_cleanup(void)
+{
+       unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);
+       rcu_barrier();
+}
+
+module_init(ip_vs_mh_init);
+module_exit(ip_vs_mh_cleanup);
+MODULE_DESCRIPTION("Maglev hashing ipvs scheduler");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Inju Song <inju.song@navercorp.com>");
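
That completes the new scheduler. The heart of it is the population loop
in ip_vs_mh_populate() above: each destination walks its own permutation
of the table (start at offset, advance by skip, wrap modulo the table
size) and claims the first unclaimed slot on each of its turns, with
ds->turns giving weighted destinations proportionally more turns. A
compact userspace sketch of the unweighted version from the NSDI paper
(hypothetical helper, not the module code); because the table size m is
prime, every skip in 1..m-1 is coprime to m, so each walk visits every
slot and the loop always terminates:

#include <stdlib.h>

/* table[i] receives the index of the destination that owns slot i.
 * offset[d] and skip[d] would come from two independent hashes of
 * destination d, with skip[d] in 1..m-1. */
static void demo_maglev_populate(int *table, int m,
				 const int *offset, const int *skip,
				 int ndest)
{
	int *next = calloc(ndest, sizeof(*next));	/* per-dest walk state */
	int filled = 0;

	if (!next)
		return;
	for (int i = 0; i < m; i++)
		table[i] = -1;				/* -1 == free slot */

	while (filled < m) {
		for (int d = 0; d < ndest && filled < m; d++) {
			int c = (offset[d] + next[d] * skip[d]) % m;

			while (table[c] != -1) {	/* slot taken: keep walking */
				next[d]++;
				c = (offset[d] + next[d] * skip[d]) % m;
			}
			table[c] = d;
			next[d]++;
			filled++;
		}
	}
	free(next);
}

Each destination ends up owning roughly m / ndest slots, and removing one
destination only reassigns the slots it owned, which is the "minimal
disruption" property the Kconfig help text promises.
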
index bcd9b7bde4ee3da423456800436912e06dd46958..569631d2b2a10d32c149491b5140158020917831 100644 (file)
@@ -436,7 +436,7 @@ static bool tcp_state_active(int state)
        return tcp_state_active_table[state];
 }
 
-static struct tcp_states_t tcp_states [] = {
+static struct tcp_states_t tcp_states[] = {
 /*     INPUT */
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA        */
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
@@ -459,7 +459,7 @@ static struct tcp_states_t tcp_states [] = {
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
-static struct tcp_states_t tcp_states_dos [] = {
+static struct tcp_states_t tcp_states_dos[] = {
 /*     INPUT */
 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA        */
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
index 16aaac6eedc963dee56f088c8933dc962bbd27c6..1e01c782583a62efd4258c133c7b2c8f97bc2e85 100644 (file)
@@ -96,7 +96,8 @@ ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
-       return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+       return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
+                                IP_VS_SH_TAB_BITS)) &
                IP_VS_SH_TAB_MASK;
 }
 
index fbaf3bd05b2ec9051734849cf1ab0fdf15dea386..001501e25625fcd3f87a22fec4c4e013c90b3b8d 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/kernel.h>
+#include <linux/sched/signal.h>
 
 #include <asm/unaligned.h>             /* Used for ntoh_seq and hton_seq */
 
@@ -1360,15 +1361,9 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
 /*
  *      Specify default interface for outgoing multicasts
  */
-static int set_mcast_if(struct sock *sk, char *ifname)
+static int set_mcast_if(struct sock *sk, struct net_device *dev)
 {
-       struct net_device *dev;
        struct inet_sock *inet = inet_sk(sk);
-       struct net *net = sock_net(sk);
-
-       dev = __dev_get_by_name(net, ifname);
-       if (!dev)
-               return -ENODEV;
 
        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
                return -EINVAL;
@@ -1396,19 +1391,14 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  *      in the in_addr structure passed in as a parameter.
  */
 static int
-join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev)
 {
-       struct net *net = sock_net(sk);
        struct ip_mreqn mreq;
-       struct net_device *dev;
        int ret;
 
        memset(&mreq, 0, sizeof(mreq));
        memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
 
-       dev = __dev_get_by_name(net, ifname);
-       if (!dev)
-               return -ENODEV;
        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
                return -EINVAL;
 
@@ -1423,15 +1413,10 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 
 #ifdef CONFIG_IP_VS_IPV6
 static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
-                            char *ifname)
+                            struct net_device *dev)
 {
-       struct net *net = sock_net(sk);
-       struct net_device *dev;
        int ret;
 
-       dev = __dev_get_by_name(net, ifname);
-       if (!dev)
-               return -ENODEV;
        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
                return -EINVAL;
 
@@ -1443,24 +1428,18 @@ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
 }
 #endif
 
-static int bind_mcastif_addr(struct socket *sock, char *ifname)
+static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
 {
-       struct net *net = sock_net(sock->sk);
-       struct net_device *dev;
        __be32 addr;
        struct sockaddr_in sin;
 
-       dev = __dev_get_by_name(net, ifname);
-       if (!dev)
-               return -ENODEV;
-
        addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
        if (!addr)
                pr_err("You probably need to specify IP address on "
                       "multicast interface.\n");
 
        IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
-                 ifname, &addr);
+                 dev->name, &addr);
 
        /* Now bind the socket with the address of multicast interface */
        sin.sin_family       = AF_INET;
@@ -1493,7 +1472,8 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
+static int make_send_sock(struct netns_ipvs *ipvs, int id,
+                         struct net_device *dev, struct socket **sock_ret)
 {
        /* multicast addr */
        union ipvs_sockaddr mcast_addr;
@@ -1505,9 +1485,10 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
                                  IPPROTO_UDP, &sock);
        if (result < 0) {
                pr_err("Error during creation of socket; terminating\n");
-               return ERR_PTR(result);
+               goto error;
        }
-       result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
+       *sock_ret = sock;
+       result = set_mcast_if(sock->sk, dev);
        if (result < 0) {
                pr_err("Error setting outbound mcast interface\n");
                goto error;
@@ -1522,7 +1503,7 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
                set_sock_size(sock->sk, 1, result);
 
        if (AF_INET == ipvs->mcfg.mcast_af)
-               result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
+               result = bind_mcastif_addr(sock, dev);
        else
                result = 0;
        if (result < 0) {
@@ -1538,19 +1519,18 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
                goto error;
        }
 
-       return sock;
+       return 0;
 
 error:
-       sock_release(sock);
-       return ERR_PTR(result);
+       return result;
 }
 
 
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
-                                       int ifindex)
+static int make_receive_sock(struct netns_ipvs *ipvs, int id,
+                            struct net_device *dev, struct socket **sock_ret)
 {
        /* multicast addr */
        union ipvs_sockaddr mcast_addr;
@@ -1562,8 +1542,9 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
                                  IPPROTO_UDP, &sock);
        if (result < 0) {
                pr_err("Error during creation of socket; terminating\n");
-               return ERR_PTR(result);
+               goto error;
        }
+       *sock_ret = sock;
        /* it is equivalent to the REUSEADDR option in user-space */
        sock->sk->sk_reuse = SK_CAN_REUSE;
        result = sysctl_sync_sock_size(ipvs);
@@ -1571,7 +1552,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
                set_sock_size(sock->sk, 0, result);
 
        get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
-       sock->sk->sk_bound_dev_if = ifindex;
+       sock->sk->sk_bound_dev_if = dev->ifindex;
        result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
        if (result < 0) {
                pr_err("Error binding to the multicast addr\n");
@@ -1582,21 +1563,20 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
 #ifdef CONFIG_IP_VS_IPV6
        if (ipvs->bcfg.mcast_af == AF_INET6)
                result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
-                                          ipvs->bcfg.mcast_ifn);
+                                          dev);
        else
 #endif
                result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
-                                         ipvs->bcfg.mcast_ifn);
+                                         dev);
        if (result < 0) {
                pr_err("Error joining to the multicast group\n");
                goto error;
        }
 
-       return sock;
+       return 0;
 
 error:
-       sock_release(sock);
-       return ERR_PTR(result);
+       return result;
 }
 
 
@@ -1778,13 +1758,12 @@ static int sync_thread_backup(void *data)
 int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
                      int state)
 {
-       struct ip_vs_sync_thread_data *tinfo;
+       struct ip_vs_sync_thread_data *tinfo = NULL;
        struct task_struct **array = NULL, *task;
-       struct socket *sock;
        struct net_device *dev;
        char *name;
        int (*threadfn)(void *data);
-       int id, count, hlen;
+       int id = 0, count, hlen;
        int result = -ENOMEM;
        u16 mtu, min_mtu;
 
@@ -1792,6 +1771,18 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
                  sizeof(struct ip_vs_sync_conn_v0));
 
+       /* Do not hold one mutex and then block on another */
+       for (;;) {
+               rtnl_lock();
+               if (mutex_trylock(&ipvs->sync_mutex))
+                       break;
+               rtnl_unlock();
+               mutex_lock(&ipvs->sync_mutex);
+               if (rtnl_trylock())
+                       break;
+               mutex_unlock(&ipvs->sync_mutex);
+       }
+
        if (!ipvs->sync_state) {
                count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
                ipvs->threads_mask = count - 1;
@@ -1810,7 +1801,8 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
        if (!dev) {
                pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
-               return -ENODEV;
+               result = -ENODEV;
+               goto out_early;
        }
        hlen = (AF_INET6 == c->mcast_af) ?
               sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
@@ -1827,26 +1819,30 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
                c->sync_maxlen = mtu - hlen;
 
        if (state == IP_VS_STATE_MASTER) {
+               result = -EEXIST;
                if (ipvs->ms)
-                       return -EEXIST;
+                       goto out_early;
 
                ipvs->mcfg = *c;
                name = "ipvs-m:%d:%d";
                threadfn = sync_thread_master;
        } else if (state == IP_VS_STATE_BACKUP) {
+               result = -EEXIST;
                if (ipvs->backup_threads)
-                       return -EEXIST;
+                       goto out_early;
 
                ipvs->bcfg = *c;
                name = "ipvs-b:%d:%d";
                threadfn = sync_thread_backup;
        } else {
-               return -EINVAL;
+               result = -EINVAL;
+               goto out_early;
        }
 
        if (state == IP_VS_STATE_MASTER) {
                struct ipvs_master_sync_state *ms;
 
+               result = -ENOMEM;
                ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
                if (!ipvs->ms)
                        goto out;
@@ -1862,39 +1858,38 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        } else {
                array = kcalloc(count, sizeof(struct task_struct *),
                                GFP_KERNEL);
+               result = -ENOMEM;
                if (!array)
                        goto out;
        }
 
-       tinfo = NULL;
        for (id = 0; id < count; id++) {
-               if (state == IP_VS_STATE_MASTER)
-                       sock = make_send_sock(ipvs, id);
-               else
-                       sock = make_receive_sock(ipvs, id, dev->ifindex);
-               if (IS_ERR(sock)) {
-                       result = PTR_ERR(sock);
-                       goto outtinfo;
-               }
+               result = -ENOMEM;
                tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
                if (!tinfo)
-                       goto outsocket;
+                       goto out;
                tinfo->ipvs = ipvs;
-               tinfo->sock = sock;
+               tinfo->sock = NULL;
                if (state == IP_VS_STATE_BACKUP) {
                        tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
                                             GFP_KERNEL);
                        if (!tinfo->buf)
-                               goto outtinfo;
+                               goto out;
                } else {
                        tinfo->buf = NULL;
                }
                tinfo->id = id;
+               if (state == IP_VS_STATE_MASTER)
+                       result = make_send_sock(ipvs, id, dev, &tinfo->sock);
+               else
+                       result = make_receive_sock(ipvs, id, dev, &tinfo->sock);
+               if (result < 0)
+                       goto out;
 
                task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
                if (IS_ERR(task)) {
                        result = PTR_ERR(task);
-                       goto outtinfo;
+                       goto out;
                }
                tinfo = NULL;
                if (state == IP_VS_STATE_MASTER)
@@ -1911,20 +1906,20 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        ipvs->sync_state |= state;
        spin_unlock_bh(&ipvs->sync_buff_lock);
 
+       mutex_unlock(&ipvs->sync_mutex);
+       rtnl_unlock();
+
        /* increase the module use count */
        ip_vs_use_count_inc();
 
        return 0;
 
-outsocket:
-       sock_release(sock);
-
-outtinfo:
-       if (tinfo) {
-               sock_release(tinfo->sock);
-               kfree(tinfo->buf);
-               kfree(tinfo);
-       }
+out:
+       /* We do not need RTNL lock anymore, release it here so that
+        * sock_release below and in the kthreads can use rtnl_lock
+        * to leave the mcast group.
+        */
+       rtnl_unlock();
        count = id;
        while (count-- > 0) {
                if (state == IP_VS_STATE_MASTER)
@@ -1932,13 +1927,23 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
                else
                        kthread_stop(array[count]);
        }
-       kfree(array);
-
-out:
        if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
                kfree(ipvs->ms);
                ipvs->ms = NULL;
        }
+       mutex_unlock(&ipvs->sync_mutex);
+       if (tinfo) {
+               if (tinfo->sock)
+                       sock_release(tinfo->sock);
+               kfree(tinfo->buf);
+               kfree(tinfo);
+       }
+       kfree(array);
+       return result;
+
+out_early:
+       mutex_unlock(&ipvs->sync_mutex);
+       rtnl_unlock();
        return result;
 }
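
The trylock loop near the top of start_sync_thread() replaces the fixed
rtnl_lock()-then-sync_mutex ordering that do_ip_vs_set_ctl() and
ip_vs_genl_new_daemon() used to impose (removed in the ip_vs_ctl.c hunks
earlier): since other paths may take the two locks in the opposite order,
the loop blocks on one lock, only tries the other, and backs off and swaps
roles on failure, so no ABBA ordering is ever committed to. A generic
sketch of the same dance with two plain mutexes (hypothetical helper):

#include <linux/mutex.h>

/* Acquire both a and b without assuming a global lock order. */
static void demo_lock_pair(struct mutex *a, struct mutex *b)
{
	for (;;) {
		mutex_lock(a);
		if (mutex_trylock(b))
			return;		/* got both */
		mutex_unlock(a);	/* back off, try the other order */
		mutex_lock(b);
		if (mutex_trylock(a))
			return;		/* got both */
		mutex_unlock(b);
	}
}
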
 
index 41ff04ee2554a31fa1a0ffd132f1f9268854cbfb..60544172700846a731786c318adbab3a1cb3bf35 100644 (file)
@@ -186,6 +186,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_max);
 seqcount_t nf_conntrack_generation __read_mostly;
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
index 8ef21d9f9a00d960ff3f26396b1736cfb1898d36..4b2b3d53acfcabfbe8fd4899f6ed825289387143 100644 (file)
@@ -252,7 +252,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
 {
-       return a->master == b->master && a->class == b->class &&
+       return a->master == b->master &&
               nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
               nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
               net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
@@ -421,6 +421,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
        h = nf_ct_expect_dst_hash(net, &expect->tuple);
        hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
+                       if (i->class != expect->class)
+                               return -EALREADY;
+
                        if (nf_ct_remove_expect(i))
                                break;
                } else if (expect_clash(i, expect)) {
index 9fe0ddc333fbb263d98e639b1cb869b24e0b619c..277bbfe26478bae483ce7f0543a4977402ca9040 100644 (file)
@@ -9,6 +9,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
+#include <linux/kmemleak.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
@@ -71,6 +72,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
        rcu_read_unlock();
 
        alloc = max(newlen, NF_CT_EXT_PREALLOC);
+       kmemleak_not_leak(old);
        new = __krealloc(old, alloc, gfp);
        if (!new)
                return NULL;
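
Unlike krealloc(), __krealloc() never frees the old area; here the old
extension block has to stay valid for concurrent RCU readers and is freed
separately. When the data moves, kmemleak can lose track of the remaining
reference to the old block and report a false positive, which the
kmemleak_not_leak() call above suppresses. A minimal sketch of the
pairing (hypothetical wrapper):

#include <linux/kmemleak.h>
#include <linux/slab.h>

/* Grow a buffer without freeing the old one. __krealloc() copies into
 * a new allocation when it cannot resize in place and leaves "old"
 * untouched; the caller remains responsible for freeing it. Marking
 * "old" first keeps kmemleak from flagging it while its only
 * reference is one the scanner cannot follow. */
static void *demo_grow_noref(void *old, size_t new_size, gfp_t gfp)
{
	kmemleak_not_leak(old);
	return __krealloc(old, new_size, gfp);
}
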
index f0e9a7511e1ac4915ed9bc43492d214417b3c1bd..a11c304fb7713b36eacd21f421b72e4a4db1b9e7 100644 (file)
@@ -566,8 +566,7 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
        .timeout        = 5 * 60,
 };
 
-/* don't make this __exit, since it's called from __init ! */
-static void nf_conntrack_ftp_fini(void)
+static void __exit nf_conntrack_ftp_fini(void)
 {
        nf_conntrack_helpers_unregister(ftp, ports_c * 2);
        kfree(ftp_buffer);
index 5523acce9d6993dc71e467bbdf7b718ab2b25bf7..4099f4d79bae7b675df541547d70f9d793794c16 100644 (file)
@@ -232,8 +232,6 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
 static struct nf_conntrack_expect_policy irc_exp_policy;
 
-static void nf_conntrack_irc_fini(void);
-
 static int __init nf_conntrack_irc_init(void)
 {
        int i, ret;
@@ -276,9 +274,7 @@ static int __init nf_conntrack_irc_init(void)
        return 0;
 }
 
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void nf_conntrack_irc_fini(void)
+static void __exit nf_conntrack_irc_fini(void)
 {
        nf_conntrack_helpers_unregister(irc, ports_c);
        kfree(irc_buffer);
index 4c1d0c5bc26800a87c4af6f47942a61be633524a..d807b8770be3e27d1ae258086e8387a272720c60 100644 (file)
@@ -2205,6 +2205,9 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
        if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
                goto nla_put_failure;
 
+       if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max)))
+               goto nla_put_failure;
+
        nlmsg_end(skb, nlh);
        return skb->len;
 
index e97cdc1cf98c2618dffe838c4c7ef395e03ed8e0..8e67910185a05717628837d34c7e4fe829ca7424 100644 (file)
@@ -981,6 +981,17 @@ static int tcp_packet(struct nf_conn *ct,
                        return NF_ACCEPT; /* Don't change state */
                }
                break;
+       case TCP_CONNTRACK_SYN_SENT2:
+               /* The tcp_conntracks table is not smart enough to handle
+                * simultaneous open.
+                */
+               ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
+               break;
+       case TCP_CONNTRACK_SYN_RECV:
+               if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
+                   ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
+                       new_state = TCP_CONNTRACK_ESTABLISHED;
+               break;
        case TCP_CONNTRACK_CLOSE:
                if (index == TCP_RST_SET
                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
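The two added cases teach the TCP tracker about simultaneous open: reaching SYN_SENT2 records a flag on the connection, and a later reply-direction ACK seen in SYN_RECV promotes the state straight to ESTABLISHED, a transition the static tcp_conntracks table cannot express on its own. A standalone toy of just the added logic (conntrack types reduced to an enum and a flag word):

    #include <stdbool.h>
    #include <stdio.h>

    enum state { SYN_SENT2, SYN_RECV, ESTABLISHED };
    #define SIMULTANEOUS_OPEN 0x01

    /* mirrors the two new switch cases, nothing more */
    static enum state fixup(enum state new_state, bool reply_ack,
                            unsigned int *last_flags)
    {
            switch (new_state) {
            case SYN_SENT2:
                    *last_flags |= SIMULTANEOUS_OPEN;
                    break;
            case SYN_RECV:
                    if (reply_ack && (*last_flags & SIMULTANEOUS_OPEN))
                            new_state = ESTABLISHED;
                    break;
            default:
                    break;
            }
            return new_state;
    }

    int main(void)
    {
            unsigned int flags = 0;
            enum state s;

            fixup(SYN_SENT2, false, &flags);     /* both sides sent SYN */
            s = fixup(SYN_RECV, true, &flags);   /* ACK in reply direction */
            printf("%s\n", s == ESTABLISHED ? "ESTABLISHED" : "stuck");
            return 0;
    }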
index ae457f39d5ceb804aa60120c7d897dd441e49d2a..5072ff96ab3342e2183067d39333c161cd0b1848 100644 (file)
@@ -173,8 +173,7 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
        .timeout        = 5 * 60,
 };
 
-/* don't make this __exit, since it's called from __init ! */
-static void nf_conntrack_sane_fini(void)
+static void __exit nf_conntrack_sane_fini(void)
 {
        nf_conntrack_helpers_unregister(sane, ports_c * 2);
        kfree(sane_buffer);
index 4dbb5bad4363ba9f67fea15160bb7b0b88ae9c38..c8d2b6688a2a082ca00adb68a6e033e2958221c5 100644 (file)
@@ -938,11 +938,19 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
                                       datalen, rtp_exp, rtcp_exp,
                                       mediaoff, medialen, daddr);
        else {
-               if (nf_ct_expect_related(rtp_exp) == 0) {
-                       if (nf_ct_expect_related(rtcp_exp) != 0)
-                               nf_ct_unexpect_related(rtp_exp);
-                       else
+               /* -EALREADY handling works around end-points that send
+                * SDP messages with identical port but different media type;
+                * in that case we pretend the expectation was set up.
+                */
+               int errp = nf_ct_expect_related(rtp_exp);
+
+               if (errp == 0 || errp == -EALREADY) {
+                       int errcp = nf_ct_expect_related(rtcp_exp);
+
+                       if (errcp == 0 || errcp == -EALREADY)
                                ret = NF_ACCEPT;
+                       else if (errp == 0)
+                               nf_ct_unexpect_related(rtp_exp);
                }
        }
        nf_ct_expect_put(rtcp_exp);
@@ -1609,7 +1617,7 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
        },
 };
 
-static void nf_conntrack_sip_fini(void)
+static void __exit nf_conntrack_sip_fini(void)
 {
        nf_conntrack_helpers_unregister(sip, ports_c * 4);
 }
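Building on the -EALREADY semantics above, set_expected_rtp_rtcp() only rolls back the RTP expectation when this call actually created it; an expectation that already existed (-EALREADY) is left alone. The pairing rule, reduced to user space (register/unregister stubs are hypothetical):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static int register_expect(int id)    { (void)id; return 0; }
    static void unregister_expect(int id) { (void)id; }

    static bool expect_rtp_rtcp(int rtp, int rtcp)
    {
            int errp = register_expect(rtp);
            int errcp;

            if (errp != 0 && errp != -EALREADY)
                    return false;

            errcp = register_expect(rtcp);
            if (errcp == 0 || errcp == -EALREADY)
                    return true;

            if (errp == 0)          /* undo only what we created */
                    unregister_expect(rtp);
            return false;
    }

    int main(void)
    {
            printf("%d\n", expect_rtp_rtcp(5004, 5005)); /* 1: accepted */
            return 0;
    }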
index 0ec6779fd5d944406eb67ff6e3256698ec40d20c..548b673b3625cc55eec26bb096253b592e21c188 100644 (file)
@@ -104,7 +104,7 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = {
        .timeout        = 5 * 60,
 };
 
-static void nf_conntrack_tftp_fini(void)
+static void __exit nf_conntrack_tftp_fini(void)
 {
        nf_conntrack_helpers_unregister(tftp, ports_c * 2);
 }
similarity index 67%
rename from net/netfilter/nf_flow_table.c
rename to net/netfilter/nf_flow_table_core.c
index ec410cae93071cb70702f3618b97a6b3e19e47d8..eb0d1658ac0559d8d74444ac3bb7714662cbe506 100644 (file)
@@ -4,6 +4,8 @@
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -16,6 +18,43 @@ struct flow_offload_entry {
        struct rcu_head         rcu_head;
 };
 
+static DEFINE_MUTEX(flowtable_lock);
+static LIST_HEAD(flowtables);
+
+static void
+flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+                     struct nf_flow_route *route,
+                     enum flow_offload_tuple_dir dir)
+{
+       struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+       struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+       struct dst_entry *dst = route->tuple[dir].dst;
+
+       ft->dir = dir;
+
+       switch (ctt->src.l3num) {
+       case NFPROTO_IPV4:
+               ft->src_v4 = ctt->src.u3.in;
+               ft->dst_v4 = ctt->dst.u3.in;
+               ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+               break;
+       case NFPROTO_IPV6:
+               ft->src_v6 = ctt->src.u3.in6;
+               ft->dst_v6 = ctt->dst.u3.in6;
+               ft->mtu = ip6_dst_mtu_forward(dst);
+               break;
+       }
+
+       ft->l3proto = ctt->src.l3num;
+       ft->l4proto = ctt->dst.protonum;
+       ft->src_port = ctt->src.u.tcp.port;
+       ft->dst_port = ctt->dst.u.tcp.port;
+
+       ft->iifidx = route->tuple[dir].ifindex;
+       ft->oifidx = route->tuple[!dir].ifindex;
+       ft->dst_cache = dst;
+}
+
 struct flow_offload *
 flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 {
@@ -40,69 +79,12 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 
        entry->ct = ct;
 
-       switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
-       case NFPROTO_IPV4:
-               flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
-                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
-               flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
-                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
-               flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
-                       ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
-               flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
-                       ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
-               break;
-       case NFPROTO_IPV6:
-               flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
-                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
-               flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
-                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
-               flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
-                       ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
-               flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
-                       ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
-               break;
-       }
-
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
-               ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
-               ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
-               ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
-               ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
-
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
-                 route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
-                 route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
-
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
-               ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
-               ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
-               ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
-               ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
-                                               FLOW_OFFLOAD_DIR_ORIGINAL;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
-                                               FLOW_OFFLOAD_DIR_REPLY;
-
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
-               route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
-               route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
-               route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
-       flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
-               route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+       flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+       flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
 
        if (ct->status & IPS_SRC_NAT)
                flow->flags |= FLOW_OFFLOAD_SNAT;
-       else if (ct->status & IPS_DST_NAT)
+       if (ct->status & IPS_DST_NAT)
                flow->flags |= FLOW_OFFLOAD_DNAT;
 
        return flow;
@@ -118,6 +100,43 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 }
 EXPORT_SYMBOL_GPL(flow_offload_alloc);
 
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+{
+       tcp->state = TCP_CONNTRACK_ESTABLISHED;
+       tcp->seen[0].td_maxwin = 0;
+       tcp->seen[1].td_maxwin = 0;
+}
+
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+       const struct nf_conntrack_l4proto *l4proto;
+       struct net *net = nf_ct_net(ct);
+       unsigned int *timeouts;
+       unsigned int timeout;
+       int l4num;
+
+       l4num = nf_ct_protonum(ct);
+       if (l4num == IPPROTO_TCP)
+               flow_offload_fixup_tcp(&ct->proto.tcp);
+
+       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+       if (!l4proto)
+               return;
+
+       timeouts = l4proto->get_timeouts(net);
+       if (!timeouts)
+               return;
+
+       if (l4num == IPPROTO_TCP)
+               timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+       else if (l4num == IPPROTO_UDP)
+               timeout = timeouts[UDP_CT_REPLIED];
+       else
+               return;
+
+       ct->timeout = nfct_time_stamp + timeout;
+}
+
 void flow_offload_free(struct flow_offload *flow)
 {
        struct flow_offload_entry *e;
@@ -125,17 +144,46 @@ void flow_offload_free(struct flow_offload *flow)
        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
        e = container_of(flow, struct flow_offload_entry, flow);
-       nf_ct_delete(e->ct, 0, 0);
+       if (flow->flags & FLOW_OFFLOAD_DYING)
+               nf_ct_delete(e->ct, 0, 0);
        nf_ct_put(e->ct);
        kfree_rcu(e, rcu_head);
 }
 EXPORT_SYMBOL_GPL(flow_offload_free);
 
-void flow_offload_dead(struct flow_offload *flow)
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+       const struct flow_offload_tuple *tuple = data;
+
+       return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+       const struct flow_offload_tuple_rhash *tuplehash = data;
+
+       return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+                                       const void *ptr)
 {
-       flow->flags |= FLOW_OFFLOAD_DYING;
+       const struct flow_offload_tuple *tuple = arg->key;
+       const struct flow_offload_tuple_rhash *x = ptr;
+
+       if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+               return 1;
+
+       return 0;
 }
-EXPORT_SYMBOL_GPL(flow_offload_dead);
+
+static const struct rhashtable_params nf_flow_offload_rhash_params = {
+       .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
+       .hashfn                 = flow_offload_hash,
+       .obj_hashfn             = flow_offload_hash_obj,
+       .obj_cmpfn              = flow_offload_hash_cmp,
+       .automatic_shrinking    = true,
+};
 
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
@@ -143,10 +191,10 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 
        rhashtable_insert_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-                              *flow_table->type->params);
+                              nf_flow_offload_rhash_params);
        rhashtable_insert_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
-                              *flow_table->type->params);
+                              nf_flow_offload_rhash_params);
        return 0;
 }
 EXPORT_SYMBOL_GPL(flow_offload_add);
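The rhashtable parameters are now private to the core: both hash functions and the compare callback key only the bytes of struct flow_offload_tuple up to offsetof(..., dir), so the direction and cached-route bookkeeping that follow never perturb a lookup. The keyed-prefix idea in user-space form (FNV-1a stands in for jhash; the struct is a simplified stand-in):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct tuple {
            uint32_t src, dst;
            uint16_t sport, dport;
            /* everything below is bookkeeping, not part of the key */
            int   dir;
            void *dst_cache;
    };

    static uint32_t hash_prefix(const void *data, size_t len)
    {
            const uint8_t *p = data;
            uint32_t h = 2166136261u;       /* FNV-1a */

            while (len--)
                    h = (h ^ *p++) * 16777619u;
            return h;
    }

    int main(void)
    {
            struct tuple a = { 1, 2, 3, 4, 0, NULL };
            struct tuple b = { 1, 2, 3, 4, 1, (void *)&a };
            size_t keylen = offsetof(struct tuple, dir);

            /* identical keys hash equal despite different bookkeeping */
            printf("%u %u\n", hash_prefix(&a, keylen), hash_prefix(&b, keylen));
            return 0;
    }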
@@ -154,22 +202,51 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 static void flow_offload_del(struct nf_flowtable *flow_table,
                             struct flow_offload *flow)
 {
+       struct flow_offload_entry *e;
+
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-                              *flow_table->type->params);
+                              nf_flow_offload_rhash_params);
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
-                              *flow_table->type->params);
+                              nf_flow_offload_rhash_params);
+
+       e = container_of(flow, struct flow_offload_entry, flow);
+       clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
 
        flow_offload_free(flow);
 }
 
+void flow_offload_teardown(struct flow_offload *flow)
+{
+       struct flow_offload_entry *e;
+
+       flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+
+       e = container_of(flow, struct flow_offload_entry, flow);
+       flow_offload_fixup_ct_state(e->ct);
+}
+EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
 struct flow_offload_tuple_rhash *
 flow_offload_lookup(struct nf_flowtable *flow_table,
                    struct flow_offload_tuple *tuple)
 {
-       return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
-                                     *flow_table->type->params);
+       struct flow_offload_tuple_rhash *tuplehash;
+       struct flow_offload *flow;
+       int dir;
+
+       tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+                                          nf_flow_offload_rhash_params);
+       if (!tuplehash)
+               return NULL;
+
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+       if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
+               return NULL;
+
+       return tuplehash;
 }
 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 
@@ -216,11 +293,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
        return (__s32)(flow->timeout - (u32)jiffies) <= 0;
 }
 
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
-{
-       return flow->flags & FLOW_OFFLOAD_DYING;
-}
-
 static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 {
        struct flow_offload_tuple_rhash *tuplehash;
@@ -248,7 +320,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
 
                if (nf_flow_has_expired(flow) ||
-                   nf_flow_is_dying(flow))
+                   (flow->flags & (FLOW_OFFLOAD_DYING |
+                                   FLOW_OFFLOAD_TEARDOWN)))
                        flow_offload_del(flow_table, flow);
        }
 out:
@@ -258,7 +331,7 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
        return 1;
 }
 
-void nf_flow_offload_work_gc(struct work_struct *work)
+static void nf_flow_offload_work_gc(struct work_struct *work)
 {
        struct nf_flowtable *flow_table;
 
@@ -266,42 +339,6 @@ void nf_flow_offload_work_gc(struct work_struct *work)
        nf_flow_offload_gc_step(flow_table);
        queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 }
-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
-
-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
-{
-       const struct flow_offload_tuple *tuple = data;
-
-       return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
-{
-       const struct flow_offload_tuple_rhash *tuplehash = data;
-
-       return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
-                                       const void *ptr)
-{
-       const struct flow_offload_tuple *tuple = arg->key;
-       const struct flow_offload_tuple_rhash *x = ptr;
-
-       if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
-               return 1;
-
-       return 0;
-}
-
-const struct rhashtable_params nf_flow_offload_rhash_params = {
-       .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
-       .hashfn                 = flow_offload_hash,
-       .obj_hashfn             = flow_offload_hash_obj,
-       .obj_cmpfn              = flow_offload_hash_cmp,
-       .automatic_shrinking    = true,
-};
-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
 
 static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
@@ -419,33 +456,69 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 }
 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
 
+int nf_flow_table_init(struct nf_flowtable *flowtable)
+{
+       int err;
+
+       INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+
+       err = rhashtable_init(&flowtable->rhashtable,
+                             &nf_flow_offload_rhash_params);
+       if (err < 0)
+               return err;
+
+       queue_delayed_work(system_power_efficient_wq,
+                          &flowtable->gc_work, HZ);
+
+       mutex_lock(&flowtable_lock);
+       list_add(&flowtable->list, &flowtables);
+       mutex_unlock(&flowtable_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_init);
+
 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 {
        struct net_device *dev = data;
 
-       if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+       if (!dev) {
+               flow_offload_teardown(flow);
                return;
+       }
 
-       flow_offload_dead(flow);
+       if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+           flow->tuplehash[1].tuple.iifidx == dev->ifindex)
+               flow_offload_dead(flow);
 }
 
 static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
-                                         void *data)
+                                         struct net_device *dev)
 {
-       nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+       nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
        flush_delayed_work(&flowtable->gc_work);
 }
 
 void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
 {
-       nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+       struct nf_flowtable *flowtable;
+
+       mutex_lock(&flowtable_lock);
+       list_for_each_entry(flowtable, &flowtables, list)
+               nf_flow_table_iterate_cleanup(flowtable, dev);
+       mutex_unlock(&flowtable_lock);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 
 void nf_flow_table_free(struct nf_flowtable *flow_table)
 {
+       mutex_lock(&flowtable_lock);
+       list_del(&flow_table->list);
+       mutex_unlock(&flowtable_lock);
+       cancel_delayed_work_sync(&flow_table->gc_work);
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
        WARN_ON(!nf_flow_offload_gc_step(flow_table));
+       rhashtable_destroy(&flow_table->rhashtable);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_free);
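After this refactor a flowtable owner drives the whole lifecycle through two calls: nf_flow_table_init() sets up the rhashtable, schedules the deferrable GC work and links the table into the new global list, while nf_flow_table_free() unlinks it, cancels GC and drains the remaining entries. A hedged sketch of a hypothetical in-kernel user:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <net/netfilter/nf_flow_table.h>

    static struct nf_flowtable demo_flowtable;

    static int __init demo_init(void)
    {
            /* rhashtable, GC worker and global-list linkage all happen
             * inside the core now */
            return nf_flow_table_init(&demo_flowtable);
    }

    static void __exit demo_exit(void)
    {
            nf_flow_table_free(&demo_flowtable);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");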
 
index 375a1881d93defc83b3a6b81dfb69dd949336710..99771aa7e7eabd4530d686413406e1de4af29549 100644 (file)
@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_flowtable_type flowtable_inet = {
        .family         = NFPROTO_INET,
-       .params         = &nf_flow_offload_rhash_params,
-       .gc             = nf_flow_offload_work_gc,
+       .init           = nf_flow_table_init,
        .free           = nf_flow_table_free,
        .hook           = nf_flow_offload_inet_hook,
        .owner          = THIS_MODULE,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
new file mode 100644 (file)
index 0000000..82451b7
--- /dev/null
@@ -0,0 +1,487 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_state_check(struct flow_offload *flow, int proto,
+                              struct sk_buff *skb, unsigned int thoff)
+{
+       struct tcphdr *tcph;
+
+       if (proto != IPPROTO_TCP)
+               return 0;
+
+       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+               return -1;
+
+       tcph = (void *)(skb_network_header(skb) + thoff);
+       if (unlikely(tcph->fin || tcph->rst)) {
+               flow_offload_teardown(flow);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+                             __be32 addr, __be32 new_addr)
+{
+       struct tcphdr *tcph;
+
+       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+           skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+               return -1;
+
+       tcph = (void *)(skb_network_header(skb) + thoff);
+       inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+       return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+                             __be32 addr, __be32 new_addr)
+{
+       struct udphdr *udph;
+
+       if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+           skb_try_make_writable(skb, thoff + sizeof(*udph)))
+               return -1;
+
+       udph = (void *)(skb_network_header(skb) + thoff);
+       if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+               inet_proto_csum_replace4(&udph->check, skb, addr,
+                                        new_addr, true);
+               if (!udph->check)
+                       udph->check = CSUM_MANGLED_0;
+       }
+
+       return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+                                 unsigned int thoff, __be32 addr,
+                                 __be32 new_addr)
+{
+       switch (iph->protocol) {
+       case IPPROTO_TCP:
+               if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+                       return NF_DROP;
+               break;
+       case IPPROTO_UDP:
+               if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+                       return NF_DROP;
+               break;
+       }
+
+       return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+                          struct iphdr *iph, unsigned int thoff,
+                          enum flow_offload_tuple_dir dir)
+{
+       __be32 addr, new_addr;
+
+       switch (dir) {
+       case FLOW_OFFLOAD_DIR_ORIGINAL:
+               addr = iph->saddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+               iph->saddr = new_addr;
+               break;
+       case FLOW_OFFLOAD_DIR_REPLY:
+               addr = iph->daddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+               iph->daddr = new_addr;
+               break;
+       default:
+               return -1;
+       }
+       csum_replace4(&iph->check, addr, new_addr);
+
+       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+                          struct iphdr *iph, unsigned int thoff,
+                          enum flow_offload_tuple_dir dir)
+{
+       __be32 addr, new_addr;
+
+       switch (dir) {
+       case FLOW_OFFLOAD_DIR_ORIGINAL:
+               addr = iph->daddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+               iph->daddr = new_addr;
+               break;
+       case FLOW_OFFLOAD_DIR_REPLY:
+               addr = iph->saddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+               iph->saddr = new_addr;
+               break;
+       default:
+               return -1;
+       }
+       csum_replace4(&iph->check, addr, new_addr);
+
+       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+                         unsigned int thoff, enum flow_offload_tuple_dir dir)
+{
+       struct iphdr *iph = ip_hdr(skb);
+
+       if (flow->flags & FLOW_OFFLOAD_SNAT &&
+           (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+            nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+               return -1;
+       if (flow->flags & FLOW_OFFLOAD_DNAT &&
+           (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+            nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+               return -1;
+
+       return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+       return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+                           struct flow_offload_tuple *tuple)
+{
+       struct flow_ports *ports;
+       unsigned int thoff;
+       struct iphdr *iph;
+
+       if (!pskb_may_pull(skb, sizeof(*iph)))
+               return -1;
+
+       iph = ip_hdr(skb);
+       thoff = iph->ihl * 4;
+
+       if (ip_is_fragment(iph) ||
+           unlikely(ip_has_options(thoff)))
+               return -1;
+
+       if (iph->protocol != IPPROTO_TCP &&
+           iph->protocol != IPPROTO_UDP)
+               return -1;
+
+       thoff = iph->ihl * 4;
+       if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+               return -1;
+
+       ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+       tuple->src_v4.s_addr    = iph->saddr;
+       tuple->dst_v4.s_addr    = iph->daddr;
+       tuple->src_port         = ports->source;
+       tuple->dst_port         = ports->dest;
+       tuple->l3proto          = AF_INET;
+       tuple->l4proto          = iph->protocol;
+       tuple->iifidx           = dev->ifindex;
+
+       return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+       if (skb->len <= mtu)
+               return false;
+
+       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
+               return false;
+
+       return true;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+                       const struct nf_hook_state *state)
+{
+       struct flow_offload_tuple_rhash *tuplehash;
+       struct nf_flowtable *flow_table = priv;
+       struct flow_offload_tuple tuple = {};
+       enum flow_offload_tuple_dir dir;
+       struct flow_offload *flow;
+       struct net_device *outdev;
+       const struct rtable *rt;
+       unsigned int thoff;
+       struct iphdr *iph;
+       __be32 nexthop;
+
+       if (skb->protocol != htons(ETH_P_IP))
+               return NF_ACCEPT;
+
+       if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+               return NF_ACCEPT;
+
+       tuplehash = flow_offload_lookup(flow_table, &tuple);
+       if (tuplehash == NULL)
+               return NF_ACCEPT;
+
+       outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+       if (!outdev)
+               return NF_ACCEPT;
+
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+       rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+
+       if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
+           (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
+               return NF_ACCEPT;
+
+       if (skb_try_make_writable(skb, sizeof(*iph)))
+               return NF_DROP;
+
+       thoff = ip_hdr(skb)->ihl * 4;
+       if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
+               return NF_ACCEPT;
+
+       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+           nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+               return NF_DROP;
+
+       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+       iph = ip_hdr(skb);
+       ip_decrease_ttl(iph);
+
+       skb->dev = outdev;
+       nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+       neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+       return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+                               struct in6_addr *addr,
+                               struct in6_addr *new_addr)
+{
+       struct tcphdr *tcph;
+
+       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+           skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+               return -1;
+
+       tcph = (void *)(skb_network_header(skb) + thoff);
+       inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+                                 new_addr->s6_addr32, true);
+
+       return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+                               struct in6_addr *addr,
+                               struct in6_addr *new_addr)
+{
+       struct udphdr *udph;
+
+       if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+           skb_try_make_writable(skb, thoff + sizeof(*udph)))
+               return -1;
+
+       udph = (void *)(skb_network_header(skb) + thoff);
+       if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+               inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+                                         new_addr->s6_addr32, true);
+               if (!udph->check)
+                       udph->check = CSUM_MANGLED_0;
+       }
+
+       return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+                                   unsigned int thoff, struct in6_addr *addr,
+                                   struct in6_addr *new_addr)
+{
+       switch (ip6h->nexthdr) {
+       case IPPROTO_TCP:
+               if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+                       return NF_DROP;
+               break;
+       case IPPROTO_UDP:
+               if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+                       return NF_DROP;
+               break;
+       }
+
+       return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+                            struct sk_buff *skb, struct ipv6hdr *ip6h,
+                            unsigned int thoff,
+                            enum flow_offload_tuple_dir dir)
+{
+       struct in6_addr addr, new_addr;
+
+       switch (dir) {
+       case FLOW_OFFLOAD_DIR_ORIGINAL:
+               addr = ip6h->saddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+               ip6h->saddr = new_addr;
+               break;
+       case FLOW_OFFLOAD_DIR_REPLY:
+               addr = ip6h->daddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+               ip6h->daddr = new_addr;
+               break;
+       default:
+               return -1;
+       }
+
+       return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+                            struct sk_buff *skb, struct ipv6hdr *ip6h,
+                            unsigned int thoff,
+                            enum flow_offload_tuple_dir dir)
+{
+       struct in6_addr addr, new_addr;
+
+       switch (dir) {
+       case FLOW_OFFLOAD_DIR_ORIGINAL:
+               addr = ip6h->daddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+               ip6h->daddr = new_addr;
+               break;
+       case FLOW_OFFLOAD_DIR_REPLY:
+               addr = ip6h->saddr;
+               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+               ip6h->saddr = new_addr;
+               break;
+       default:
+               return -1;
+       }
+
+       return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+                           struct sk_buff *skb,
+                           enum flow_offload_tuple_dir dir)
+{
+       struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       unsigned int thoff = sizeof(*ip6h);
+
+       if (flow->flags & FLOW_OFFLOAD_SNAT &&
+           (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+            nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+               return -1;
+       if (flow->flags & FLOW_OFFLOAD_DNAT &&
+           (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+            nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+               return -1;
+
+       return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+                             struct flow_offload_tuple *tuple)
+{
+       struct flow_ports *ports;
+       struct ipv6hdr *ip6h;
+       unsigned int thoff;
+
+       if (!pskb_may_pull(skb, sizeof(*ip6h)))
+               return -1;
+
+       ip6h = ipv6_hdr(skb);
+
+       if (ip6h->nexthdr != IPPROTO_TCP &&
+           ip6h->nexthdr != IPPROTO_UDP)
+               return -1;
+
+       thoff = sizeof(*ip6h);
+       if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+               return -1;
+
+       ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+       tuple->src_v6           = ip6h->saddr;
+       tuple->dst_v6           = ip6h->daddr;
+       tuple->src_port         = ports->source;
+       tuple->dst_port         = ports->dest;
+       tuple->l3proto          = AF_INET6;
+       tuple->l4proto          = ip6h->nexthdr;
+       tuple->iifidx           = dev->ifindex;
+
+       return 0;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+                         const struct nf_hook_state *state)
+{
+       struct flow_offload_tuple_rhash *tuplehash;
+       struct nf_flowtable *flow_table = priv;
+       struct flow_offload_tuple tuple = {};
+       enum flow_offload_tuple_dir dir;
+       struct flow_offload *flow;
+       struct net_device *outdev;
+       struct in6_addr *nexthop;
+       struct ipv6hdr *ip6h;
+       struct rt6_info *rt;
+
+       if (skb->protocol != htons(ETH_P_IPV6))
+               return NF_ACCEPT;
+
+       if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+               return NF_ACCEPT;
+
+       tuplehash = flow_offload_lookup(flow_table, &tuple);
+       if (tuplehash == NULL)
+               return NF_ACCEPT;
+
+       outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+       if (!outdev)
+               return NF_ACCEPT;
+
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+       rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+
+       if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+               return NF_ACCEPT;
+
+       if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
+                               sizeof(*ip6h)))
+               return NF_ACCEPT;
+
+       if (skb_try_make_writable(skb, sizeof(*ip6h)))
+               return NF_DROP;
+
+       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+           nf_flow_nat_ipv6(flow, skb, dir) < 0)
+               return NF_DROP;
+
+       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+       ip6h = ipv6_hdr(skb);
+       ip6h->hop_limit--;
+
+       skb->dev = outdev;
+       nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+       neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+       return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
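Both hooks in the new nf_flow_table_ip.c follow the same shape: parse a tuple, look it up, verify the packet fits the cached MTU (with the additional DF test on IPv4), bail out of the fast path on TCP FIN/RST, apply NAT, refresh the timeout, decrement the TTL or hop limit and transmit via neigh_xmit(), returning NF_STOLEN. The MTU gate as a standalone numeric toy (the GSO branch folded into one boolean):

    #include <stdbool.h>
    #include <stdio.h>

    static bool exceeds_mtu(unsigned int len, unsigned int mtu, bool gso_fits)
    {
            if (len <= mtu)
                    return false;
            if (gso_fits)   /* skb_is_gso() && skb_gso_validate_network_len() */
                    return false;
            return true;
    }

    int main(void)
    {
            printf("%d\n", exceeds_mtu(1400, 1500, false)); /* 0: fast path */
            printf("%d\n", exceeds_mtu(1600, 1500, false)); /* 1: slow path */
            return 0;
    }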
index 617693ff9f4cbdc620046c220840a87d5b5e5046..37b3c9913b08e16918b239cf7c3decd8258324ed 100644 (file)
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
 static int in_range(const struct nf_nat_l3proto *l3proto,
                    const struct nf_nat_l4proto *l4proto,
                    const struct nf_conntrack_tuple *tuple,
-                   const struct nf_nat_range *range)
+                   const struct nf_nat_range2 *range)
 {
        /* If we are supposed to map IPs, then we must be in the
         * range specified, otherwise let this drag us onto a new src IP.
@@ -194,7 +194,7 @@ find_appropriate_src(struct net *net,
                     const struct nf_nat_l4proto *l4proto,
                     const struct nf_conntrack_tuple *tuple,
                     struct nf_conntrack_tuple *result,
-                    const struct nf_nat_range *range)
+                    const struct nf_nat_range2 *range)
 {
        unsigned int h = hash_by_src(net, tuple);
        const struct nf_conn *ct;
@@ -224,7 +224,7 @@ find_appropriate_src(struct net *net,
 static void
 find_best_ips_proto(const struct nf_conntrack_zone *zone,
                    struct nf_conntrack_tuple *tuple,
-                   const struct nf_nat_range *range,
+                   const struct nf_nat_range2 *range,
                    const struct nf_conn *ct,
                    enum nf_nat_manip_type maniptype)
 {
@@ -298,7 +298,7 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
                 const struct nf_conntrack_tuple *orig_tuple,
-                const struct nf_nat_range *range,
+                const struct nf_nat_range2 *range,
                 struct nf_conn *ct,
                 enum nf_nat_manip_type maniptype)
 {
@@ -349,9 +349,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
        /* Only bother mapping if it's not already in range and unique */
        if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
                if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-                       if (l4proto->in_range(tuple, maniptype,
-                                             &range->min_proto,
-                                             &range->max_proto) &&
+                       if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
+                           l4proto->in_range(tuple, maniptype,
+                                 &range->min_proto,
+                                 &range->max_proto) &&
                            (range->min_proto.all == range->max_proto.all ||
                             !nf_nat_used_tuple(tuple, ct)))
                                goto out;
@@ -360,7 +361,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
                }
        }
 
-       /* Last change: get protocol to try to obtain unique tuple. */
+       /* Last chance: get protocol to try to obtain unique tuple. */
        l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
 out:
        rcu_read_unlock();
@@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
 
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
-                 const struct nf_nat_range *range,
+                 const struct nf_nat_range2 *range,
                  enum nf_nat_manip_type maniptype)
 {
        struct net *net = nf_ct_net(ct);
@@ -459,7 +460,7 @@ __nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
                (manip == NF_NAT_MANIP_SRC ?
                ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
                ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
-       struct nf_nat_range range = {
+       struct nf_nat_range2 range = {
                .flags          = NF_NAT_RANGE_MAP_IPS,
                .min_addr       = ip,
                .max_addr       = ip,
@@ -702,7 +703,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 
 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
                                     const struct nf_conn *ct,
-                                    struct nf_nat_range *range)
+                                    struct nf_nat_range2 *range)
 {
        struct nlattr *tb[CTA_PROTONAT_MAX+1];
        const struct nf_nat_l4proto *l4proto;
@@ -730,7 +731,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-                   const struct nf_conn *ct, struct nf_nat_range *range,
+                   const struct nf_conn *ct, struct nf_nat_range2 *range,
                    const struct nf_nat_l3proto *l3proto)
 {
        struct nlattr *tb[CTA_NAT_MAX+1];
@@ -758,7 +759,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
                          enum nf_nat_manip_type manip,
                          const struct nlattr *attr)
 {
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
        const struct nf_nat_l3proto *l3proto;
        int err;
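The pervasive signature change tracks the UAPI addition of struct nf_nat_range2, which extends the old range with a base_proto member consumed by the new NF_NAT_RANGE_PROTO_OFFSET mode. A compilable sketch of its shape (the two unions are simplified stand-ins, not the real kernel definitions):

    /* stand-ins so the sketch compiles; the kernel unions carry more */
    union nf_inet_addr           { unsigned int ip; };
    union nf_conntrack_man_proto { unsigned short all; };

    struct nf_nat_range2 {
            unsigned int                 flags;
            union nf_inet_addr           min_addr;
            union nf_inet_addr           max_addr;
            union nf_conntrack_man_proto min_proto;
            union nf_conntrack_man_proto max_proto;
            union nf_conntrack_man_proto base_proto; /* new member */
    };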
 
index 607a373379b40cf5cef2d4bba1bfefa58a8dfa33..99606baedda4903dc4fa360ac63d28fad1109b7e 100644 (file)
@@ -191,7 +191,7 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 void nf_nat_follow_master(struct nf_conn *ct,
                          struct nf_conntrack_expect *exp)
 {
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        /* This must be a fresh one. */
        BUG_ON(ct->status & IPS_NAT_DONE_MASK);
index 7d7466dbf66338f817bb6698b9dbd637de26d3ed..5d849d835561777f45dc70050509095a08be9cac 100644 (file)
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
 
 void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
                                 struct nf_conntrack_tuple *tuple,
-                                const struct nf_nat_range *range,
+                                const struct nf_nat_range2 *range,
                                 enum nf_nat_manip_type maniptype,
                                 const struct nf_conn *ct,
                                 u16 *rover)
@@ -83,6 +83,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
                                                  : tuple->src.u.all);
        } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
                off = prandom_u32();
+       } else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
+               off = (ntohs(*portptr) - ntohs(range->base_proto.all));
        } else {
                off = *rover;
        }
@@ -91,7 +93,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
                *portptr = htons(min + off % range_size);
                if (++i != range_size && nf_nat_used_tuple(tuple, ct))
                        continue;
-               if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
+               if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
+                                       NF_NAT_RANGE_PROTO_OFFSET)))
                        *rover = off;
                return;
        }
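NF_NAT_RANGE_PROTO_OFFSET derives the offset from the packet's own port relative to base_proto, so a contiguous block of source ports shifts by a constant instead of being picked from the rover; note the hunk above also leaves the rover untouched in that mode. Worked numerically (the values are illustrative):

    #include <stdio.h>

    int main(void)
    {
            unsigned short orig = 5060, base = 5000;
            unsigned short min = 10000, range_size = 1000;

            unsigned short off = orig - base;              /* 60 */
            unsigned short mapped = min + off % range_size;

            printf("%u -> %u\n", orig, mapped);            /* 5060 -> 10060 */
            return 0;
    }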
@@ -100,7 +103,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
-                                  struct nf_nat_range *range)
+                                  struct nf_nat_range2 *range)
 {
        if (tb[CTA_PROTONAT_PORT_MIN]) {
                range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
index 269fcd5dc34c495104494ea2890dcc75044d54b6..67ea0d83aa5a8a2399b81628e59b395b598b5267 100644 (file)
@@ -23,7 +23,7 @@ static u_int16_t dccp_port_rover;
 static void
 dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
                  struct nf_conntrack_tuple *tuple,
-                 const struct nf_nat_range *range,
+                 const struct nf_nat_range2 *range,
                  enum nf_nat_manip_type maniptype,
                  const struct nf_conn *ct)
 {
index c57ee3240b1d5e848077c61f37c72a40ed0d1afd..1c5d9b65fbbabb9ce887a1bebbc0f55ee6aeb831 100644 (file)
@@ -17,7 +17,7 @@ static u_int16_t nf_sctp_port_rover;
 static void
 sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
                  struct nf_conntrack_tuple *tuple,
-                 const struct nf_nat_range *range,
+                 const struct nf_nat_range2 *range,
                  enum nf_nat_manip_type maniptype,
                  const struct nf_conn *ct)
 {
index 4f8820fc514804d775274330f590fe0d1dbab54f..f15fcd475f98783c1b610e3b3b1e35aac4b7c832 100644 (file)
@@ -23,7 +23,7 @@ static u16 tcp_port_rover;
 static void
 tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
                 struct nf_conntrack_tuple *tuple,
-                const struct nf_nat_range *range,
+                const struct nf_nat_range2 *range,
                 enum nf_nat_manip_type maniptype,
                 const struct nf_conn *ct)
 {
index edd4a77dc09a837e71e4322d328033ee0af90ee9..5790f70a83b28154490a2a774f8a0f7288a38414 100644 (file)
@@ -22,7 +22,7 @@ static u16 udp_port_rover;
 static void
 udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
                 struct nf_conntrack_tuple *tuple,
-                const struct nf_nat_range *range,
+                const struct nf_nat_range2 *range,
                 enum nf_nat_manip_type maniptype,
                 const struct nf_conn *ct)
 {
@@ -100,7 +100,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
 static void
 udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
                     struct nf_conntrack_tuple *tuple,
-                    const struct nf_nat_range *range,
+                    const struct nf_nat_range2 *range,
                     enum nf_nat_manip_type maniptype,
                     const struct nf_conn *ct)
 {
index 6e494d5844128077dadc9f1cf1c44f2f039043f0..c5db3e251232b092c6cd55a3beb7a8b9b1e8a062 100644 (file)
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 
 static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
                                 struct nf_conntrack_tuple *tuple,
-                                const struct nf_nat_range *range,
+                                const struct nf_nat_range2 *range,
                                 enum nf_nat_manip_type maniptype,
                                 const struct nf_conn *ct)
 {
index 25b06b959118d287d574ae649dae4a863aab6f13..7c4bb0a773ca2237670bd24469ac18b441761c63 100644 (file)
@@ -36,7 +36,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
        __be32 newdst;
-       struct nf_nat_range newrange;
+       struct nf_nat_range2 newrange;
 
        WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
                hooknum != NF_INET_LOCAL_OUT);
@@ -82,10 +82,10 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4);
 static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
 
 unsigned int
-nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                     unsigned int hooknum)
 {
-       struct nf_nat_range newrange;
+       struct nf_nat_range2 newrange;
        struct in6_addr newdst;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
index 791fac4fd74534e0481d409a182c2d5e5544deac..1f30860749817c2be1c8e4c65e3b6f29b851ac24 100644 (file)
@@ -316,7 +316,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
 static void nf_nat_sip_expected(struct nf_conn *ct,
                                struct nf_conntrack_expect *exp)
 {
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        /* This must be a fresh one. */
        BUG_ON(ct->status & IPS_NAT_DONE_MASK);
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
new file mode 100644 (file)
index 0000000..5ba5c7b
--- /dev/null
@@ -0,0 +1,218 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/capability.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/nf_osf.h>
+
+static inline int nf_osf_ttl(const struct sk_buff *skb,
+                            const struct nf_osf_info *info,
+                            unsigned char f_ttl)
+{
+       const struct iphdr *ip = ip_hdr(skb);
+
+       if (info->flags & NF_OSF_TTL) {
+               if (info->ttl == NF_OSF_TTL_TRUE)
+                       return ip->ttl == f_ttl;
+               if (info->ttl == NF_OSF_TTL_NOCHECK)
+                       return 1;
+               else if (ip->ttl <= f_ttl)
+                       return 1;
+               else {
+                       struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+                       int ret = 0;
+
+                       for_ifa(in_dev) {
+                               if (inet_ifa_match(ip->saddr, ifa)) {
+                                       ret = (ip->ttl == f_ttl);
+                                       break;
+                               }
+                       }
+                       endfor_ifa(in_dev);
+
+                       return ret;
+               }
+       }
+
+       return ip->ttl == f_ttl;
+}
+
+bool
+nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+            int hooknum, struct net_device *in, struct net_device *out,
+            const struct nf_osf_info *info, struct net *net,
+            const struct list_head *nf_osf_fingers)
+{
+       const unsigned char *optp = NULL, *_optp = NULL;
+       unsigned int optsize = 0, check_WSS = 0;
+       int fmatch = FMATCH_WRONG, fcount = 0;
+       const struct iphdr *ip = ip_hdr(skb);
+       const struct nf_osf_user_finger *f;
+       unsigned char opts[MAX_IPOPTLEN];
+       const struct nf_osf_finger *kf;
+       u16 window, totlen, mss = 0;
+       const struct tcphdr *tcp;
+       struct tcphdr _tcph;
+       bool df;
+
+       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+       if (!tcp)
+               return false;
+
+       if (!tcp->syn)
+               return false;
+
+       totlen = ntohs(ip->tot_len);
+       df = ntohs(ip->frag_off) & IP_DF;
+       window = ntohs(tcp->window);
+
+       if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+               optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+
+               _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+                               sizeof(struct tcphdr), optsize, opts);
+       }
+
+       list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
+               int foptsize, optnum;
+
+               f = &kf->finger;
+
+               if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
+                       continue;
+
+               optp = _optp;
+               fmatch = FMATCH_WRONG;
+
+               if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
+                       continue;
+
+               /*
+                * Should not happen if the userspace parser was written correctly.
+                */
+               if (f->wss.wc >= OSF_WSS_MAX)
+                       continue;
+
+               /* Check options */
+
+               foptsize = 0;
+               for (optnum = 0; optnum < f->opt_num; ++optnum)
+                       foptsize += f->opt[optnum].length;
+
+               if (foptsize > MAX_IPOPTLEN ||
+                   optsize > MAX_IPOPTLEN ||
+                   optsize != foptsize)
+                       continue;
+
+               check_WSS = f->wss.wc;
+
+               for (optnum = 0; optnum < f->opt_num; ++optnum) {
+                       if (f->opt[optnum].kind == (*optp)) {
+                               __u32 len = f->opt[optnum].length;
+                               const __u8 *optend = optp + len;
+
+                               fmatch = FMATCH_OK;
+
+                               switch (*optp) {
+                               case OSFOPT_MSS:
+                                       mss = optp[3];
+                                       mss <<= 8;
+                                       mss |= optp[2];
+
+                                       mss = ntohs((__force __be16)mss);
+                                       break;
+                               case OSFOPT_TS:
+                                       break;
+                               }
+
+                               optp = optend;
+                       } else
+                               fmatch = FMATCH_OPT_WRONG;
+
+                       if (fmatch != FMATCH_OK)
+                               break;
+               }
+
+               if (fmatch != FMATCH_OPT_WRONG) {
+                       fmatch = FMATCH_WRONG;
+
+                       switch (check_WSS) {
+                       case OSF_WSS_PLAIN:
+                               if (f->wss.val == 0 || window == f->wss.val)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MSS:
+                               /*
+                                * Some smart modems mangle (decrease) the MSS
+                                * to SMART_MSS_2, so we check the standard
+                                * value, the decreased value and the one
+                                * provided in the fingerprint.
+                                */
+#define SMART_MSS_1    1460
+#define SMART_MSS_2    1448
+                               if (window == f->wss.val * mss ||
+                                   window == f->wss.val * SMART_MSS_1 ||
+                                   window == f->wss.val * SMART_MSS_2)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MTU:
+                               if (window == f->wss.val * (mss + 40) ||
+                                   window == f->wss.val * (SMART_MSS_1 + 40) ||
+                                   window == f->wss.val * (SMART_MSS_2 + 40))
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MODULO:
+                               if ((window % f->wss.val) == 0)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       }
+               }
+
+               if (fmatch != FMATCH_OK)
+                       continue;
+
+               fcount++;
+
+               if (info->flags & NF_OSF_LOG)
+                       nf_log_packet(net, family, hooknum, skb,
+                                     in, out, NULL,
+                                     "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
+                                     f->genre, f->version, f->subtype,
+                                     &ip->saddr, ntohs(tcp->source),
+                                     &ip->daddr, ntohs(tcp->dest),
+                                     f->ttl - ip->ttl);
+
+               if ((info->flags & NF_OSF_LOG) &&
+                   info->loglevel == NF_OSF_LOGLEVEL_FIRST)
+                       break;
+       }
+
+       if (!fcount && (info->flags & NF_OSF_LOG))
+               nf_log_packet(net, family, hooknum, skb, in, out, NULL,
+                             "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
+                             &ip->saddr, ntohs(tcp->source),
+                             &ip->daddr, ntohs(tcp->dest));
+
+       if (fcount)
+               fmatch = FMATCH_OK;
+
+       return fmatch == FMATCH_OK;
+}
+EXPORT_SYMBOL_GPL(nf_osf_match);
+
+MODULE_LICENSE("GPL");
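The four window-size check modes above compare the advertised TCP window against different bases: the raw fingerprint value, multiples of the MSS, multiples of the MTU (MSS plus 40 bytes of IP and TCP headers), or simple divisibility. A minimal userspace sketch of the same arithmetic, with the SMART_MSS_* constants copied from the code and a simplified stand-in for the fingerprint's wss part:

#include <stdint.h>
#include <stdio.h>

#define SMART_MSS_1 1460
#define SMART_MSS_2 1448

/* Simplified stand-in for the wss part of a fingerprint. */
struct wss { int wc; uint32_t val; };
enum { WSS_PLAIN, WSS_MSS, WSS_MTU, WSS_MODULO };

static int wss_match(struct wss w, uint32_t window, uint16_t mss)
{
	switch (w.wc) {
	case WSS_PLAIN:  /* exact window; 0 acts as a wildcard */
		return w.val == 0 || window == w.val;
	case WSS_MSS:    /* window is a multiple of the MSS */
		return window == w.val * mss ||
		       window == w.val * SMART_MSS_1 ||
		       window == w.val * SMART_MSS_2;
	case WSS_MTU:    /* multiple of MTU = MSS + 40 header bytes */
		return window == w.val * (mss + 40) ||
		       window == w.val * (SMART_MSS_1 + 40) ||
		       window == w.val * (SMART_MSS_2 + 40);
	case WSS_MODULO: /* window divisible by the value */
		return (window % w.val) == 0;
	}
	return 0;
}

int main(void)
{
	struct wss w = { WSS_MSS, 4 };

	/* 4 * 1460 = 5840, the classic Linux SYN window */
	printf("%d\n", wss_match(w, 5840, 1460)); /* prints 1 */
	return 0;
}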
index 9134cc429ad485aa86301019f50fb4f5d2172b52..a5f3743fda659f3c1dea3806ae1dc798a9a99933 100644 (file)
@@ -214,6 +214,34 @@ static int nft_delchain(struct nft_ctx *ctx)
        return err;
 }
 
+static void nft_rule_expr_activate(const struct nft_ctx *ctx,
+                                  struct nft_rule *rule)
+{
+       struct nft_expr *expr;
+
+       expr = nft_expr_first(rule);
+       while (expr != nft_expr_last(rule) && expr->ops) {
+               if (expr->ops->activate)
+                       expr->ops->activate(ctx, expr);
+
+               expr = nft_expr_next(expr);
+       }
+}
+
+static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
+                                    struct nft_rule *rule)
+{
+       struct nft_expr *expr;
+
+       expr = nft_expr_first(rule);
+       while (expr != nft_expr_last(rule) && expr->ops) {
+               if (expr->ops->deactivate)
+                       expr->ops->deactivate(ctx, expr);
+
+               expr = nft_expr_next(expr);
+       }
+}
+
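The new activate/deactivate helpers walk every expression embedded in a rule with the nft_expr_first/last/next iterator trio. The same packed-array iteration idiom, sketched in plain C with hypothetical types (in the kernel, the bounds come from rule->dlen and each expression's ops->size):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Hypothetical analogue: expressions packed back to back. */
struct expr { uint16_t size; char name[14]; };

static struct expr *expr_first(char *data) { return (struct expr *)data; }
static struct expr *expr_last(char *data, size_t dlen)
{
	return (struct expr *)(data + dlen);
}
static struct expr *expr_next(struct expr *e)
{
	return (struct expr *)((char *)e + e->size);
}

int main(void)
{
	struct expr storage[2] = {
		{ sizeof(struct expr), "match"   },
		{ sizeof(struct expr), "counter" },
	};
	char *buf = (char *)storage;
	size_t dlen = sizeof(storage);

	/* Same shape as the activate/deactivate loops above. */
	for (struct expr *e = expr_first(buf);
	     e != expr_last(buf, dlen); e = expr_next(e))
		printf("visit %s\n", e->name);
	return 0;
}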
 static int
 nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
 {
@@ -259,6 +287,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
                nft_trans_destroy(trans);
                return err;
        }
+       nft_rule_expr_deactivate(ctx, rule);
 
        return 0;
 }
@@ -386,13 +415,17 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 {
        struct nft_table *table;
 
+       if (nla == NULL)
+               return ERR_PTR(-EINVAL);
+
        list_for_each_entry(table, &net->nft.tables, list) {
                if (!nla_strcmp(nla, table->name) &&
                    table->family == family &&
                    nft_active_genmask(table, genmask))
                        return table;
        }
-       return NULL;
+
+       return ERR_PTR(-ENOENT);
 }
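With this change, nft_table_lookup reports failure through the kernel's error-pointer convention rather than NULL, so callers can tell a missing attribute (-EINVAL) from a missing table (-ENOENT) without the extra nf_tables_table_lookup wrapper deleted below. A freestanding sketch of that convention (the real helpers live in include/linux/err.h and treat the last page of the address space as the error range):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

/* hypothetical lookup following the same contract */
static void *lookup(const char *name)
{
	if (!name)
		return ERR_PTR(-EINVAL); /* attribute missing */
	return ERR_PTR(-ENOENT);         /* nothing found */
}

int main(void)
{
	void *t = lookup(NULL);

	if (IS_ERR(t))
		printf("lookup failed: %ld\n", PTR_ERR(t)); /* -22 */
	return 0;
}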
 
 static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
@@ -406,37 +439,6 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
                    nft_active_genmask(table, genmask))
                        return table;
        }
-       return NULL;
-}
-
-static struct nft_table *nf_tables_table_lookup(const struct net *net,
-                                               const struct nlattr *nla,
-                                               u8 family, u8 genmask)
-{
-       struct nft_table *table;
-
-       if (nla == NULL)
-               return ERR_PTR(-EINVAL);
-
-       table = nft_table_lookup(net, nla, family, genmask);
-       if (table != NULL)
-               return table;
-
-       return ERR_PTR(-ENOENT);
-}
-
-static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
-                                                        const struct nlattr *nla,
-                                                        u8 genmask)
-{
-       struct nft_table *table;
-
-       if (nla == NULL)
-               return ERR_PTR(-EINVAL);
-
-       table = nft_table_lookup_byhandle(net, nla, genmask);
-       if (table != NULL)
-               return table;
 
        return ERR_PTR(-ENOENT);
 }
@@ -608,10 +610,11 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
                return netlink_dump_start(nlsk, skb, nlh, &c);
        }
 
-       table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
                return PTR_ERR(table);
+       }
 
        skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb2)
@@ -727,21 +730,23 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u8 genmask = nft_genmask_next(net);
-       const struct nlattr *name;
-       struct nft_table *table;
        int family = nfmsg->nfgen_family;
+       const struct nlattr *attr;
+       struct nft_table *table;
        u32 flags = 0;
        struct nft_ctx ctx;
        int err;
 
-       name = nla[NFTA_TABLE_NAME];
-       table = nf_tables_table_lookup(net, name, family, genmask);
+       attr = nla[NFTA_TABLE_NAME];
+       table = nft_table_lookup(net, attr, family, genmask);
        if (IS_ERR(table)) {
                if (PTR_ERR(table) != -ENOENT)
                        return PTR_ERR(table);
        } else {
-               if (nlh->nlmsg_flags & NLM_F_EXCL)
+               if (nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, attr);
                        return -EEXIST;
+               }
                if (nlh->nlmsg_flags & NLM_F_REPLACE)
                        return -EOPNOTSUPP;
 
@@ -760,7 +765,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
        if (table == NULL)
                goto err_kzalloc;
 
-       table->name = nla_strdup(name, GFP_KERNEL);
+       table->name = nla_strdup(attr, GFP_KERNEL);
        if (table->name == NULL)
                goto err_strdup;
 
@@ -883,8 +888,9 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u8 genmask = nft_genmask_next(net);
-       struct nft_table *table;
        int family = nfmsg->nfgen_family;
+       const struct nlattr *attr;
+       struct nft_table *table;
        struct nft_ctx ctx;
 
        nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
@@ -892,16 +898,18 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
            (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
                return nft_flush(&ctx, family);
 
-       if (nla[NFTA_TABLE_HANDLE])
-               table = nf_tables_table_lookup_byhandle(net,
-                                                       nla[NFTA_TABLE_HANDLE],
-                                                       genmask);
-       else
-               table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
-                                              family, genmask);
+       if (nla[NFTA_TABLE_HANDLE]) {
+               attr = nla[NFTA_TABLE_HANDLE];
+               table = nft_table_lookup_byhandle(net, attr, genmask);
+       } else {
+               attr = nla[NFTA_TABLE_NAME];
+               table = nft_table_lookup(net, attr, family, genmask);
+       }
 
-       if (IS_ERR(table))
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, attr);
                return PTR_ERR(table);
+       }
 
        if (nlh->nlmsg_flags & NLM_F_NONREC &&
            table->use > 0)
@@ -949,8 +957,7 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
  */
 
 static struct nft_chain *
-nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
-                               u8 genmask)
+nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 {
        struct nft_chain *chain;
 
@@ -963,9 +970,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
        return ERR_PTR(-ENOENT);
 }
 
-static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
-                                               const struct nlattr *nla,
-                                               u8 genmask)
+static struct nft_chain *nft_chain_lookup(const struct nft_table *table,
+                                         const struct nlattr *nla, u8 genmask)
 {
        struct nft_chain *chain;
 
@@ -1194,14 +1200,17 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
                return netlink_dump_start(nlsk, skb, nlh, &c);
        }
 
-       table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
                return PTR_ERR(table);
+       }
 
-       chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
-       if (IS_ERR(chain))
+       chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+       if (IS_ERR(chain)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
                return PTR_ERR(chain);
+       }
 
        skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb2)
@@ -1513,8 +1522,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
            nla[NFTA_CHAIN_NAME]) {
                struct nft_chain *chain2;
 
-               chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
-                                               genmask);
+               chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
                if (!IS_ERR(chain2))
                        return -EEXIST;
        }
@@ -1564,9 +1572,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
                              struct netlink_ext_ack *extack)
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-       const struct nlattr * uninitialized_var(name);
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
+       const struct nlattr *attr;
        struct nft_table *table;
        struct nft_chain *chain;
        u8 policy = NF_ACCEPT;
@@ -1576,36 +1584,46 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
        create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
                return PTR_ERR(table);
+       }
 
        chain = NULL;
-       name = nla[NFTA_CHAIN_NAME];
+       attr = nla[NFTA_CHAIN_NAME];
 
        if (nla[NFTA_CHAIN_HANDLE]) {
                handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
-               chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
-               if (IS_ERR(chain))
+               chain = nft_chain_lookup_byhandle(table, handle, genmask);
+               if (IS_ERR(chain)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
                        return PTR_ERR(chain);
+               }
+               attr = nla[NFTA_CHAIN_HANDLE];
        } else {
-               chain = nf_tables_chain_lookup(table, name, genmask);
+               chain = nft_chain_lookup(table, attr, genmask);
                if (IS_ERR(chain)) {
-                       if (PTR_ERR(chain) != -ENOENT)
+                       if (PTR_ERR(chain) != -ENOENT) {
+                               NL_SET_BAD_ATTR(extack, attr);
                                return PTR_ERR(chain);
+                       }
                        chain = NULL;
                }
        }
 
        if (nla[NFTA_CHAIN_POLICY]) {
                if (chain != NULL &&
-                   !nft_is_base_chain(chain))
+                   !nft_is_base_chain(chain)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
                        return -EOPNOTSUPP;
+               }
 
                if (chain == NULL &&
-                   nla[NFTA_CHAIN_HOOK] == NULL)
+                   nla[NFTA_CHAIN_HOOK] == NULL) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
                        return -EOPNOTSUPP;
+               }
 
                policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
                switch (policy) {
@@ -1620,8 +1638,10 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
        nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
        if (chain != NULL) {
-               if (nlh->nlmsg_flags & NLM_F_EXCL)
+               if (nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, attr);
                        return -EEXIST;
+               }
                if (nlh->nlmsg_flags & NLM_F_REPLACE)
                        return -EOPNOTSUPP;
 
@@ -1638,28 +1658,34 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u8 genmask = nft_genmask_next(net);
+       int family = nfmsg->nfgen_family;
+       const struct nlattr *attr;
        struct nft_table *table;
        struct nft_chain *chain;
        struct nft_rule *rule;
-       int family = nfmsg->nfgen_family;
        struct nft_ctx ctx;
        u64 handle;
        u32 use;
        int err;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
                return PTR_ERR(table);
+       }
 
        if (nla[NFTA_CHAIN_HANDLE]) {
-               handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
-               chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+               attr = nla[NFTA_CHAIN_HANDLE];
+               handle = be64_to_cpu(nla_get_be64(attr));
+               chain = nft_chain_lookup_byhandle(table, handle, genmask);
        } else {
-               chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+               attr = nla[NFTA_CHAIN_NAME];
+               chain = nft_chain_lookup(table, attr, genmask);
        }
-       if (IS_ERR(chain))
+       if (IS_ERR(chain)) {
+               NL_SET_BAD_ATTR(extack, attr);
                return PTR_ERR(chain);
+       }
 
        if (nlh->nlmsg_flags & NLM_F_NONREC &&
            chain->use > 0)
@@ -1681,8 +1707,10 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
        /* There are rules and elements that are still holding references to us,
         * we cannot do a recursive removal in this case.
         */
-       if (use > 0)
+       if (use > 0) {
+               NL_SET_BAD_ATTR(extack, attr);
                return -EBUSY;
+       }
 
        return nft_delchain(&ctx);
 }
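The delete paths reworked in this patch all follow one pattern: pick the handle attribute when present, otherwise the name, and remember which attribute was actually used so NL_SET_BAD_ATTR can point error reporting at the right one. A condensed plain-C sketch of that control flow, with simplified stand-in types:

#include <errno.h>
#include <stdio.h>

struct attr { const char *payload; };
struct obj  { int use; };

static struct obj *lookup_byhandle(const struct attr *a) { (void)a; return NULL; }
static struct obj *lookup_byname(const struct attr *a)   { (void)a; return NULL; }

static int del_object(const struct attr *handle, const struct attr *name)
{
	const struct attr *attr;
	struct obj *obj;

	if (handle) {
		attr = handle;
		obj = lookup_byhandle(attr);
	} else {
		attr = name;
		obj = lookup_byname(attr);
	}
	if (!obj) {
		/* kernel: NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(...) */
		fprintf(stderr, "bad attribute: %s\n", attr->payload);
		return -ENOENT;
	}
	if (obj->use > 0)
		return -EBUSY; /* still referenced, refuse to delete */
	return 0;
}

int main(void)
{
	struct attr name = { "mytable" };

	printf("%d\n", del_object(NULL, &name)); /* -2 (ENOENT) */
	return 0;
}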
@@ -1939,8 +1967,8 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
  * Rules
  */
 
-static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
-                                               u64 handle)
+static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
+                                         u64 handle)
 {
        struct nft_rule *rule;
 
@@ -1953,13 +1981,13 @@ static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
        return ERR_PTR(-ENOENT);
 }
 
-static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
-                                             const struct nlattr *nla)
+static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
+                                       const struct nlattr *nla)
 {
        if (nla == NULL)
                return ERR_PTR(-EINVAL);
 
-       return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+       return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
 }
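Rule handles travel as big-endian 64-bit netlink attributes; nft_rule_lookup converts them with be64_to_cpu before scanning the chain. A plain-C, endian-independent equivalent of that decode step:

#include <stdint.h>
#include <stdio.h>

/* Decode a big-endian u64 byte by byte, like be64_to_cpu applied
 * to the raw attribute payload. */
static uint64_t get_be64(const unsigned char *p)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return v;
}

int main(void)
{
	unsigned char payload[8] = { 0, 0, 0, 0, 0, 0, 0, 7 };

	printf("handle=%llu\n", (unsigned long long)get_be64(payload)); /* 7 */
	return 0;
}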
 
 static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
@@ -2191,18 +2219,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
                return netlink_dump_start(nlsk, skb, nlh, &c);
        }
 
-       table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
                return PTR_ERR(table);
+       }
 
-       chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
-       if (IS_ERR(chain))
+       chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+       if (IS_ERR(chain)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                return PTR_ERR(chain);
+       }
 
-       rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
-       if (IS_ERR(rule))
+       rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+       if (IS_ERR(rule)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
                return PTR_ERR(rule);
+       }
 
        skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb2)
@@ -2238,6 +2271,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
        kfree(rule);
 }
 
+static void nf_tables_rule_release(const struct nft_ctx *ctx,
+                                  struct nft_rule *rule)
+{
+       nft_rule_expr_deactivate(ctx, rule);
+       nf_tables_rule_destroy(ctx, rule);
+}
+
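nf_tables_rule_release makes the teardown order explicit: deactivate first, so each expression can drop the references it took on activation, then destroy. A generic two-phase teardown sketch under that assumption (all names hypothetical):

#include <stdio.h>

struct res { int refs; };

static void activate(struct res *r)   { r->refs++; }
static void deactivate(struct res *r) { r->refs--; }

static void destroy(struct res *r)
{
	if (r->refs)
		fprintf(stderr, "leak: %d refs outstanding\n", r->refs);
}

/* release = deactivate + destroy, never destroy alone */
static void release(struct res *r)
{
	deactivate(r);
	destroy(r);
}

int main(void)
{
	struct res r = { 0 };

	activate(&r);
	release(&r); /* balanced: no warning printed */
	return 0;
}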
 #define NFT_RULE_MAXEXPRS      128
 
 static struct nft_expr_info *info;
@@ -2265,23 +2305,30 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
        create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
                return PTR_ERR(table);
+       }
 
-       chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
-       if (IS_ERR(chain))
+       chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+       if (IS_ERR(chain)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                return PTR_ERR(chain);
+       }
 
        if (nla[NFTA_RULE_HANDLE]) {
                handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
-               rule = __nf_tables_rule_lookup(chain, handle);
-               if (IS_ERR(rule))
+               rule = __nft_rule_lookup(chain, handle);
+               if (IS_ERR(rule)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
                        return PTR_ERR(rule);
+               }
 
-               if (nlh->nlmsg_flags & NLM_F_EXCL)
+               if (nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
                        return -EEXIST;
+               }
                if (nlh->nlmsg_flags & NLM_F_REPLACE)
                        old_rule = rule;
                else
@@ -2300,9 +2347,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
                        return -EOPNOTSUPP;
 
                pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
-               old_rule = __nf_tables_rule_lookup(chain, pos_handle);
-               if (IS_ERR(old_rule))
+               old_rule = __nft_rule_lookup(chain, pos_handle);
+               if (IS_ERR(old_rule)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
                        return PTR_ERR(old_rule);
+               }
        }
 
        nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
@@ -2361,43 +2410,48 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
        }
 
        if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-               if (nft_is_active_next(net, old_rule)) {
-                       trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
-                                                  old_rule);
-                       if (trans == NULL) {
-                               err = -ENOMEM;
-                               goto err2;
-                       }
-                       nft_deactivate_next(net, old_rule);
-                       chain->use--;
-                       list_add_tail_rcu(&rule->list, &old_rule->list);
-               } else {
+               if (!nft_is_active_next(net, old_rule)) {
                        err = -ENOENT;
                        goto err2;
                }
-       } else if (nlh->nlmsg_flags & NLM_F_APPEND)
-               if (old_rule)
-                       list_add_rcu(&rule->list, &old_rule->list);
-               else
-                       list_add_tail_rcu(&rule->list, &chain->rules);
-       else {
-               if (old_rule)
-                       list_add_tail_rcu(&rule->list, &old_rule->list);
-               else
-                       list_add_rcu(&rule->list, &chain->rules);
-       }
+               trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
+                                          old_rule);
+               if (trans == NULL) {
+                       err = -ENOMEM;
+                       goto err2;
+               }
+               nft_deactivate_next(net, old_rule);
+               chain->use--;
 
-       if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
-               err = -ENOMEM;
-               goto err3;
+               if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+                       err = -ENOMEM;
+                       goto err2;
+               }
+
+               list_add_tail_rcu(&rule->list, &old_rule->list);
+       } else {
+               if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+                       err = -ENOMEM;
+                       goto err2;
+               }
+
+               if (nlh->nlmsg_flags & NLM_F_APPEND) {
+                       if (old_rule)
+                               list_add_rcu(&rule->list, &old_rule->list);
+                       else
+                               list_add_tail_rcu(&rule->list, &chain->rules);
+               } else {
+                       if (old_rule)
+                               list_add_tail_rcu(&rule->list, &old_rule->list);
+                       else
+                               list_add_rcu(&rule->list, &chain->rules);
+               }
        }
        chain->use++;
        return 0;
 
-err3:
-       list_del_rcu(&rule->list);
 err2:
-       nf_tables_rule_destroy(&ctx, rule);
+       nf_tables_rule_release(&ctx, rule);
 err1:
        for (i = 0; i < n; i++) {
                if (info[i].ops != NULL)
@@ -2435,32 +2489,37 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
        int family = nfmsg->nfgen_family, err = 0;
        struct nft_ctx ctx;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
                return PTR_ERR(table);
+       }
 
        if (nla[NFTA_RULE_CHAIN]) {
-               chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN],
-                                              genmask);
-               if (IS_ERR(chain))
+               chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+               if (IS_ERR(chain)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                        return PTR_ERR(chain);
+               }
        }
 
        nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
        if (chain) {
                if (nla[NFTA_RULE_HANDLE]) {
-                       rule = nf_tables_rule_lookup(chain,
-                                                    nla[NFTA_RULE_HANDLE]);
-                       if (IS_ERR(rule))
+                       rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+                       if (IS_ERR(rule)) {
+                               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
                                return PTR_ERR(rule);
+                       }
 
                        err = nft_delrule(&ctx, rule);
                } else if (nla[NFTA_RULE_ID]) {
                        rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
-                       if (IS_ERR(rule))
+                       if (IS_ERR(rule)) {
+                               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
                                return PTR_ERR(rule);
+                       }
 
                        err = nft_delrule(&ctx, rule);
                } else {
@@ -2505,14 +2564,12 @@ void nft_unregister_set(struct nft_set_type *type)
 EXPORT_SYMBOL_GPL(nft_unregister_set);
 
 #define NFT_SET_FEATURES       (NFT_SET_INTERVAL | NFT_SET_MAP | \
-                                NFT_SET_TIMEOUT | NFT_SET_OBJECT)
+                                NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
+                                NFT_SET_EVAL)
 
-static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
+static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
 {
-       if ((flags & NFT_SET_EVAL) && !ops->update)
-               return false;
-
-       return (flags & ops->features) == (flags & NFT_SET_FEATURES);
+       return (flags & type->features) == (flags & NFT_SET_FEATURES);
 }
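With NFT_SET_EVAL folded into NFT_SET_FEATURES, candidacy reduces to a pure mask comparison: every feature the caller requested must be one the set type advertises, and flags outside NFT_SET_FEATURES are masked off on both sides so they are ignored. The comparison in isolation, with illustrative flag values:

#include <stdio.h>

#define F_INTERVAL 0x01
#define F_MAP      0x02
#define F_TIMEOUT  0x04
#define F_EVAL     0x08
#define FEATURES   (F_INTERVAL | F_MAP | F_TIMEOUT | F_EVAL)

static int candidate(unsigned type_features, unsigned flags)
{
	/* every requested-and-known flag must be supported */
	return (flags & type_features) == (flags & FEATURES);
}

int main(void)
{
	/* hash-type analogue: supports everything but intervals */
	unsigned hash = F_MAP | F_TIMEOUT | F_EVAL;

	printf("%d\n", candidate(hash, F_MAP | F_EVAL)); /* 1 */
	printf("%d\n", candidate(hash, F_INTERVAL));     /* 0 */
	return 0;
}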
 
 /*
@@ -2549,14 +2606,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
        best.space  = ~0;
 
        list_for_each_entry(type, &nf_tables_set_types, list) {
-               if (!type->select_ops)
-                       ops = type->ops;
-               else
-                       ops = type->select_ops(ctx, desc, flags);
-               if (!ops)
-                       continue;
+               ops = &type->ops;
 
-               if (!nft_set_ops_candidate(ops, flags))
+               if (!nft_set_ops_candidate(type, flags))
                        continue;
                if (!ops->estimate(desc, flags, &est))
                        continue;
@@ -2587,7 +2639,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
                if (!try_module_get(type->owner))
                        continue;
                if (bops != NULL)
-                       module_put(bops->type->owner);
+                       module_put(to_set_type(bops)->owner);
 
                bops = ops;
                best = est;
@@ -2628,6 +2680,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
                                     const struct sk_buff *skb,
                                     const struct nlmsghdr *nlh,
                                     const struct nlattr * const nla[],
+                                    struct netlink_ext_ack *extack,
                                     u8 genmask)
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2635,18 +2688,20 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
        struct nft_table *table = NULL;
 
        if (nla[NFTA_SET_TABLE] != NULL) {
-               table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
-                                              family, genmask);
-               if (IS_ERR(table))
+               table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+                                        genmask);
+               if (IS_ERR(table)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
                        return PTR_ERR(table);
+               }
        }
 
        nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
        return 0;
 }
 
-static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
-                                           const struct nlattr *nla, u8 genmask)
+static struct nft_set *nft_set_lookup(const struct nft_table *table,
+                                     const struct nlattr *nla, u8 genmask)
 {
        struct nft_set *set;
 
@@ -2661,14 +2716,12 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
        return ERR_PTR(-ENOENT);
 }
 
-static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
-                                                    const struct nlattr *nla, u8 genmask)
+static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
+                                              const struct nlattr *nla,
+                                              u8 genmask)
 {
        struct nft_set *set;
 
-       if (nla == NULL)
-               return ERR_PTR(-EINVAL);
-
        list_for_each_entry(set, &table->sets, list) {
                if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
                    nft_active_genmask(set, genmask))
@@ -2677,9 +2730,8 @@ static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *tab
        return ERR_PTR(-ENOENT);
 }
 
-static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
-                                                const struct nlattr *nla,
-                                                u8 genmask)
+static struct nft_set *nft_set_lookup_byid(const struct net *net,
+                                          const struct nlattr *nla, u8 genmask)
 {
        struct nft_trans *trans;
        u32 id = ntohl(nla_get_be32(nla));
@@ -2703,12 +2755,12 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
 {
        struct nft_set *set;
 
-       set = nf_tables_set_lookup(table, nla_set_name, genmask);
+       set = nft_set_lookup(table, nla_set_name, genmask);
        if (IS_ERR(set)) {
                if (!nla_set_id)
                        return set;
 
-               set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+               set = nft_set_lookup_byid(net, nla_set_id, genmask);
        }
        return set;
 }
@@ -2768,6 +2820,27 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
        return 0;
 }
 
+static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
+{
+       u64 ms = be64_to_cpu(nla_get_be64(nla));
+       u64 max = (u64)(~((u64)0));
+
+       max = div_u64(max, NSEC_PER_MSEC);
+       if (ms >= max)
+               return -ERANGE;
+
+       ms *= NSEC_PER_MSEC;
+       *result = nsecs_to_jiffies64(ms);
+       return 0;
+}
+
+static u64 nf_jiffies64_to_msecs(u64 input)
+{
+       u64 ms = jiffies64_to_nsecs(input);
+
+       return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC));
+}
+
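The new helpers move set timeouts to 64-bit jiffies and guard the conversion: a millisecond value is rejected with -ERANGE once ms * NSEC_PER_MSEC would overflow a u64. A standalone version of the bound check, with div_u64 and nsecs_to_jiffies64 replaced by plain arithmetic and an assumed tick rate:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC  1000000ULL
#define HZ             250ULL  /* assumed tick rate */
#define NSEC_PER_JIFFY (1000000000ULL / HZ)

static int msecs_to_jiffies64(uint64_t ms, uint64_t *result)
{
	uint64_t max = UINT64_MAX / NSEC_PER_MSEC;

	if (ms >= max)
		return -ERANGE; /* would overflow in nanoseconds */
	*result = (ms * NSEC_PER_MSEC) / NSEC_PER_JIFFY;
	return 0;
}

int main(void)
{
	uint64_t j;

	if (!msecs_to_jiffies64(1000, &j))
		printf("1s = %llu jiffies\n", (unsigned long long)j); /* 250 */
	printf("%d\n", msecs_to_jiffies64(UINT64_MAX, &j)); /* -34 */
	return 0;
}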
 static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
                              const struct nft_set *set, u16 event, u16 flags)
 {
@@ -2815,7 +2888,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 
        if (set->timeout &&
            nla_put_be64(skb, NFTA_SET_TIMEOUT,
-                        cpu_to_be64(jiffies_to_msecs(set->timeout)),
+                        nf_jiffies64_to_msecs(set->timeout),
                         NFTA_SET_PAD))
                goto nla_put_failure;
        if (set->gc_int &&
@@ -2953,7 +3026,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
        int err;
 
        /* Verify existence before starting dump */
-       err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
+       err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+                                       genmask);
        if (err < 0)
                return err;
 
@@ -2980,7 +3054,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
        if (!nla[NFTA_SET_TABLE])
                return -EINVAL;
 
-       set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+       set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
        if (IS_ERR(set))
                return PTR_ERR(set);
 
@@ -3110,8 +3184,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
        if (nla[NFTA_SET_TIMEOUT] != NULL) {
                if (!(flags & NFT_SET_TIMEOUT))
                        return -EINVAL;
-               timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
-                                               nla[NFTA_SET_TIMEOUT])));
+
+               err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
+               if (err)
+                       return err;
        }
        gc_int = 0;
        if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
@@ -3132,22 +3208,28 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 
        create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
                return PTR_ERR(table);
+       }
 
        nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
-       set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+       set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
        if (IS_ERR(set)) {
-               if (PTR_ERR(set) != -ENOENT)
+               if (PTR_ERR(set) != -ENOENT) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
                        return PTR_ERR(set);
+               }
        } else {
-               if (nlh->nlmsg_flags & NLM_F_EXCL)
+               if (nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
                        return -EEXIST;
+               }
                if (nlh->nlmsg_flags & NLM_F_REPLACE)
                        return -EOPNOTSUPP;
+
                return 0;
        }
 
@@ -3207,29 +3289,31 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 
        err = ops->init(set, &desc, nla);
        if (err < 0)
-               goto err2;
+               goto err3;
 
        err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
        if (err < 0)
-               goto err3;
+               goto err4;
 
        list_add_tail_rcu(&set->list, &table->sets);
        table->use++;
        return 0;
 
-err3:
+err4:
        ops->destroy(set);
+err3:
+       kfree(set->name);
 err2:
        kvfree(set);
 err1:
-       module_put(ops->type->owner);
+       module_put(to_set_type(ops)->owner);
        return err;
 }
 
 static void nft_set_destroy(struct nft_set *set)
 {
        set->ops->destroy(set);
-       module_put(set->ops->type->owner);
+       module_put(to_set_type(set->ops)->owner);
        kfree(set->name);
        kvfree(set);
 }
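to_set_type() recovers the enclosing nft_set_type from its embedded ops, which works now that each type carries exactly one ops structure; it is the classic container_of pattern. A freestanding illustration:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct set_ops  { int (*estimate)(void); };
struct set_type { const char *name; struct set_ops ops; };

/* analogue of to_set_type(): map an ops pointer back to its type */
static struct set_type *to_set_type(struct set_ops *ops)
{
	return container_of(ops, struct set_type, ops);
}

int main(void)
{
	struct set_type hash = { .name = "hash" };
	struct set_ops *ops = &hash.ops;

	printf("%s\n", to_set_type(ops)->name); /* hash */
	return 0;
}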
@@ -3248,6 +3332,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u8 genmask = nft_genmask_next(net);
+       const struct nlattr *attr;
        struct nft_set *set;
        struct nft_ctx ctx;
        int err;
@@ -3257,20 +3342,28 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
        if (nla[NFTA_SET_TABLE] == NULL)
                return -EINVAL;
 
-       err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
+       err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+                                       genmask);
        if (err < 0)
                return err;
 
-       if (nla[NFTA_SET_HANDLE])
-               set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
-       else
-               set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
-       if (IS_ERR(set))
-               return PTR_ERR(set);
+       if (nla[NFTA_SET_HANDLE]) {
+               attr = nla[NFTA_SET_HANDLE];
+               set = nft_set_lookup_byhandle(ctx.table, attr, genmask);
+       } else {
+               attr = nla[NFTA_SET_NAME];
+               set = nft_set_lookup(ctx.table, attr, genmask);
+       }
 
+       if (IS_ERR(set)) {
+               NL_SET_BAD_ATTR(extack, attr);
+               return PTR_ERR(set);
+       }
        if (!list_empty(&set->bindings) ||
-           (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
+           (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
+               NL_SET_BAD_ATTR(extack, attr);
                return -EBUSY;
+       }
 
        return nft_delset(&ctx, set);
 }
@@ -3360,8 +3453,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
                .align  = __alignof__(u64),
        },
        [NFT_SET_EXT_EXPIRATION]        = {
-               .len    = sizeof(unsigned long),
-               .align  = __alignof__(unsigned long),
+               .len    = sizeof(u64),
+               .align  = __alignof__(u64),
        },
        [NFT_SET_EXT_USERDATA]          = {
                .len    = sizeof(struct nft_userdata),
@@ -3398,16 +3491,19 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
                                      const struct sk_buff *skb,
                                      const struct nlmsghdr *nlh,
                                      const struct nlattr * const nla[],
+                                     struct netlink_ext_ack *extack,
                                      u8 genmask)
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        int family = nfmsg->nfgen_family;
        struct nft_table *table;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
-                                      family, genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+                                genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
                return PTR_ERR(table);
+       }
 
        nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
        return 0;
@@ -3451,22 +3547,21 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 
        if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
            nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
-                        cpu_to_be64(jiffies_to_msecs(
-                                               *nft_set_ext_timeout(ext))),
+                        nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
                         NFTA_SET_ELEM_PAD))
                goto nla_put_failure;
 
        if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
-               unsigned long expires, now = jiffies;
+               u64 expires, now = get_jiffies_64();
 
                expires = *nft_set_ext_expiration(ext);
-               if (time_before(now, expires))
+               if (time_before64(now, expires))
                        expires -= now;
                else
                        expires = 0;
 
                if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
-                                cpu_to_be64(jiffies_to_msecs(expires)),
+                                nf_jiffies64_to_msecs(expires),
                                 NFTA_SET_ELEM_PAD))
                        goto nla_put_failure;
        }
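Expiration moves from unsigned long jiffies to u64 jiffies64 here, and the remaining lifetime is computed with time_before64, which stays correct across counter wraparound by doing the comparison on the signed difference. The idiom in isolation:

#include <stdint.h>
#include <stdio.h>

/* same trick as the kernel's time_before64(): signed difference */
static int time_before64(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

static uint64_t remaining(uint64_t now, uint64_t expires)
{
	return time_before64(now, expires) ? expires - now : 0;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)remaining(100, 150)); /* 50 */
	printf("%llu\n", (unsigned long long)remaining(200, 150)); /* 0 */
	/* near wraparound: now just below 2^64, expiry just past it */
	printf("%llu\n", (unsigned long long)
	       remaining(UINT64_MAX - 5, (uint64_t)4)); /* 10 */
	return 0;
}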
@@ -3737,12 +3832,12 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
        struct nft_ctx ctx;
        int rem, err = 0;
 
-       err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+       err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+                                        genmask);
        if (err < 0)
                return err;
 
-       set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-                                  genmask);
+       set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
        if (IS_ERR(set))
                return PTR_ERR(set);
 
@@ -3841,7 +3936,7 @@ void *nft_set_elem_init(const struct nft_set *set,
                memcpy(nft_set_ext_data(ext), data, set->dlen);
        if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
                *nft_set_ext_expiration(ext) =
-                       jiffies + timeout;
+                       get_jiffies_64() + timeout;
        if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
                *nft_set_ext_timeout(ext) = timeout;
 
@@ -3928,8 +4023,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
        if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
                if (!(set->flags & NFT_SET_TIMEOUT))
                        return -EINVAL;
-               timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
-                                       nla[NFTA_SET_ELEM_TIMEOUT])));
+               err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT],
+                                           &timeout);
+               if (err)
+                       return err;
        } else if (set->flags & NFT_SET_TIMEOUT) {
                timeout = set->timeout;
        }
@@ -3954,8 +4051,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                        err = -EINVAL;
                        goto err2;
                }
-               obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
-                                          set->objtype, genmask);
+               obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
+                                    set->objtype, genmask);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err2;
@@ -4037,8 +4134,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                        if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
                            nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
                            nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
-                           nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
-                               return -EBUSY;
+                           nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) {
+                               err = -EBUSY;
+                               goto err5;
+                       }
                        if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
                             nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
                             memcmp(nft_set_ext_data(ext),
@@ -4092,7 +4191,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
        if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
                return -EINVAL;
 
-       err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+       err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+                                        genmask);
        if (err < 0)
                return err;
 
@@ -4123,7 +4223,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
  *     NFT_GOTO verdicts. This function must be called on active data objects
  *     from the second phase of the commit protocol.
  */
-static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
 {
        if (type == NFT_DATA_VERDICT) {
                switch (data->verdict.code) {
@@ -4280,12 +4380,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
        struct nft_ctx ctx;
        int rem, err = 0;
 
-       err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+       err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+                                        genmask);
        if (err < 0)
                return err;
 
-       set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-                                  genmask);
+       set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
        if (IS_ERR(set))
                return PTR_ERR(set);
        if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -4373,9 +4473,9 @@ void nft_unregister_obj(struct nft_object_type *obj_type)
 }
 EXPORT_SYMBOL_GPL(nft_unregister_obj);
 
-struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
-                                       const struct nlattr *nla,
-                                       u32 objtype, u8 genmask)
+struct nft_object *nft_obj_lookup(const struct nft_table *table,
+                                 const struct nlattr *nla, u32 objtype,
+                                 u8 genmask)
 {
        struct nft_object *obj;
 
@@ -4387,11 +4487,11 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
        }
        return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
+EXPORT_SYMBOL_GPL(nft_obj_lookup);
 
-static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
-                                                       const struct nlattr *nla,
-                                                       u32 objtype, u8 genmask)
+static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table,
+                                                 const struct nlattr *nla,
+                                                 u32 objtype, u8 genmask)
 {
        struct nft_object *obj;
 
@@ -4535,22 +4635,25 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
            !nla[NFTA_OBJ_DATA])
                return -EINVAL;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
                return PTR_ERR(table);
+       }
 
        objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-       obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+       obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
-               if (err != -ENOENT)
+               if (err != -ENOENT) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
                        return err;
-
+               }
        } else {
-               if (nlh->nlmsg_flags & NLM_F_EXCL)
+               if (nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
                        return -EEXIST;
-
+               }
                return 0;
        }
 
@@ -4761,15 +4864,18 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
            !nla[NFTA_OBJ_TYPE])
                return -EINVAL;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
                return PTR_ERR(table);
+       }
 
        objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-       obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
-       if (IS_ERR(obj))
+       obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+       if (IS_ERR(obj)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
                return PTR_ERR(obj);
+       }
 
        skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb2)
@@ -4808,6 +4914,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
+       const struct nlattr *attr;
        struct nft_table *table;
        struct nft_object *obj;
        struct nft_ctx ctx;
@@ -4817,22 +4924,29 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
            (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
                return -EINVAL;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
-                                      genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
                return PTR_ERR(table);
+       }
 
        objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-       if (nla[NFTA_OBJ_HANDLE])
-               obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
-                                                   objtype, genmask);
-       else
-               obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
-                                          objtype, genmask);
-       if (IS_ERR(obj))
+       if (nla[NFTA_OBJ_HANDLE]) {
+               attr = nla[NFTA_OBJ_HANDLE];
+               obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
+       } else {
+               attr = nla[NFTA_OBJ_NAME];
+               obj = nft_obj_lookup(table, attr, objtype, genmask);
+       }
+
+       if (IS_ERR(obj)) {
+               NL_SET_BAD_ATTR(extack, attr);
                return PTR_ERR(obj);
-       if (obj->use > 0)
+       }
+       if (obj->use > 0) {
+               NL_SET_BAD_ATTR(extack, attr);
                return -EBUSY;
+       }
 
        nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
@@ -4903,9 +5017,8 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
        [NFTA_FLOWTABLE_HANDLE]         = { .type = NLA_U64 },
 };
 
-struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
-                                                const struct nlattr *nla,
-                                                u8 genmask)
+struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+                                          const struct nlattr *nla, u8 genmask)
 {
        struct nft_flowtable *flowtable;
 
@@ -4916,11 +5029,11 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
        }
        return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
 
 static struct nft_flowtable *
-nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
-                                   const struct nlattr *nla, u8 genmask)
+nft_flowtable_lookup_byhandle(const struct nft_table *table,
+                             const struct nlattr *nla, u8 genmask)
 {
        struct nft_flowtable *flowtable;
 
@@ -5019,7 +5132,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
                flowtable->ops[i].pf            = NFPROTO_NETDEV;
                flowtable->ops[i].hooknum       = hooknum;
                flowtable->ops[i].priority      = priority;
-               flowtable->ops[i].priv          = &flowtable->data.rhashtable;
+               flowtable->ops[i].priv          = &flowtable->data;
                flowtable->ops[i].hook          = flowtable->data.type->hook;
                flowtable->ops[i].dev           = dev_array[i];
                flowtable->dev_name[i]          = kstrdup(dev_array[i]->name,
@@ -5060,23 +5173,6 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
        return ERR_PTR(-ENOENT);
 }
 
-void nft_flow_table_iterate(struct net *net,
-                           void (*iter)(struct nf_flowtable *flowtable, void *data),
-                           void *data)
-{
-       struct nft_flowtable *flowtable;
-       const struct nft_table *table;
-
-       nfnl_lock(NFNL_SUBSYS_NFTABLES);
-       list_for_each_entry(table, &net->nft.tables, list) {
-               list_for_each_entry(flowtable, &table->flowtables, list) {
-                       iter(&flowtable->data, data);
-               }
-       }
-       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-}
-EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
-
 static void nft_unregister_flowtable_net_hooks(struct net *net,
                                               struct nft_flowtable *flowtable)
 {
@@ -5110,20 +5206,26 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
            !nla[NFTA_FLOWTABLE_HOOK])
                return -EINVAL;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
-                                      family, genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+                                genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
                return PTR_ERR(table);
+       }
 
-       flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
-                                              genmask);
+       flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+                                        genmask);
        if (IS_ERR(flowtable)) {
                err = PTR_ERR(flowtable);
-               if (err != -ENOENT)
+               if (err != -ENOENT) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
                        return err;
+               }
        } else {
-               if (nlh->nlmsg_flags & NLM_F_EXCL)
+               if (nlh->nlmsg_flags & NLM_F_EXCL) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
                        return -EEXIST;
+               }
 
                return 0;
        }
@@ -5150,14 +5252,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
        }
 
        flowtable->data.type = type;
-       err = rhashtable_init(&flowtable->data.rhashtable, type->params);
+       err = type->init(&flowtable->data);
        if (err < 0)
                goto err3;
 
        err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
                                             flowtable);
        if (err < 0)
-               goto err3;
+               goto err4;
 
        for (i = 0; i < flowtable->ops_len; i++) {
                if (!flowtable->ops[i].dev)
@@ -5171,37 +5273,35 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
                                if (flowtable->ops[i].dev == ft->ops[k].dev &&
                                    flowtable->ops[i].pf == ft->ops[k].pf) {
                                        err = -EBUSY;
-                                       goto err4;
+                                       goto err5;
                                }
                        }
                }
 
                err = nf_register_net_hook(net, &flowtable->ops[i]);
                if (err < 0)
-                       goto err4;
+                       goto err5;
        }
 
        err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
        if (err < 0)
-               goto err5;
-
-       INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
-       queue_delayed_work(system_power_efficient_wq,
-                          &flowtable->data.gc_work, HZ);
+               goto err6;
 
        list_add_tail_rcu(&flowtable->list, &table->flowtables);
        table->use++;
 
        return 0;
-err5:
+err6:
        i = flowtable->ops_len;
-err4:
+err5:
        for (k = i - 1; k >= 0; k--) {
                kfree(flowtable->dev_name[k]);
                nf_unregister_net_hook(net, &flowtable->ops[k]);
        }
 
        kfree(flowtable->ops);
+err4:
+       flowtable->data.type->free(&flowtable->data);
 err3:
        module_put(type->owner);
 err2:
@@ -5221,6 +5321,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
        struct nft_flowtable *flowtable;
+       const struct nlattr *attr;
        struct nft_table *table;
        struct nft_ctx ctx;
 
@@ -5229,23 +5330,29 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
             !nla[NFTA_FLOWTABLE_HANDLE]))
                return -EINVAL;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
-                                      family, genmask);
-       if (IS_ERR(table))
+       table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+                                genmask);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
                return PTR_ERR(table);
+       }
 
-       if (nla[NFTA_FLOWTABLE_HANDLE])
-               flowtable = nf_tables_flowtable_lookup_byhandle(table,
-                                                               nla[NFTA_FLOWTABLE_HANDLE],
-                                                               genmask);
-       else
-               flowtable = nf_tables_flowtable_lookup(table,
-                                                      nla[NFTA_FLOWTABLE_NAME],
-                                                      genmask);
-       if (IS_ERR(flowtable))
-                return PTR_ERR(flowtable);
-       if (flowtable->use > 0)
+       if (nla[NFTA_FLOWTABLE_HANDLE]) {
+               attr = nla[NFTA_FLOWTABLE_HANDLE];
+               flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
+       } else {
+               attr = nla[NFTA_FLOWTABLE_NAME];
+               flowtable = nft_flowtable_lookup(table, attr, genmask);
+       }
+
+       if (IS_ERR(flowtable)) {
+               NL_SET_BAD_ATTR(extack, attr);
+               return PTR_ERR(flowtable);
+       }
+       if (flowtable->use > 0) {
+               NL_SET_BAD_ATTR(extack, attr);
                return -EBUSY;
+       }
 
        nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
@@ -5426,13 +5533,13 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
        if (!nla[NFTA_FLOWTABLE_NAME])
                return -EINVAL;
 
-       table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
-                                      family, genmask);
+       table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+                                genmask);
        if (IS_ERR(table))
                return PTR_ERR(table);
 
-       flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
-                                              genmask);
+       flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+                                        genmask);
        if (IS_ERR(flowtable))
                return PTR_ERR(flowtable);
 
@@ -5485,11 +5592,9 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
-       cancel_delayed_work_sync(&flowtable->data.gc_work);
        kfree(flowtable->ops);
        kfree(flowtable->name);
        flowtable->data.type->free(&flowtable->data);
-       rhashtable_destroy(&flowtable->data.rhashtable);
        module_put(flowtable->data.type->owner);
 }
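
With the rhashtable setup and the garbage-collection work moved out of the core (see the type->init()/err4 changes above and this destroy path), each flowtable type is now expected to supply its own ->init() and ->free(). A sketch of such an init, assuming a flow_table_params rhashtable configuration and an example_gc_step work function that are not shown in this diff:

static int example_flow_table_init(struct nf_flowtable *flow_table)
{
        int err;

        err = rhashtable_init(&flow_table->rhashtable, &flow_table_params);
        if (err < 0)
                return err;

        /* garbage collection is now owned by the flowtable type */
        INIT_DEFERRABLE_WORK(&flow_table->gc_work, example_gc_step);
        queue_delayed_work(system_power_efficient_wq,
                           &flow_table->gc_work, HZ);
        return 0;
}
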
 
@@ -5738,7 +5843,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
        struct nft_base_chain *basechain;
 
        if (nft_trans_chain_name(trans))
-               strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+               swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
 
        if (!nft_is_base_chain(trans->ctx.chain))
                return;
@@ -5754,7 +5859,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
        }
 }
 
-static void nf_tables_commit_release(struct nft_trans *trans)
+static void nft_commit_release(struct nft_trans *trans)
 {
        switch (trans->msg_type) {
        case NFT_MSG_DELTABLE:
@@ -5783,6 +5888,21 @@ static void nf_tables_commit_release(struct nft_trans *trans)
        kfree(trans);
 }
 
+static void nf_tables_commit_release(struct net *net)
+{
+       struct nft_trans *trans, *next;
+
+       if (list_empty(&net->nft.commit_list))
+               return;
+
+       synchronize_rcu();
+
+       list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+               list_del(&trans->list);
+               nft_commit_release(trans);
+       }
+}
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
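
Factoring the release loop into nf_tables_commit_release() also adds the list_empty() early return, so a commit that queued no transactions no longer pays for an RCU grace period; the single synchronize_rcu() still covers every queued object in the batch at once.
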
 {
        struct nft_trans *trans, *next;
@@ -5913,13 +6033,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                }
        }
 
-       synchronize_rcu();
-
-       list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-               list_del(&trans->list);
-               nf_tables_commit_release(trans);
-       }
-
+       nf_tables_commit_release(net);
        nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
 
        return 0;
@@ -5999,10 +6113,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
                case NFT_MSG_NEWRULE:
                        trans->ctx.chain->use--;
                        list_del_rcu(&nft_trans_rule(trans)->list);
+                       nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
                        break;
                case NFT_MSG_DELRULE:
                        trans->ctx.chain->use++;
                        nft_clear(trans->ctx.net, nft_trans_rule(trans));
+                       nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
                        nft_trans_destroy(trans);
                        break;
                case NFT_MSG_NEWSET:
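
The abort path is thus symmetric with the new expression lifecycle hooks: aborting a NEWRULE deactivates the rule's expressions, releasing references they took at init time, while aborting a DELRULE re-activates the surviving rule. nft_immediate further down is converted to this scheme, holding and dropping its data (for example a chain referenced by a jump verdict) via activate/deactivate rather than only freeing it at destroy time.
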
@@ -6403,8 +6519,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
        case NFT_GOTO:
                if (!tb[NFTA_VERDICT_CHAIN])
                        return -EINVAL;
-               chain = nf_tables_chain_lookup(ctx->table,
-                                              tb[NFTA_VERDICT_CHAIN], genmask);
+               chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
+                                        genmask);
                if (IS_ERR(chain))
                        return PTR_ERR(chain);
                if (nft_is_base_chain(chain))
@@ -6578,7 +6694,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
        list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
                list_del(&rule->list);
                ctx->chain->use--;
-               nf_tables_rule_destroy(ctx, rule);
+               nf_tables_rule_release(ctx, rule);
        }
        list_del(&ctx->chain->list);
        ctx->table->use--;
@@ -6616,7 +6732,7 @@ static void __nft_release_tables(struct net *net)
                        list_for_each_entry_safe(rule, nr, &chain->rules, list) {
                                list_del(&rule->list);
                                chain->use--;
-                               nf_tables_rule_destroy(&ctx, rule);
+                               nf_tables_rule_release(&ctx, rule);
                        }
                }
                list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
index dfd0bf3810d2e81690a4a2d3ac5648cb8d881090..ebb9799350ed62ae37c49a50a76c980fb26f5913 100644 (file)
@@ -119,15 +119,22 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
 static noinline void nft_update_chain_stats(const struct nft_chain *chain,
                                            const struct nft_pktinfo *pkt)
 {
+       struct nft_base_chain *base_chain;
        struct nft_stats *stats;
 
-       local_bh_disable();
-       stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
-       u64_stats_update_begin(&stats->syncp);
-       stats->pkts++;
-       stats->bytes += pkt->skb->len;
-       u64_stats_update_end(&stats->syncp);
-       local_bh_enable();
+       base_chain = nft_base_chain(chain);
+       if (!base_chain->stats)
+               return;
+
+       stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
+       if (stats) {
+               local_bh_disable();
+               u64_stats_update_begin(&stats->syncp);
+               stats->pkts++;
+               stats->bytes += pkt->skb->len;
+               u64_stats_update_end(&stats->syncp);
+               local_bh_enable();
+       }
 }
 
 struct nft_jumpstack {
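
The update side above pairs with a read side that samples each per-CPU counter under the same syncp. A sketch of that consumer (illustrative, not part of this patch), assuming the usual nft_stats layout with pkts, bytes and syncp members:

#include <linux/u64_stats_sync.h>

static void example_read_stats(struct nft_stats __percpu *stats,
                               u64 *pkts, u64 *bytes)
{
        unsigned int cpu, seq;

        *pkts = 0;
        *bytes = 0;
        for_each_possible_cpu(cpu) {
                struct nft_stats *cpu_stats = per_cpu_ptr(stats, cpu);
                u64 p, b;

                do {    /* retry if a writer was mid-update on this CPU */
                        seq = u64_stats_fetch_begin(&cpu_stats->syncp);
                        p = cpu_stats->pkts;
                        b = cpu_stats->bytes;
                } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq));
                *pkts += p;
                *bytes += b;
        }
}
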
@@ -251,6 +258,9 @@ static struct nft_expr_type *nft_basic_types[] = {
        &nft_payload_type,
        &nft_dynset_type,
        &nft_range_type,
+       &nft_meta_type,
+       &nft_rt_type,
+       &nft_exthdr_type,
 };
 
 int __init nf_tables_core_module_init(void)
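
With nft_meta_type, nft_rt_type and nft_exthdr_type added to nft_basic_types, nf_tables_core_module_init() now registers them together with the other built-in expressions; the standalone module boilerplate for meta, rt and exthdr is removed further down in this patch.
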
index b9505bcd3827d9604bfcf4371e807ede4f09f93f..6ddf89183e7b47e6c029b28cf5b524c73a790498 100644 (file)
@@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
                nfacct->flags = flags;
        }
 
-       strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+       nla_strlcpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX);
 
        if (tb[NFACCT_BYTES]) {
                atomic64_set(&nfacct->bytes,
index 4a4b293fb2e54cbcf5df1ccd1c2b9950770bf4d8..fa026b269b3691d5186e28020eb2b08e93dc3679 100644 (file)
@@ -149,8 +149,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
            !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
                return -EINVAL;
 
-       strncpy(expect_policy->name,
-               nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN);
+       nla_strlcpy(expect_policy->name,
+                   tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN);
        expect_policy->max_expected =
                ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
        if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
@@ -234,7 +234,8 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
        if (ret < 0)
                goto err1;
 
-       strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
+       nla_strlcpy(helper->name,
+                   tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
        size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
        if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
                ret = -ENOMEM;
index 7b46aa4c478d35a0a94d2214ffcb2441c9f9c582..e5cc4d9b9ce708ff22053d999ae7645c9059364c 100644 (file)
@@ -37,7 +37,6 @@
 #include <net/sock.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netns/generic.h>
-#include <net/netfilter/nfnetlink_log.h>
 
 #include <linux/atomic.h>
 #include <linux/refcount.h>
@@ -47,6 +46,7 @@
 #include "../bridge/br_private.h"
 #endif
 
+#define NFULNL_COPY_DISABLED   0xff
 #define NFULNL_NLBUFSIZ_DEFAULT        NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT         100     /* every second */
 #define NFULNL_QTHRESH_DEFAULT         100     /* 100 packets */
@@ -618,7 +618,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* log handler for internal netfilter logging api */
-void
+static void
 nfulnl_log_packet(struct net *net,
                  u_int8_t pf,
                  unsigned int hooknum,
@@ -633,7 +633,7 @@ nfulnl_log_packet(struct net *net,
        struct nfulnl_instance *inst;
        const struct nf_loginfo *li;
        unsigned int qthreshold;
-       unsigned int plen;
+       unsigned int plen = 0;
        struct nfnl_log_net *log = nfnl_log_pernet(net);
        const struct nfnl_ct_hook *nfnl_ct = NULL;
        struct nf_conn *ct = NULL;
@@ -648,7 +648,6 @@ nfulnl_log_packet(struct net *net,
        if (!inst)
                return;
 
-       plen = 0;
        if (prefix)
                plen = strlen(prefix) + 1;
 
@@ -760,7 +759,6 @@ nfulnl_log_packet(struct net *net,
        /* FIXME: statistics */
        goto unlock_and_release;
 }
-EXPORT_SYMBOL_GPL(nfulnl_log_packet);
 
 static int
 nfulnl_rcv_nl_event(struct notifier_block *this,
index 8e23726b90810d3a7d49c145e5b735245d669b57..1d99a1efdafcda5e209eadb975c4e53c859d5495 100644 (file)
@@ -27,14 +27,31 @@ struct nft_xt {
        struct list_head        head;
        struct nft_expr_ops     ops;
        unsigned int            refcnt;
+
+       /* Unlike other expressions, ops doesn't have static storage duration.
+        * nft core assumes they do.  We use kfree_rcu so that nft core can
+        * check expr->ops->size even after nft_compat->destroy() frees
+        * the nft_xt struct that holds the ops structure.
+        */
+       struct rcu_head         rcu_head;
+};
+
+/* Out-of-line storage for matches whose *info exceeds this many bytes */
+#define NFT_MATCH_LARGE_THRESH 192
+
+struct nft_xt_match_priv {
+       void *info;
 };
 
-static void nft_xt_put(struct nft_xt *xt)
+static bool nft_xt_put(struct nft_xt *xt)
 {
        if (--xt->refcnt == 0) {
                list_del(&xt->head);
-               kfree(xt);
+               kfree_rcu(xt, rcu_head);
+               return true;
        }
+
+       return false;
 }
 
 static int nft_compat_chain_validate_dependency(const char *tablename,
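
The kfree_rcu() above matters because the core may still inspect an expression's ops from an RCU read-side section after the last nft_compat user has dropped it; a sketch of such a reader (illustrative, not part of the patch):

#include <net/netfilter/nf_tables.h>

static unsigned int example_ops_size(const struct nft_expr *expr)
{
        unsigned int size;

        rcu_read_lock();
        /* safe: the nft_xt that embeds ops is freed only after a grace period */
        size = expr->ops->size;
        rcu_read_unlock();

        return size;
}
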
@@ -226,6 +243,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        struct xt_target *target = expr->ops->data;
        struct xt_tgchk_param par;
        size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+       struct nft_xt *nft_xt;
        u16 proto = 0;
        bool inv = false;
        union nft_entry e = {};
@@ -236,25 +254,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        if (ctx->nla[NFTA_RULE_COMPAT]) {
                ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
                if (ret < 0)
-                       goto err;
+                       return ret;
        }
 
        nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
 
        ret = xt_check_target(&par, size, proto, inv);
        if (ret < 0)
-               goto err;
+               return ret;
 
        /* The standard target cannot be used */
-       if (target->target == NULL) {
-               ret = -EINVAL;
-               goto err;
-       }
+       if (!target->target)
+               return -EINVAL;
 
+       nft_xt = container_of(expr->ops, struct nft_xt, ops);
+       nft_xt->refcnt++;
        return 0;
-err:
-       module_put(target->me);
-       return ret;
 }
 
 static void
@@ -271,8 +286,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
        if (par.target->destroy != NULL)
                par.target->destroy(&par);
 
-       nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
-       module_put(target->me);
+       if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+               module_put(target->me);
 }
 
 static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -316,11 +331,11 @@ static int nft_target_validate(const struct nft_ctx *ctx,
        return 0;
 }
 
-static void nft_match_eval(const struct nft_expr *expr,
-                          struct nft_regs *regs,
-                          const struct nft_pktinfo *pkt)
+static void __nft_match_eval(const struct nft_expr *expr,
+                            struct nft_regs *regs,
+                            const struct nft_pktinfo *pkt,
+                            void *info)
 {
-       void *info = nft_expr_priv(expr);
        struct xt_match *match = expr->ops->data;
        struct sk_buff *skb = pkt->skb;
        bool ret;
@@ -344,6 +359,22 @@ static void nft_match_eval(const struct nft_expr *expr,
        }
 }
 
+static void nft_match_large_eval(const struct nft_expr *expr,
+                                struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+       __nft_match_eval(expr, regs, pkt, priv->info);
+}
+
+static void nft_match_eval(const struct nft_expr *expr,
+                          struct nft_regs *regs,
+                          const struct nft_pktinfo *pkt)
+{
+       __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr));
+}
+
 static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
        [NFTA_MATCH_NAME]       = { .type = NLA_NUL_STRING },
        [NFTA_MATCH_REV]        = { .type = NLA_U32 },
@@ -404,13 +435,14 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out)
 }
 
 static int
-nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
-               const struct nlattr * const tb[])
+__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                const struct nlattr * const tb[],
+                void *info)
 {
-       void *info = nft_expr_priv(expr);
        struct xt_match *match = expr->ops->data;
        struct xt_mtchk_param par;
        size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+       struct nft_xt *nft_xt;
        u16 proto = 0;
        bool inv = false;
        union nft_entry e = {};
@@ -421,26 +453,50 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        if (ctx->nla[NFTA_RULE_COMPAT]) {
                ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
                if (ret < 0)
-                       goto err;
+                       return ret;
        }
 
        nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
 
        ret = xt_check_match(&par, size, proto, inv);
        if (ret < 0)
-               goto err;
+               return ret;
 
+       nft_xt = container_of(expr->ops, struct nft_xt, ops);
+       nft_xt->refcnt++;
        return 0;
-err:
-       module_put(match->me);
+}
+
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+              const struct nlattr * const tb[])
+{
+       return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr));
+}
+
+static int
+nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                    const struct nlattr * const tb[])
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+       struct xt_match *m = expr->ops->data;
+       int ret;
+
+       priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
+       if (!priv->info)
+               return -ENOMEM;
+
+       ret = __nft_match_init(ctx, expr, tb, priv->info);
+       if (ret)
+               kfree(priv->info);
        return ret;
 }
 
 static void
-nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                   void *info)
 {
        struct xt_match *match = expr->ops->data;
-       void *info = nft_expr_priv(expr);
        struct xt_mtdtor_param par;
 
        par.net = ctx->net;
@@ -450,13 +506,28 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
        if (par.match->destroy != NULL)
                par.match->destroy(&par);
 
-       nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
-       module_put(match->me);
+       if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+               module_put(match->me);
 }
 
-static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+       __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
+}
+
+static void
+nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+       __nft_match_destroy(ctx, expr, priv->info);
+       kfree(priv->info);
+}
+
+static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr,
+                           void *info)
 {
-       void *info = nft_expr_priv(expr);
        struct xt_match *match = expr->ops->data;
 
        if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
@@ -470,6 +541,18 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
        return -1;
 }
 
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       return __nft_match_dump(skb, expr, nft_expr_priv(expr));
+}
+
+static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e)
+{
+       struct nft_xt_match_priv *priv = nft_expr_priv(e);
+
+       return __nft_match_dump(skb, e, priv->info);
+}
+
 static int nft_match_validate(const struct nft_ctx *ctx,
                              const struct nft_expr *expr,
                              const struct nft_data **data)
@@ -637,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 {
        struct nft_xt *nft_match;
        struct xt_match *match;
+       unsigned int matchsize;
        char *mt_name;
        u32 rev, family;
        int err;
@@ -654,13 +738,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
        list_for_each_entry(nft_match, &nft_match_list, head) {
                struct xt_match *match = nft_match->ops.data;
 
-               if (nft_match_cmp(match, mt_name, rev, family)) {
-                       if (!try_module_get(match->me))
-                               return ERR_PTR(-ENOENT);
-
-                       nft_match->refcnt++;
+               if (nft_match_cmp(match, mt_name, rev, family))
                        return &nft_match->ops;
-               }
        }
 
        match = xt_request_find_match(family, mt_name, rev);
@@ -679,9 +758,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
                goto err;
        }
 
-       nft_match->refcnt = 1;
+       nft_match->refcnt = 0;
        nft_match->ops.type = &nft_match_type;
-       nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
        nft_match->ops.eval = nft_match_eval;
        nft_match->ops.init = nft_match_init;
        nft_match->ops.destroy = nft_match_destroy;
@@ -689,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx,
        nft_match->ops.validate = nft_match_validate;
        nft_match->ops.data = match;
 
+       matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
+       if (matchsize > NFT_MATCH_LARGE_THRESH) {
+               matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
+
+               nft_match->ops.eval = nft_match_large_eval;
+               nft_match->ops.init = nft_match_large_init;
+               nft_match->ops.destroy = nft_match_large_destroy;
+               nft_match->ops.dump = nft_match_large_dump;
+       }
+
+       nft_match->ops.size = matchsize;
+
        list_add(&nft_match->head, &nft_match_list);
 
        return &nft_match->ops;
@@ -739,13 +829,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
        list_for_each_entry(nft_target, &nft_target_list, head) {
                struct xt_target *target = nft_target->ops.data;
 
-               if (nft_target_cmp(target, tg_name, rev, family)) {
-                       if (!try_module_get(target->me))
-                               return ERR_PTR(-ENOENT);
-
-                       nft_target->refcnt++;
+               if (nft_target_cmp(target, tg_name, rev, family))
                        return &nft_target->ops;
-               }
        }
 
        target = xt_request_find_target(family, tg_name, rev);
@@ -764,7 +849,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
                goto err;
        }
 
-       nft_target->refcnt = 1;
+       nft_target->refcnt = 0;
        nft_target->ops.type = &nft_target_type;
        nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
        nft_target->ops.init = nft_target_init;
@@ -823,6 +908,32 @@ static int __init nft_compat_module_init(void)
 
 static void __exit nft_compat_module_exit(void)
 {
+       struct nft_xt *xt, *next;
+
+       /* The list should be empty here. It can be non-empty only if an
+        * error kept an nft_xt expr from being fully initialized and no
+        * one else requested the same expression later.
+        *
+        * In this case, the lists contain 0-refcount entries that still
+        * hold module reference.
+        */
+       list_for_each_entry_safe(xt, next, &nft_target_list, head) {
+               struct xt_target *target = xt->ops.data;
+
+               if (WARN_ON_ONCE(xt->refcnt))
+                       continue;
+               module_put(target->me);
+               kfree(xt);
+       }
+
+       list_for_each_entry_safe(xt, next, &nft_match_list, head) {
+               struct xt_match *match = xt->ops.data;
+
+               if (WARN_ON_ONCE(xt->refcnt))
+                       continue;
+               module_put(match->me);
+               kfree(xt);
+       }
        nfnetlink_subsys_unregister(&nfnl_compat_subsys);
        nft_unregister_expr(&nft_target_type);
        nft_unregister_expr(&nft_match_type);
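
Taken together, the nft_compat changes move expression reference counting from ->select_ops() to ->init(): finding existing ops no longer bumps the refcount or grabs a module reference, a successful init takes the reference, and destroy drops it, calling module_put() only when nft_xt_put() signals that the last user is gone. The zero-refcount entries swept up here at module exit are precisely those whose init never succeeded. Separately, matches whose matchinfo exceeds NFT_MATCH_LARGE_THRESH now keep it out of line behind a single-pointer private area, so oversized xt matchinfo no longer inflates every rule's expression allocation.
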
index 04863fad05ddd23c33f679232fd926ac57888590..b07a3fd9eeea3e73446c9e9533993dbc3fb30c33 100644 (file)
@@ -36,7 +36,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
        u64 timeout;
        void *elem;
 
-       if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+       if (!atomic_add_unless(&set->nelems, 1, set->size))
                return NULL;
 
        timeout = priv->timeout ? : set->timeout;
@@ -81,7 +81,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
                if (priv->op == NFT_DYNSET_OP_UPDATE &&
                    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
                        timeout = priv->timeout ? : set->timeout;
-                       *nft_set_ext_expiration(ext) = jiffies + timeout;
+                       *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
                }
 
                if (sexpr != NULL)
@@ -216,6 +216,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
        if (err < 0)
                goto err1;
 
+       if (set->size == 0)
+               set->size = 0xffff;
+
        priv->set = set;
        return 0;
 
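
Since sets created without an explicit size now default to 0xffff elements, the bound check in nft_dynset_new() can run unconditionally. It leans on atomic_add_unless() semantics, sketched here with an illustrative helper that is not from the patch:

#include <linux/atomic.h>

/* Returns true if a slot was reserved, false if the set is full.
 * atomic_add_unless(v, 1, limit) refuses the increment once v has
 * reached limit, so concurrent inserts cannot push past set->size.
 */
static bool example_reserve_slot(atomic_t *nelems, int size)
{
        return atomic_add_unless(nelems, 1, size);
}
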
index 47ec1046ad11536e337f709d1f41a267a77cf1d3..a940c9fd9045e5495a42d93f76db26283c6de5f1 100644 (file)
 
 #include <asm/unaligned.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/tcp.h>
 
@@ -353,7 +352,6 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
        return nft_exthdr_dump_common(skb, priv);
 }
 
-static struct nft_expr_type nft_exthdr_type;
 static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
        .type           = &nft_exthdr_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -407,27 +405,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
        return ERR_PTR(-EOPNOTSUPP);
 }
 
-static struct nft_expr_type nft_exthdr_type __read_mostly = {
+struct nft_expr_type nft_exthdr_type __read_mostly = {
        .name           = "exthdr",
        .select_ops     = nft_exthdr_select_ops,
        .policy         = nft_exthdr_policy,
        .maxattr        = NFTA_EXTHDR_MAX,
        .owner          = THIS_MODULE,
 };
-
-static int __init nft_exthdr_module_init(void)
-{
-       return nft_register_expr(&nft_exthdr_type);
-}
-
-static void __exit nft_exthdr_module_exit(void)
-{
-       nft_unregister_expr(&nft_exthdr_type);
-}
-
-module_init(nft_exthdr_module_init);
-module_exit(nft_exthdr_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("exthdr");
index b65829b2be2286cf8eef82f7e071ec7a83be352c..d6bab8c3cbb0297a047852d15ef9d50f09fbb0ef 100644 (file)
@@ -142,9 +142,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
        if (!tb[NFTA_FLOW_TABLE_NAME])
                return -EINVAL;
 
-       flowtable = nf_tables_flowtable_lookup(ctx->table,
-                                              tb[NFTA_FLOW_TABLE_NAME],
-                                              genmask);
+       flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
+                                        genmask);
        if (IS_ERR(flowtable))
                return PTR_ERR(flowtable);
 
index 24f2f7567ddb779af44ee5050df36292f413d4d6..e235c17f1b8b7211d33d95f5a348bc7a6ed2046f 100644 (file)
@@ -97,7 +97,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
        priv->len = len;
 
        priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
-       if (priv->modulus <= 1)
+       if (priv->modulus < 1)
                return -ERANGE;
 
        if (priv->offset + priv->modulus - 1 < priv->offset)
index 4717d77969271c324087ed7677df636b414e54ad..aa87ff8beae82cf733303b1b32d8b50ba5af65b7 100644 (file)
@@ -69,8 +69,16 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
        return err;
 }
 
-static void nft_immediate_destroy(const struct nft_ctx *ctx,
-                                 const struct nft_expr *expr)
+static void nft_immediate_activate(const struct nft_ctx *ctx,
+                                  const struct nft_expr *expr)
+{
+       const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+       return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+                                    const struct nft_expr *expr)
 {
        const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
@@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = {
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
        .eval           = nft_immediate_eval,
        .init           = nft_immediate_init,
-       .destroy        = nft_immediate_destroy,
+       .activate       = nft_immediate_activate,
+       .deactivate     = nft_immediate_deactivate,
        .dump           = nft_immediate_dump,
        .validate       = nft_immediate_validate,
 };
index 8fb91940e2e7246db0cb0ff9e307dda56cc60f88..5348bd058c885a31d81754a374185dae3b36f2cb 100644 (file)
@@ -1,5 +1,7 @@
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2014 Intel Corporation
+ * Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,8 +11,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/tcp_states.h> /* for TCP_TIME_WAIT */
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
-#include <net/netfilter/nft_meta.h>
 
 #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
 
+struct nft_meta {
+       enum nft_meta_keys      key:8;
+       union {
+               enum nft_registers      dreg:8;
+               enum nft_registers      sreg:8;
+       };
+};
+
 static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
 
-void nft_meta_get_eval(const struct nft_expr *expr,
-                      struct nft_regs *regs,
-                      const struct nft_pktinfo *pkt)
+#ifdef CONFIG_NF_TABLES_BRIDGE
+#include "../bridge/br_private.h"
+#endif
+
+static void nft_meta_get_eval(const struct nft_expr *expr,
+                             struct nft_regs *regs,
+                             const struct nft_pktinfo *pkt)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
        const struct sk_buff *skb = pkt->skb;
        const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
        struct sock *sk;
        u32 *dest = &regs->data[priv->dreg];
+#ifdef CONFIG_NF_TABLES_BRIDGE
+       const struct net_bridge_port *p;
+#endif
 
        switch (priv->key) {
        case NFT_META_LEN:
@@ -214,6 +228,18 @@ void nft_meta_get_eval(const struct nft_expr *expr,
        case NFT_META_SECPATH:
                nft_reg_store8(dest, !!skb->sp);
                break;
+#endif
+#ifdef CONFIG_NF_TABLES_BRIDGE
+       case NFT_META_BRI_IIFNAME:
+               if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+                       goto err;
+               strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
+               return;
+       case NFT_META_BRI_OIFNAME:
+               if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+                       goto err;
+               strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
+               return;
 #endif
        default:
                WARN_ON(1);
@@ -224,11 +250,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 err:
        regs->verdict.code = NFT_BREAK;
 }
-EXPORT_SYMBOL_GPL(nft_meta_get_eval);
 
-void nft_meta_set_eval(const struct nft_expr *expr,
-                      struct nft_regs *regs,
-                      const struct nft_pktinfo *pkt)
+static void nft_meta_set_eval(const struct nft_expr *expr,
+                             struct nft_regs *regs,
+                             const struct nft_pktinfo *pkt)
 {
        const struct nft_meta *meta = nft_expr_priv(expr);
        struct sk_buff *skb = pkt->skb;
@@ -258,18 +283,16 @@ void nft_meta_set_eval(const struct nft_expr *expr,
                WARN_ON(1);
        }
 }
-EXPORT_SYMBOL_GPL(nft_meta_set_eval);
 
-const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
        [NFTA_META_DREG]        = { .type = NLA_U32 },
        [NFTA_META_KEY]         = { .type = NLA_U32 },
        [NFTA_META_SREG]        = { .type = NLA_U32 },
 };
-EXPORT_SYMBOL_GPL(nft_meta_policy);
 
-int nft_meta_get_init(const struct nft_ctx *ctx,
-                     const struct nft_expr *expr,
-                     const struct nlattr * const tb[])
+static int nft_meta_get_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr * const tb[])
 {
        struct nft_meta *priv = nft_expr_priv(expr);
        unsigned int len;
@@ -317,6 +340,14 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
        case NFT_META_SECPATH:
                len = sizeof(u8);
                break;
+#endif
+#ifdef CONFIG_NF_TABLES_BRIDGE
+       case NFT_META_BRI_IIFNAME:
+       case NFT_META_BRI_OIFNAME:
+               if (ctx->family != NFPROTO_BRIDGE)
+                       return -EOPNOTSUPP;
+               len = IFNAMSIZ;
+               break;
 #endif
        default:
                return -EOPNOTSUPP;
@@ -326,7 +357,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
        return nft_validate_register_store(ctx, priv->dreg, NULL,
                                           NFT_DATA_VALUE, len);
 }
-EXPORT_SYMBOL_GPL(nft_meta_get_init);
 
 static int nft_meta_get_validate(const struct nft_ctx *ctx,
                                 const struct nft_expr *expr,
@@ -360,9 +390,9 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
 #endif
 }
 
-int nft_meta_set_validate(const struct nft_ctx *ctx,
-                         const struct nft_expr *expr,
-                         const struct nft_data **data)
+static int nft_meta_set_validate(const struct nft_ctx *ctx,
+                                const struct nft_expr *expr,
+                                const struct nft_data **data)
 {
        struct nft_meta *priv = nft_expr_priv(expr);
        unsigned int hooks;
@@ -388,11 +418,10 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
 
        return nft_chain_validate_hooks(ctx->chain, hooks);
 }
-EXPORT_SYMBOL_GPL(nft_meta_set_validate);
 
-int nft_meta_set_init(const struct nft_ctx *ctx,
-                     const struct nft_expr *expr,
-                     const struct nlattr * const tb[])
+static int nft_meta_set_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr * const tb[])
 {
        struct nft_meta *priv = nft_expr_priv(expr);
        unsigned int len;
@@ -424,10 +453,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(nft_meta_set_init);
 
-int nft_meta_get_dump(struct sk_buff *skb,
-                     const struct nft_expr *expr)
+static int nft_meta_get_dump(struct sk_buff *skb,
+                            const struct nft_expr *expr)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -440,10 +468,8 @@ int nft_meta_get_dump(struct sk_buff *skb,
 nla_put_failure:
        return -1;
 }
-EXPORT_SYMBOL_GPL(nft_meta_get_dump);
 
-int nft_meta_set_dump(struct sk_buff *skb,
-                     const struct nft_expr *expr)
+static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -457,19 +483,16 @@ int nft_meta_set_dump(struct sk_buff *skb,
 nla_put_failure:
        return -1;
 }
-EXPORT_SYMBOL_GPL(nft_meta_set_dump);
 
-void nft_meta_set_destroy(const struct nft_ctx *ctx,
-                         const struct nft_expr *expr)
+static void nft_meta_set_destroy(const struct nft_ctx *ctx,
+                                const struct nft_expr *expr)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
 
        if (priv->key == NFT_META_NFTRACE)
                static_branch_dec(&nft_trace_enabled);
 }
-EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
 
-static struct nft_expr_type nft_meta_type;
 static const struct nft_expr_ops nft_meta_get_ops = {
        .type           = &nft_meta_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@@ -508,27 +531,10 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
        return ERR_PTR(-EINVAL);
 }
 
-static struct nft_expr_type nft_meta_type __read_mostly = {
+struct nft_expr_type nft_meta_type __read_mostly = {
        .name           = "meta",
        .select_ops     = nft_meta_select_ops,
        .policy         = nft_meta_policy,
        .maxattr        = NFTA_META_MAX,
        .owner          = THIS_MODULE,
 };
-
-static int __init nft_meta_module_init(void)
-{
-       return nft_register_expr(&nft_meta_type);
-}
-
-static void __exit nft_meta_module_exit(void)
-{
-       nft_unregister_expr(&nft_meta_type);
-}
-
-module_init(nft_meta_module_init);
-module_exit(nft_meta_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("meta");
index 1f36954c2ba917cbfae4143ee2a22afd71e82f84..c15807d10b912f6e63963e036fc486eb5ae1cd6e 100644 (file)
@@ -43,7 +43,7 @@ static void nft_nat_eval(const struct nft_expr *expr,
        const struct nft_nat *priv = nft_expr_priv(expr);
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
 
        memset(&range, 0, sizeof(range));
        if (priv->sreg_addr_min) {
index 5a3a52c71545ac15ae5c906dbf53d6d425c3c557..8a64db8f2e69d900adb1a98caf08587f30c00f62 100644 (file)
@@ -24,13 +24,11 @@ struct nft_ng_inc {
        u32                     modulus;
        atomic_t                counter;
        u32                     offset;
+       struct nft_set          *map;
 };
 
-static void nft_ng_inc_eval(const struct nft_expr *expr,
-                           struct nft_regs *regs,
-                           const struct nft_pktinfo *pkt)
+static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
 {
-       struct nft_ng_inc *priv = nft_expr_priv(expr);
        u32 nval, oval;
 
        do {
@@ -38,7 +36,36 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
                nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
        } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
 
-       regs->data[priv->dreg] = nval + priv->offset;
+       return nval + priv->offset;
+}
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+                           struct nft_regs *regs,
+                           const struct nft_pktinfo *pkt)
+{
+       struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+       regs->data[priv->dreg] = nft_ng_inc_gen(priv);
+}
+
+static void nft_ng_inc_map_eval(const struct nft_expr *expr,
+                               struct nft_regs *regs,
+                               const struct nft_pktinfo *pkt)
+{
+       struct nft_ng_inc *priv = nft_expr_priv(expr);
+       const struct nft_set *map = priv->map;
+       const struct nft_set_ext *ext;
+       u32 result;
+       bool found;
+
+       result = nft_ng_inc_gen(priv);
+       found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
+
+       if (!found)
+               return;
+
+       nft_data_copy(&regs->data[priv->dreg],
+                     nft_set_ext_data(ext), map->dlen);
 }
 
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
@@ -46,6 +73,9 @@ static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
        [NFTA_NG_MODULUS]       = { .type = NLA_U32 },
        [NFTA_NG_TYPE]          = { .type = NLA_U32 },
        [NFTA_NG_OFFSET]        = { .type = NLA_U32 },
+       [NFTA_NG_SET_NAME]      = { .type = NLA_STRING,
+                                   .len = NFT_SET_MAXNAMELEN - 1 },
+       [NFTA_NG_SET_ID]        = { .type = NLA_U32 },
 };
 
 static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -71,6 +101,25 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
                                           NFT_DATA_VALUE, sizeof(u32));
 }
 
+static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr,
+                              const struct nlattr * const tb[])
+{
+       struct nft_ng_inc *priv = nft_expr_priv(expr);
+       u8 genmask = nft_genmask_next(ctx->net);
+       int err;
+
+       err = nft_ng_inc_init(ctx, expr, tb);
+       if (err < 0)
+               return err;
+
+       priv->map = nft_set_lookup_global(ctx->net, ctx->table,
+                                         tb[NFTA_NG_SET_NAME],
+                                         tb[NFTA_NG_SET_ID], genmask);
+       if (IS_ERR(priv->map))
+               return PTR_ERR(priv->map);
+
+       return 0;
+}
+
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
                       u32 modulus, enum nft_ng_types type, u32 offset)
 {
@@ -97,6 +146,22 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
                           priv->offset);
 }
 
+static int nft_ng_inc_map_dump(struct sk_buff *skb,
+                              const struct nft_expr *expr)
+{
+       const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+       if (nft_ng_dump(skb, priv->dreg, priv->modulus,
+                       NFT_NG_INCREMENTAL, priv->offset) ||
+           nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
+               goto nla_put_failure;
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
 struct nft_ng_random {
        enum nft_registers      dreg:8;
        u32                     modulus;
@@ -156,6 +221,14 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
        .dump           = nft_ng_inc_dump,
 };
 
+static const struct nft_expr_ops nft_ng_inc_map_ops = {
+       .type           = &nft_ng_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+       .eval           = nft_ng_inc_map_eval,
+       .init           = nft_ng_inc_map_init,
+       .dump           = nft_ng_inc_map_dump,
+};
+
 static const struct nft_expr_ops nft_ng_random_ops = {
        .type           = &nft_ng_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@@ -178,6 +251,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 
        switch (type) {
        case NFT_NG_INCREMENTAL:
+               if (tb[NFTA_NG_SET_NAME])
+                       return &nft_ng_inc_map_ops;
                return &nft_ng_inc_ops;
        case NFT_NG_RANDOM:
                return &nft_ng_random_ops;
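
The map variant turns the generator into a dictionary lookup in a single expression: the incremental counter produces an index and the bound set resolves it to data, for instance a destination address, which is the usual building block for round-robin load balancing without a separate lookup expression.
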
index 0b02407773ad270eb0cea79e2ef2e32cf320cfa6..cdf348f751eca0c22018d99954d98aac66499d8d 100644 (file)
@@ -38,8 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx,
                return -EINVAL;
 
        objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
-       obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
-                                  genmask);
+       obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+                            genmask);
        if (IS_ERR(obj))
                return -ENOENT;
 
index 11a2071b6dd41feb6a2913105096b84331706b71..76dba9f6b6f627de7de1ada08320cc2ed5a12b24 100644 (file)
@@ -7,8 +7,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
@@ -179,7 +177,6 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
        return nft_chain_validate_hooks(ctx->chain, hooks);
 }
 
-static struct nft_expr_type nft_rt_type;
 static const struct nft_expr_ops nft_rt_get_ops = {
        .type           = &nft_rt_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
@@ -189,27 +186,10 @@ static const struct nft_expr_ops nft_rt_get_ops = {
        .validate       = nft_rt_validate,
 };
 
-static struct nft_expr_type nft_rt_type __read_mostly = {
+struct nft_expr_type nft_rt_type __read_mostly = {
        .name           = "rt",
        .ops            = &nft_rt_get_ops,
        .policy         = nft_rt_policy,
        .maxattr        = NFTA_RT_MAX,
        .owner          = THIS_MODULE,
 };
-
-static int __init nft_rt_module_init(void)
-{
-       return nft_register_expr(&nft_rt_type);
-}
-
-static void __exit nft_rt_module_exit(void)
-{
-       nft_unregister_expr(&nft_rt_type);
-}
-
-module_init(nft_rt_module_init);
-module_exit(nft_rt_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
-MODULE_ALIAS_NFT_EXPR("rt");
index 45fb2752fb6373043cb81fa5dfd8043dc2b800cb..d6626e01c7ee6b0c25a2197f75309030edca34c6 100644 (file)
@@ -296,27 +296,23 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
        return true;
 }
 
-static struct nft_set_type nft_bitmap_type;
-static struct nft_set_ops nft_bitmap_ops __read_mostly = {
-       .type           = &nft_bitmap_type,
-       .privsize       = nft_bitmap_privsize,
-       .elemsize       = offsetof(struct nft_bitmap_elem, ext),
-       .estimate       = nft_bitmap_estimate,
-       .init           = nft_bitmap_init,
-       .destroy        = nft_bitmap_destroy,
-       .insert         = nft_bitmap_insert,
-       .remove         = nft_bitmap_remove,
-       .deactivate     = nft_bitmap_deactivate,
-       .flush          = nft_bitmap_flush,
-       .activate       = nft_bitmap_activate,
-       .lookup         = nft_bitmap_lookup,
-       .walk           = nft_bitmap_walk,
-       .get            = nft_bitmap_get,
-};
-
 static struct nft_set_type nft_bitmap_type __read_mostly = {
-       .ops            = &nft_bitmap_ops,
        .owner          = THIS_MODULE,
+       .ops            = {
+               .privsize       = nft_bitmap_privsize,
+               .elemsize       = offsetof(struct nft_bitmap_elem, ext),
+               .estimate       = nft_bitmap_estimate,
+               .init           = nft_bitmap_init,
+               .destroy        = nft_bitmap_destroy,
+               .insert         = nft_bitmap_insert,
+               .remove         = nft_bitmap_remove,
+               .deactivate     = nft_bitmap_deactivate,
+               .flush          = nft_bitmap_flush,
+               .activate       = nft_bitmap_activate,
+               .lookup         = nft_bitmap_lookup,
+               .walk           = nft_bitmap_walk,
+               .get            = nft_bitmap_get,
+       },
 };
 
 static int __init nft_bitmap_module_init(void)
index fc9c6d5d64cd7561fc98cc3c82ba85ba7cd8dedc..dbf1f4ad077c56444d9f258d9a6c0afa38360f4e 100644 (file)
@@ -605,6 +605,12 @@ static void nft_hash_destroy(const struct nft_set *set)
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
                              struct nft_set_estimate *est)
 {
+       if (!desc->size)
+               return false;
+
+       if (desc->klen == 4)
+               return false;
+
        est->size   = sizeof(struct nft_hash) +
                      nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
                      desc->size * sizeof(struct nft_hash_elem);
@@ -614,91 +620,100 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
        return true;
 }
 
-static struct nft_set_type nft_hash_type;
-static struct nft_set_ops nft_rhash_ops __read_mostly = {
-       .type           = &nft_hash_type,
-       .privsize       = nft_rhash_privsize,
-       .elemsize       = offsetof(struct nft_rhash_elem, ext),
-       .estimate       = nft_rhash_estimate,
-       .init           = nft_rhash_init,
-       .destroy        = nft_rhash_destroy,
-       .insert         = nft_rhash_insert,
-       .activate       = nft_rhash_activate,
-       .deactivate     = nft_rhash_deactivate,
-       .flush          = nft_rhash_flush,
-       .remove         = nft_rhash_remove,
-       .lookup         = nft_rhash_lookup,
-       .update         = nft_rhash_update,
-       .walk           = nft_rhash_walk,
-       .get            = nft_rhash_get,
-       .features       = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
-};
+static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
+                                  struct nft_set_estimate *est)
+{
+       if (!desc->size)
+               return false;
 
-static struct nft_set_ops nft_hash_ops __read_mostly = {
-       .type           = &nft_hash_type,
-       .privsize       = nft_hash_privsize,
-       .elemsize       = offsetof(struct nft_hash_elem, ext),
-       .estimate       = nft_hash_estimate,
-       .init           = nft_hash_init,
-       .destroy        = nft_hash_destroy,
-       .insert         = nft_hash_insert,
-       .activate       = nft_hash_activate,
-       .deactivate     = nft_hash_deactivate,
-       .flush          = nft_hash_flush,
-       .remove         = nft_hash_remove,
-       .lookup         = nft_hash_lookup,
-       .walk           = nft_hash_walk,
-       .get            = nft_hash_get,
-       .features       = NFT_SET_MAP | NFT_SET_OBJECT,
-};
+       if (desc->klen != 4)
+               return false;
 
-static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
-       .type           = &nft_hash_type,
-       .privsize       = nft_hash_privsize,
-       .elemsize       = offsetof(struct nft_hash_elem, ext),
-       .estimate       = nft_hash_estimate,
-       .init           = nft_hash_init,
-       .destroy        = nft_hash_destroy,
-       .insert         = nft_hash_insert,
-       .activate       = nft_hash_activate,
-       .deactivate     = nft_hash_deactivate,
-       .flush          = nft_hash_flush,
-       .remove         = nft_hash_remove,
-       .lookup         = nft_hash_lookup_fast,
-       .walk           = nft_hash_walk,
-       .get            = nft_hash_get,
-       .features       = NFT_SET_MAP | NFT_SET_OBJECT,
-};
-
-static const struct nft_set_ops *
-nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
-                   u32 flags)
-{
-       if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
-               switch (desc->klen) {
-               case 4:
-                       return &nft_hash_fast_ops;
-               default:
-                       return &nft_hash_ops;
-               }
-       }
+       est->size   = sizeof(struct nft_hash) +
+                     nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+                     desc->size * sizeof(struct nft_hash_elem);
+       est->lookup = NFT_SET_CLASS_O_1;
+       est->space  = NFT_SET_CLASS_O_N;
 
-       return &nft_rhash_ops;
+       return true;
 }
 
+static struct nft_set_type nft_rhash_type __read_mostly = {
+       .owner          = THIS_MODULE,
+       .features       = NFT_SET_MAP | NFT_SET_OBJECT |
+                         NFT_SET_TIMEOUT | NFT_SET_EVAL,
+       .ops            = {
+               .privsize       = nft_rhash_privsize,
+               .elemsize       = offsetof(struct nft_rhash_elem, ext),
+               .estimate       = nft_rhash_estimate,
+               .init           = nft_rhash_init,
+               .destroy        = nft_rhash_destroy,
+               .insert         = nft_rhash_insert,
+               .activate       = nft_rhash_activate,
+               .deactivate     = nft_rhash_deactivate,
+               .flush          = nft_rhash_flush,
+               .remove         = nft_rhash_remove,
+               .lookup         = nft_rhash_lookup,
+               .update         = nft_rhash_update,
+               .walk           = nft_rhash_walk,
+               .get            = nft_rhash_get,
+       },
+};
+
 static struct nft_set_type nft_hash_type __read_mostly = {
-       .select_ops     = nft_hash_select_ops,
        .owner          = THIS_MODULE,
+       .features       = NFT_SET_MAP | NFT_SET_OBJECT,
+       .ops            = {
+               .privsize       = nft_hash_privsize,
+               .elemsize       = offsetof(struct nft_hash_elem, ext),
+               .estimate       = nft_hash_estimate,
+               .init           = nft_hash_init,
+               .destroy        = nft_hash_destroy,
+               .insert         = nft_hash_insert,
+               .activate       = nft_hash_activate,
+               .deactivate     = nft_hash_deactivate,
+               .flush          = nft_hash_flush,
+               .remove         = nft_hash_remove,
+               .lookup         = nft_hash_lookup,
+               .walk           = nft_hash_walk,
+               .get            = nft_hash_get,
+       },
+};
+
+static struct nft_set_type nft_hash_fast_type __read_mostly = {
+       .owner          = THIS_MODULE,
+       .features       = NFT_SET_MAP | NFT_SET_OBJECT,
+       .ops            = {
+               .privsize       = nft_hash_privsize,
+               .elemsize       = offsetof(struct nft_hash_elem, ext),
+               .estimate       = nft_hash_fast_estimate,
+               .init           = nft_hash_init,
+               .destroy        = nft_hash_destroy,
+               .insert         = nft_hash_insert,
+               .activate       = nft_hash_activate,
+               .deactivate     = nft_hash_deactivate,
+               .flush          = nft_hash_flush,
+               .remove         = nft_hash_remove,
+               .lookup         = nft_hash_lookup_fast,
+               .walk           = nft_hash_walk,
+               .get            = nft_hash_get,
+       },
 };
 
 static int __init nft_hash_module_init(void)
 {
-       return nft_register_set(&nft_hash_type);
+       int err;
+
+       err = nft_register_set(&nft_hash_fast_type);
+       if (err < 0)
+               return err;
+       err = nft_register_set(&nft_hash_type);
+       if (err < 0)
+               goto err_fast;
+       err = nft_register_set(&nft_rhash_type);
+       if (err < 0)
+               goto err_hash;
+       return 0;
+err_hash:
+       nft_unregister_set(&nft_hash_type);
+err_fast:
+       nft_unregister_set(&nft_hash_fast_type);
+       return err;
 }
 
 static void __exit nft_hash_module_exit(void)
 {
+       nft_unregister_set(&nft_rhash_type);
        nft_unregister_set(&nft_hash_type);
+       nft_unregister_set(&nft_hash_fast_type);
 }
 
 module_init(nft_hash_module_init);
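
Backend selection for hash sets now happens through the estimate callbacks rather than a select_ops hook: nft_hash_fast_estimate() only accepts fixed-size sets with 4-byte keys, nft_hash_estimate() takes the remaining fixed-size cases, and the resizable rhashtable backend, the only one advertising NFT_SET_EVAL and NFT_SET_TIMEOUT, covers unsized sets, so the core can pick the cheapest type whose estimate succeeds and whose features match.
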
index e6f08bc5f359bb17a3391f98e9d56dd596ecc805..22c57d7612c477a447c27b61325dbac04f93b4d9 100644 (file)
@@ -393,28 +393,24 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
        return true;
 }
 
-static struct nft_set_type nft_rbtree_type;
-static struct nft_set_ops nft_rbtree_ops __read_mostly = {
-       .type           = &nft_rbtree_type,
-       .privsize       = nft_rbtree_privsize,
-       .elemsize       = offsetof(struct nft_rbtree_elem, ext),
-       .estimate       = nft_rbtree_estimate,
-       .init           = nft_rbtree_init,
-       .destroy        = nft_rbtree_destroy,
-       .insert         = nft_rbtree_insert,
-       .remove         = nft_rbtree_remove,
-       .deactivate     = nft_rbtree_deactivate,
-       .flush          = nft_rbtree_flush,
-       .activate       = nft_rbtree_activate,
-       .lookup         = nft_rbtree_lookup,
-       .walk           = nft_rbtree_walk,
-       .get            = nft_rbtree_get,
-       .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
-};
-
 static struct nft_set_type nft_rbtree_type __read_mostly = {
-       .ops            = &nft_rbtree_ops,
        .owner          = THIS_MODULE,
+       .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
+       .ops            = {
+               .privsize       = nft_rbtree_privsize,
+               .elemsize       = offsetof(struct nft_rbtree_elem, ext),
+               .estimate       = nft_rbtree_estimate,
+               .init           = nft_rbtree_init,
+               .destroy        = nft_rbtree_destroy,
+               .insert         = nft_rbtree_insert,
+               .remove         = nft_rbtree_remove,
+               .deactivate     = nft_rbtree_deactivate,
+               .flush          = nft_rbtree_flush,
+               .activate       = nft_rbtree_activate,
+               .lookup         = nft_rbtree_lookup,
+               .walk           = nft_rbtree_walk,
+               .get            = nft_rbtree_get,
+       },
 };
 
 static int __init nft_rbtree_module_init(void)
index 71325fef647da706f4dc23eaee28c3a6d1b6799d..cb7cb300c3bc9b85c0c3a6a84de304e03105bb22 100644 (file)
@@ -183,6 +183,9 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
        struct xt_match *m;
        int err = -ENOENT;
 
+       if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+               return ERR_PTR(-EINVAL);
+
        mutex_lock(&xt[af].mutex);
        list_for_each_entry(m, &xt[af].match, list) {
                if (strcmp(m->name, name) == 0) {
@@ -229,6 +232,9 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
        struct xt_target *t;
        int err = -ENOENT;
 
+       if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+               return ERR_PTR(-EINVAL);
+
        mutex_lock(&xt[af].mutex);
        list_for_each_entry(t, &xt[af].target, list) {
                if (strcmp(t->name, name) == 0) {
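
The strnlen() guard added to both lookup paths rejects a name with no NUL terminator inside XT_EXTENSION_MAXNAMELEN before any list walk, since the strcmp() below would otherwise read past the end of the buffer. A runnable user-space sketch of the same check (the helper is made up for illustration; XT_EXTENSION_MAXNAMELEN is 29 in the x_tables uapi header):

#include <stdio.h>
#include <string.h>

#define XT_EXTENSION_MAXNAMELEN 29	/* from linux/netfilter/x_tables.h */

/* Safe to strcmp() only if a NUL occurs before the maximum length. */
static int name_is_terminated(const char *name)
{
	return strnlen(name, XT_EXTENSION_MAXNAMELEN) < XT_EXTENSION_MAXNAMELEN;
}

int main(void)
{
	char bad[XT_EXTENSION_MAXNAMELEN];

	memset(bad, 'A', sizeof(bad));	/* no terminator in range */
	printf("\"conntrack\": %d\n", name_is_terminated("conntrack"));	/* 1 */
	printf("unterminated: %d\n", name_is_terminated(bad));		/* 0 */
	return 0;
}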
index 58aa9dd3c5b7559c7001e7ab74e9ef75bca106cf..1d437875e15a11b58d93fad76a65278b108d0e26 100644 (file)
@@ -21,8 +21,8 @@
 static unsigned int
 netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       const struct nf_nat_range *range = par->targinfo;
-       struct nf_nat_range newrange;
+       const struct nf_nat_range2 *range = par->targinfo;
+       struct nf_nat_range2 newrange;
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
        union nf_inet_addr new_addr, netmask;
@@ -56,7 +56,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
 {
-       const struct nf_nat_range *range = par->targinfo;
+       const struct nf_nat_range2 *range = par->targinfo;
 
        if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
                return -EINVAL;
@@ -75,7 +75,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        enum ip_conntrack_info ctinfo;
        __be32 new_ip, netmask;
        const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-       struct nf_nat_range newrange;
+       struct nf_nat_range2 newrange;
 
        WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
                xt_hooknum(par) != NF_INET_POST_ROUTING &&
index c7f8958cea4a9e24ec26500f92a9af659dbfbb2a..1ed0cac585c46b52adfe8b546309b71f88059571 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_NFLOG.h>
 #include <net/netfilter/nf_log.h>
-#include <net/netfilter/nfnetlink_log.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
@@ -37,8 +36,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
        if (info->flags & XT_NFLOG_F_COPY_LEN)
                li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
 
-       nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb,
-                         xt_in(par), xt_out(par), &li, info->prefix);
+       nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
+                     xt_out(par), &li, "%s", info->prefix);
+
        return XT_CONTINUE;
 }
 
@@ -50,7 +50,13 @@ static int nflog_tg_check(const struct xt_tgchk_param *par)
                return -EINVAL;
        if (info->prefix[sizeof(info->prefix) - 1] != '\0')
                return -EINVAL;
-       return 0;
+
+       return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+}
+
+static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
+{
+       nf_logger_put(par->family, NF_LOG_TYPE_ULOG);
 }
 
 static struct xt_target nflog_tg_reg __read_mostly = {
@@ -58,6 +64,7 @@ static struct xt_target nflog_tg_reg __read_mostly = {
        .revision   = 0,
        .family     = NFPROTO_UNSPEC,
        .checkentry = nflog_tg_check,
+       .destroy    = nflog_tg_destroy,
        .target     = nflog_tg,
        .targetsize = sizeof(struct xt_nflog_info),
        .me         = THIS_MODULE,
index 98a4c6d4f1cb9dc37b0f8839a075050c04287388..5ce9461e979c4f30d03bd7f68d96ed433e44d739 100644 (file)
@@ -36,7 +36,7 @@ redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
 {
-       const struct nf_nat_range *range = par->targinfo;
+       const struct nf_nat_range2 *range = par->targinfo;
 
        if (range->flags & NF_NAT_RANGE_MAP_IPS)
                return -EINVAL;
index 773da82190dc32afc82d9b3a2d792f92660a4612..94df000abb92d657addb2cad35028a2d3f08e024 100644 (file)
@@ -36,11 +36,10 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static unsigned int
-connmark_tg_shift(struct sk_buff *skb,
-               const struct xt_connmark_tginfo1 *info,
-               u8 shift_bits, u8 shift_dir)
+connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
 {
        enum ip_conntrack_info ctinfo;
+       u_int32_t new_targetmark;
        struct nf_conn *ct;
        u_int32_t newmark;
 
@@ -51,34 +50,39 @@ connmark_tg_shift(struct sk_buff *skb,
        switch (info->mode) {
        case XT_CONNMARK_SET:
                newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
-               if (shift_dir == D_SHIFT_RIGHT)
-                       newmark >>= shift_bits;
+               if (info->shift_dir == D_SHIFT_RIGHT)
+                       newmark >>= info->shift_bits;
                else
-                       newmark <<= shift_bits;
+                       newmark <<= info->shift_bits;
+
                if (ct->mark != newmark) {
                        ct->mark = newmark;
                        nf_conntrack_event_cache(IPCT_MARK, ct);
                }
                break;
        case XT_CONNMARK_SAVE:
-               newmark = (ct->mark & ~info->ctmask) ^
-                         (skb->mark & info->nfmask);
-               if (shift_dir == D_SHIFT_RIGHT)
-                       newmark >>= shift_bits;
+               new_targetmark = (skb->mark & info->nfmask);
+               if (info->shift_dir == D_SHIFT_RIGHT)
+                       new_targetmark >>= info->shift_bits;
                else
-                       newmark <<= shift_bits;
+                       new_targetmark <<= info->shift_bits;
+
+               newmark = (ct->mark & ~info->ctmask) ^
+                         new_targetmark;
                if (ct->mark != newmark) {
                        ct->mark = newmark;
                        nf_conntrack_event_cache(IPCT_MARK, ct);
                }
                break;
        case XT_CONNMARK_RESTORE:
-               newmark = (skb->mark & ~info->nfmask) ^
-                         (ct->mark & info->ctmask);
-               if (shift_dir == D_SHIFT_RIGHT)
-                       newmark >>= shift_bits;
+               new_targetmark = (ct->mark & info->ctmask);
+               if (info->shift_dir == D_SHIFT_RIGHT)
+                       new_targetmark >>= info->shift_bits;
                else
-                       newmark <<= shift_bits;
+                       new_targetmark <<= info->shift_bits;
+
+               newmark = (skb->mark & ~info->nfmask) ^
+                         new_targetmark;
                skb->mark = newmark;
                break;
        }
@@ -89,8 +93,14 @@ static unsigned int
 connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_connmark_tginfo1 *info = par->targinfo;
-
-       return connmark_tg_shift(skb, info, 0, 0);
+       const struct xt_connmark_tginfo2 info2 = {
+               .ctmark = info->ctmark,
+               .ctmask = info->ctmask,
+               .nfmask = info->nfmask,
+               .mode   = info->mode,
+       };
+
+       return connmark_tg_shift(skb, &info2);
 }
 
 static unsigned int
@@ -98,8 +108,7 @@ connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_connmark_tginfo2 *info = par->targinfo;
 
-       return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info,
-                                info->shift_bits, info->shift_dir);
+       return connmark_tg_shift(skb, info);
 }
 
 static int connmark_tg_check(const struct xt_tgchk_param *par)
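
With the shift parameters folded into xt_connmark_tginfo2, SAVE and RESTORE now shift the freshly masked source value (new_targetmark) before splicing it into the destination, rather than shifting the combined result. A runnable sketch of the XT_CONNMARK_SAVE arithmetic with arbitrary example masks; the function is illustrative, not kernel code:

#include <stdint.h>
#include <stdio.h>

/* XT_CONNMARK_SAVE, as rewritten above: shift the masked packet mark
 * first, then splice it into the conntrack mark under ctmask.
 */
static uint32_t connmark_save(uint32_t ctmark, uint32_t skb_mark,
			      uint32_t ctmask, uint32_t nfmask,
			      unsigned int shift_bits, int shift_right)
{
	uint32_t new_targetmark = skb_mark & nfmask;

	if (shift_right)
		new_targetmark >>= shift_bits;
	else
		new_targetmark <<= shift_bits;

	return (ctmark & ~ctmask) ^ new_targetmark;
}

int main(void)
{
	/* Save the low byte of the packet mark into bits 8..15 of the
	 * conntrack mark (example masks, not real policy).
	 */
	printf("new ct->mark = 0x%08x\n",
	       connmark_save(0xdead0000, 0x000000ab,
			     0x0000ff00, 0x000000ff, 8, 0));	/* 0xdeadab00 */
	return 0;
}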
index bdb689cdc829df85372dbdaa0acf47c6772e47d8..8af9707f8789e475710733b3434c9db4a7eedae2 100644 (file)
@@ -37,11 +37,12 @@ static void xt_nat_destroy(const struct xt_tgdtor_param *par)
        nf_ct_netns_put(par->net, par->family);
 }
 
-static void xt_nat_convert_range(struct nf_nat_range *dst,
+static void xt_nat_convert_range(struct nf_nat_range2 *dst,
                                 const struct nf_nat_ipv4_range *src)
 {
        memset(&dst->min_addr, 0, sizeof(dst->min_addr));
        memset(&dst->max_addr, 0, sizeof(dst->max_addr));
+       memset(&dst->base_proto, 0, sizeof(dst->base_proto));
 
        dst->flags       = src->flags;
        dst->min_addr.ip = src->min_ip;
@@ -54,7 +55,7 @@ static unsigned int
 xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
 
@@ -71,7 +72,7 @@ static unsigned int
 xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-       struct nf_nat_range range;
+       struct nf_nat_range2 range;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
 
@@ -86,7 +87,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 static unsigned int
 xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       const struct nf_nat_range *range = par->targinfo;
+       const struct nf_nat_range *range_v1 = par->targinfo;
+       struct nf_nat_range2 range;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
 
@@ -95,13 +97,49 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
                 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                  ctinfo == IP_CT_RELATED_REPLY)));
 
-       return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+       memcpy(&range, range_v1, sizeof(*range_v1));
+       memset(&range.base_proto, 0, sizeof(range.base_proto));
+
+       return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 }
 
 static unsigned int
 xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       const struct nf_nat_range *range = par->targinfo;
+       const struct nf_nat_range *range_v1 = par->targinfo;
+       struct nf_nat_range2 range;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       WARN_ON(!(ct != NULL &&
+                (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
+
+       memcpy(&range, range_v1, sizeof(*range_v1));
+       memset(&range.base_proto, 0, sizeof(range.base_proto));
+
+       return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+static unsigned int
+xt_snat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct nf_nat_range2 *range = par->targinfo;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       WARN_ON(!(ct != NULL &&
+                (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+                 ctinfo == IP_CT_RELATED_REPLY)));
+
+       return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct nf_nat_range2 *range = par->targinfo;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
 
@@ -163,6 +201,28 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
                                  (1 << NF_INET_LOCAL_OUT),
                .me             = THIS_MODULE,
        },
+       {
+               .name           = "SNAT",
+               .revision       = 2,
+               .checkentry     = xt_nat_checkentry,
+               .destroy        = xt_nat_destroy,
+               .target         = xt_snat_target_v2,
+               .targetsize     = sizeof(struct nf_nat_range2),
+               .table          = "nat",
+               .hooks          = (1 << NF_INET_POST_ROUTING) |
+                                 (1 << NF_INET_LOCAL_IN),
+               .me             = THIS_MODULE,
+       },
+       {
+               .name           = "DNAT",
+               .revision       = 2,
+               .target         = xt_dnat_target_v2,
+               .targetsize     = sizeof(struct nf_nat_range2),
+               .table          = "nat",
+               .hooks          = (1 << NF_INET_PRE_ROUTING) |
+                                 (1 << NF_INET_LOCAL_OUT),
+               .me             = THIS_MODULE,
+       },
 };
 
 static int __init xt_nat_init(void)
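
The v1 handlers keep working on the old userspace layout because struct nf_nat_range2 extends struct nf_nat_range: copying sizeof(v1) bytes fills the common prefix, and only the new base_proto tail needs zeroing. A standalone sketch of that conversion idiom, using simplified stand-in structs rather than the real netfilter definitions:

#include <stdio.h>
#include <string.h>

/* Simplified stand-ins: v2 is v1 plus one trailing field, mirroring
 * nf_nat_range -> nf_nat_range2.
 */
struct range_v1 { unsigned int flags, min_ip, max_ip; };
struct range_v2 { unsigned int flags, min_ip, max_ip;
		  unsigned int base_proto;	/* new in v2 */ };

int main(void)
{
	struct range_v1 v1 = { .flags = 1, .min_ip = 10, .max_ip = 20 };
	struct range_v2 v2;

	memcpy(&v2, &v1, sizeof(v1));			  /* common prefix */
	memset(&v2.base_proto, 0, sizeof(v2.base_proto)); /* new tail */

	printf("flags=%u min=%u max=%u base=%u\n",
	       v2.flags, v2.min_ip, v2.max_ip, v2.base_proto);
	return 0;
}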
index a34f314a8c2380e6b6a223dd6d38dcc88ca2c1ac..9cfef73b41075dad1108623206feb05aeb36c0f6 100644 (file)
 #include <net/netfilter/nf_log.h>
 #include <linux/netfilter/xt_osf.h>
 
-struct xt_osf_finger {
-       struct rcu_head                 rcu_head;
-       struct list_head                finger_entry;
-       struct xt_osf_user_finger       finger;
-};
-
-enum osf_fmatch_states {
-       /* Packet does not match the fingerprint */
-       FMATCH_WRONG = 0,
-       /* Packet matches the fingerprint */
-       FMATCH_OK,
-       /* Options do not match the fingerprint, but header does */
-       FMATCH_OPT_WRONG,
-};
-
 /*
  * Indexed by dont-fragment bit.
  * It is the only constant value in the fingerprint.
@@ -164,200 +149,17 @@ static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
        .cb                     = xt_osf_nfnetlink_callbacks,
 };
 
-static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
-                           unsigned char f_ttl)
-{
-       const struct iphdr *ip = ip_hdr(skb);
-
-       if (info->flags & XT_OSF_TTL) {
-               if (info->ttl == XT_OSF_TTL_TRUE)
-                       return ip->ttl == f_ttl;
-               if (info->ttl == XT_OSF_TTL_NOCHECK)
-                       return 1;
-               else if (ip->ttl <= f_ttl)
-                       return 1;
-               else {
-                       struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
-                       int ret = 0;
-
-                       for_ifa(in_dev) {
-                               if (inet_ifa_match(ip->saddr, ifa)) {
-                                       ret = (ip->ttl == f_ttl);
-                                       break;
-                               }
-                       }
-                       endfor_ifa(in_dev);
-
-                       return ret;
-               }
-       }
-
-       return ip->ttl == f_ttl;
-}
-
 static bool
 xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 {
        const struct xt_osf_info *info = p->matchinfo;
-       const struct iphdr *ip = ip_hdr(skb);
-       const struct tcphdr *tcp;
-       struct tcphdr _tcph;
-       int fmatch = FMATCH_WRONG, fcount = 0;
-       unsigned int optsize = 0, check_WSS = 0;
-       u16 window, totlen, mss = 0;
-       bool df;
-       const unsigned char *optp = NULL, *_optp = NULL;
-       unsigned char opts[MAX_IPOPTLEN];
-       const struct xt_osf_finger *kf;
-       const struct xt_osf_user_finger *f;
        struct net *net = xt_net(p);
 
        if (!info)
                return false;
 
-       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
-       if (!tcp)
-               return false;
-
-       if (!tcp->syn)
-               return false;
-
-       totlen = ntohs(ip->tot_len);
-       df = ntohs(ip->frag_off) & IP_DF;
-       window = ntohs(tcp->window);
-
-       if (tcp->doff * 4 > sizeof(struct tcphdr)) {
-               optsize = tcp->doff * 4 - sizeof(struct tcphdr);
-
-               _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
-                               sizeof(struct tcphdr), optsize, opts);
-       }
-
-       list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
-               int foptsize, optnum;
-
-               f = &kf->finger;
-
-               if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
-                       continue;
-
-               optp = _optp;
-               fmatch = FMATCH_WRONG;
-
-               if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl))
-                       continue;
-
-               /*
-                * Should not happen if userspace parser was written correctly.
-                */
-               if (f->wss.wc >= OSF_WSS_MAX)
-                       continue;
-
-               /* Check options */
-
-               foptsize = 0;
-               for (optnum = 0; optnum < f->opt_num; ++optnum)
-                       foptsize += f->opt[optnum].length;
-
-               if (foptsize > MAX_IPOPTLEN ||
-                   optsize > MAX_IPOPTLEN ||
-                   optsize != foptsize)
-                       continue;
-
-               check_WSS = f->wss.wc;
-
-               for (optnum = 0; optnum < f->opt_num; ++optnum) {
-                       if (f->opt[optnum].kind == (*optp)) {
-                               __u32 len = f->opt[optnum].length;
-                               const __u8 *optend = optp + len;
-
-                               fmatch = FMATCH_OK;
-
-                               switch (*optp) {
-                               case OSFOPT_MSS:
-                                       mss = optp[3];
-                                       mss <<= 8;
-                                       mss |= optp[2];
-
-                                       mss = ntohs((__force __be16)mss);
-                                       break;
-                               case OSFOPT_TS:
-                                       break;
-                               }
-
-                               optp = optend;
-                       } else
-                               fmatch = FMATCH_OPT_WRONG;
-
-                       if (fmatch != FMATCH_OK)
-                               break;
-               }
-
-               if (fmatch != FMATCH_OPT_WRONG) {
-                       fmatch = FMATCH_WRONG;
-
-                       switch (check_WSS) {
-                       case OSF_WSS_PLAIN:
-                               if (f->wss.val == 0 || window == f->wss.val)
-                                       fmatch = FMATCH_OK;
-                               break;
-                       case OSF_WSS_MSS:
-                               /*
-                                * Some smart modems decrease mangle MSS to
-                                * SMART_MSS_2, so we check standard, decreased
-                                * and the one provided in the fingerprint MSS
-                                * values.
-                                */
-#define SMART_MSS_1    1460
-#define SMART_MSS_2    1448
-                               if (window == f->wss.val * mss ||
-                                   window == f->wss.val * SMART_MSS_1 ||
-                                   window == f->wss.val * SMART_MSS_2)
-                                       fmatch = FMATCH_OK;
-                               break;
-                       case OSF_WSS_MTU:
-                               if (window == f->wss.val * (mss + 40) ||
-                                   window == f->wss.val * (SMART_MSS_1 + 40) ||
-                                   window == f->wss.val * (SMART_MSS_2 + 40))
-                                       fmatch = FMATCH_OK;
-                               break;
-                       case OSF_WSS_MODULO:
-                               if ((window % f->wss.val) == 0)
-                                       fmatch = FMATCH_OK;
-                               break;
-                       }
-               }
-
-               if (fmatch != FMATCH_OK)
-                       continue;
-
-               fcount++;
-
-               if (info->flags & XT_OSF_LOG)
-                       nf_log_packet(net, xt_family(p), xt_hooknum(p), skb,
-                                     xt_in(p), xt_out(p), NULL,
-                                     "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
-                                     f->genre, f->version, f->subtype,
-                                     &ip->saddr, ntohs(tcp->source),
-                                     &ip->daddr, ntohs(tcp->dest),
-                                     f->ttl - ip->ttl);
-
-               if ((info->flags & XT_OSF_LOG) &&
-                   info->loglevel == XT_OSF_LOGLEVEL_FIRST)
-                       break;
-       }
-
-       if (!fcount && (info->flags & XT_OSF_LOG))
-               nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
-                             xt_out(p), NULL,
-                       "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
-                               &ip->saddr, ntohs(tcp->source),
-                               &ip->daddr, ntohs(tcp->dest));
-
-       if (fcount)
-               fmatch = FMATCH_OK;
-
-       return fmatch == FMATCH_OK;
+       return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
+                           xt_out(p), info, net, xt_osf_fingers);
 }
 
 static struct xt_match xt_osf_match = {
index 55342c4d5cec6a999065aa1c9607bdf476c59d36..2e2dd88fc79ffd460fdc54d305b6380da2653568 100644 (file)
@@ -2606,13 +2606,13 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 {
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
-                        "sk       Eth Pid    Groups   "
-                        "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
+                        "sk               Eth Pid        Groups   "
+                        "Rmem     Wmem     Dump  Locks    Drops    Inode\n");
        } else {
                struct sock *s = v;
                struct netlink_sock *nlk = nlk_sk(s);
 
-               seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
+               seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8d %-8lu\n",
                           s,
                           s->sk_protocol,
                           nlk->portid,
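
The widened columns exist because a 32-bit portid can need ten digits: kernel-autobound netlink sockets get ids from the top of the unsigned range, which burst the old %-6u field and broke the /proc/net/netlink header alignment. A quick illustration:

#include <stdio.h>

int main(void)
{
	printf("[%-6u]\n", 4294967295u);	/* 10 digits burst the 6-wide field */
	printf("[%-10u]\n", 4294967295u);	/* fits the widened column exactly */
	return 0;
}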
index d7da99a0b0b852d7459eed9ac6d3cdf3d49a1a1c..9696ef96b719bf24625adea2a959deac1d2a975f 100644 (file)
@@ -57,6 +57,8 @@ int nsh_pop(struct sk_buff *skb)
                return -ENOMEM;
        nh = (struct nshhdr *)(skb->data);
        length = nsh_hdr_len(nh);
+       if (length < NSH_BASE_HDR_LEN)
+               return -EINVAL;
        inner_proto = tun_p_to_eth_p(nh->np);
        if (!pskb_may_pull(skb, length))
                return -ENOMEM;
@@ -90,6 +92,8 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
        if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
                goto out;
        nsh_len = nsh_hdr_len(nsh_hdr(skb));
+       if (nsh_len < NSH_BASE_HDR_LEN)
+               goto out;
        if (unlikely(!pskb_may_pull(skb, nsh_len)))
                goto out;
 
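
nsh_hdr_len() derives the total length from a field carried in the packet itself, so both the pop and GSO paths must first check that the claimed length covers at least the 8-byte base header. A runnable sketch of the bound, assuming the length field counts 4-byte words as in the NSH draft:

#include <stdio.h>

#define NSH_BASE_HDR_LEN 8	/* fixed base header, bytes */

/* A crafted packet can claim fewer bytes than the base header itself,
 * which is what the two checks above reject.
 */
static int nsh_len_ok(unsigned int len_words)
{
	return len_words * 4 >= NSH_BASE_HDR_LEN;
}

int main(void)
{
	printf("1 word:  %s\n", nsh_len_ok(1) ? "ok" : "reject");
	printf("2 words: %s\n", nsh_len_ok(2) ? "ok" : "reject");
	return 0;
}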
index c5904f629091d71420645b9585bf1362b0f39de2..02fc343feb665f1404703a4b2815752014e362fa 100644 (file)
@@ -72,7 +72,7 @@ struct ovs_conntrack_info {
        struct md_mark mark;
        struct md_labels labels;
 #ifdef CONFIG_NF_NAT_NEEDED
-       struct nf_nat_range range;  /* Only present for SRC NAT and DST NAT. */
+       struct nf_nat_range2 range;  /* Only present for SRC NAT and DST NAT. */
 #endif
 };
 
@@ -710,7 +710,7 @@ static bool skb_nfct_cached(struct net *net,
  */
 static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
                              enum ip_conntrack_info ctinfo,
-                             const struct nf_nat_range *range,
+                             const struct nf_nat_range2 *range,
                              enum nf_nat_manip_type maniptype)
 {
        int hooknum, nh_off, err = NF_ACCEPT;
index 7322aa1e382e4ba476243ef47801a50df2efb7f1..492ab0c36f7c9e3caf6de7e7d77368028716e09c 100644 (file)
@@ -1712,13 +1712,10 @@ static void nlattr_set(struct nlattr *attr, u8 val,
 
        /* The nlattr stream should already have been validated */
        nla_for_each_nested(nla, attr, rem) {
-               if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
-                       if (tbl[nla_type(nla)].next)
-                               tbl = tbl[nla_type(nla)].next;
-                       nlattr_set(nla, val, tbl);
-               } else {
+               if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+                       nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
+               else
                        memset(nla_data(nla), val, nla_len(nla));
-               }
 
                if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
                        *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
index c31b0687396a6ef45413f06efcc7c3f923e91d01..2cc98c763003f70d36fa4cfbc9d0700ef3b7d75a 100644 (file)
@@ -209,7 +209,7 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *,
 static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
                struct tpacket3_hdr *);
 static void packet_flush_mclist(struct sock *sk);
-static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb);
+static u16 packet_pick_tx_queue(struct sk_buff *skb);
 
 struct packet_skb_cb {
        union {
@@ -243,40 +243,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
 
 static int packet_direct_xmit(struct sk_buff *skb)
 {
-       struct net_device *dev = skb->dev;
-       struct sk_buff *orig_skb = skb;
-       struct netdev_queue *txq;
-       int ret = NETDEV_TX_BUSY;
-       bool again = false;
-
-       if (unlikely(!netif_running(dev) ||
-                    !netif_carrier_ok(dev)))
-               goto drop;
-
-       skb = validate_xmit_skb_list(skb, dev, &again);
-       if (skb != orig_skb)
-               goto drop;
-
-       packet_pick_tx_queue(dev, skb);
-       txq = skb_get_tx_queue(dev, skb);
-
-       local_bh_disable();
-
-       HARD_TX_LOCK(dev, txq, smp_processor_id());
-       if (!netif_xmit_frozen_or_drv_stopped(txq))
-               ret = netdev_start_xmit(skb, dev, txq, false);
-       HARD_TX_UNLOCK(dev, txq);
-
-       local_bh_enable();
-
-       if (!dev_xmit_complete(ret))
-               kfree_skb(skb);
-
-       return ret;
-drop:
-       atomic_long_inc(&dev->tx_dropped);
-       kfree_skb_list(skb);
-       return NET_XMIT_DROP;
+       return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
 }
 
 static struct net_device *packet_cached_dev_get(struct packet_sock *po)
@@ -313,8 +280,9 @@ static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
        return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
 }
 
-static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 packet_pick_tx_queue(struct sk_buff *skb)
 {
+       struct net_device *dev = skb->dev;
        const struct net_device_ops *ops = dev->netdev_ops;
        u16 queue_index;
 
@@ -326,14 +294,14 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
                queue_index = __packet_pick_tx_queue(dev, skb);
        }
 
-       skb_set_queue_mapping(skb, queue_index);
+       return queue_index;
 }
 
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
  * or from a context in which asynchronous accesses to the packet
 * socket are not possible (packet_create()).
  */
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
 {
        struct packet_sock *po = pkt_sk(sk);
 
@@ -348,8 +316,13 @@ static void register_prot_hook(struct sock *sk)
        }
 }
 
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held.   If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+       lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+       __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
  * the po->bind_lock and do a synchronize_net to make sure no
  * asynchronous packet processing paths still refer to the elements
  * of po->prot_hook.  If the sync parameter is false, it is the
@@ -359,6 +332,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
 {
        struct packet_sock *po = pkt_sk(sk);
 
+       lockdep_assert_held_once(&po->bind_lock);
+
        po->running = 0;
 
        if (po->fanout)
@@ -2896,13 +2871,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
        if (skb == NULL)
                goto out_unlock;
 
-       skb_set_network_header(skb, reserve);
+       skb_reset_network_header(skb);
 
        err = -EINVAL;
        if (sock->type == SOCK_DGRAM) {
                offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                if (unlikely(offset < 0))
                        goto out_free;
+       } else if (reserve) {
+               skb_push(skb, reserve);
        }
 
        /* Returns -EFAULT on error */
@@ -3252,7 +3229,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 
        if (proto) {
                po->prot_hook.type = proto;
-               register_prot_hook(sk);
+               __register_prot_hook(sk);
        }
 
        mutex_lock(&net->packet.sklist_lock);
@@ -3732,12 +3709,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_loss = !!val;
-               return 0;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_loss = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_AUXDATA:
        {
@@ -3748,7 +3731,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->auxdata = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_ORIGDEV:
@@ -3760,7 +3745,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->origdev = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_VNET_HDR:
@@ -3769,15 +3756,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                if (sock->type != SOCK_RAW)
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
-               po->has_vnet_hdr = !!val;
-               return 0;
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->has_vnet_hdr = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_TIMESTAMP:
        {
@@ -3815,11 +3807,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_tx_has_off = !!val;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_tx_has_off = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
-               return 0;
+               return ret;
        }
        case PACKET_QDISC_BYPASS:
index a1d2b2319ae990d55bb3c469cc1ba404cadcbdb9..3bb7c5fb3bff2fd5d91c3d973d006d0cdde29a0b 100644 (file)
@@ -112,10 +112,12 @@ struct packet_sock {
        int                     copy_thresh;
        spinlock_t              bind_lock;
        struct mutex            pg_vec_lock;
-       unsigned int            running:1,      /* prot_hook is attached*/
-                               auxdata:1,
+       unsigned int            running;        /* bind_lock must be held */
+       unsigned int            auxdata:1,      /* writer must hold sock lock */
                                origdev:1,
-                               has_vnet_hdr:1;
+                               has_vnet_hdr:1,
+                               tp_loss:1,
+                               tp_tx_has_off:1;
        int                     pressure;
        int                     ifindex;        /* bound device         */
        __be16                  num;
@@ -125,8 +127,6 @@ struct packet_sock {
        enum tpacket_versions   tp_version;
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
-       unsigned int            tp_loss:1;
-       unsigned int            tp_tx_has_off:1;
        unsigned int            tp_tstamp;
        struct net_device __rcu *cached_dev;
        int                     (*xmit)(struct sk_buff *skb);
index 326fd97444f5bed5c82af7d632d8a4424f9908d3..1944834d225cf63069ccc0badc2b5e342e492d2e 100644 (file)
@@ -21,4 +21,11 @@ config QRTR_SMD
          Say Y here to support SMD based ipcrouter channels.  SMD is the
          most common transport for IPC Router.
 
+config QRTR_TUN
+       tristate "TUN device for Qualcomm IPC Router"
+       ---help---
+         Say Y here to expose a character device that allows user space to
+         implement endpoints of QRTR, for the purpose of tunneling data to
+         other hosts or for testing.
+
 endif # QRTR
index ab09e40f7c74b9183397a78afc701cd24a4cab3c..be012bfd3e52555b4032ca8566d906735b763c2b 100644 (file)
@@ -2,3 +2,5 @@ obj-$(CONFIG_QRTR) := qrtr.o
 
 obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
 qrtr-smd-y     := smd.o
+obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
+qrtr-tun-y     := tun.o
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
new file mode 100644 (file)
index 0000000..ccff1e5
--- /dev/null
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Linaro Ltd */
+
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/uaccess.h>
+
+#include "qrtr.h"
+
+struct qrtr_tun {
+       struct qrtr_endpoint ep;
+
+       struct sk_buff_head queue;
+       wait_queue_head_t readq;
+};
+
+static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
+{
+       struct qrtr_tun *tun = container_of(ep, struct qrtr_tun, ep);
+
+       skb_queue_tail(&tun->queue, skb);
+
+       /* wake up any processes blocked waiting for new data */
+       wake_up_interruptible(&tun->readq);
+
+       return 0;
+}
+
+static int qrtr_tun_open(struct inode *inode, struct file *filp)
+{
+       struct qrtr_tun *tun;
+       int ret;
+
+       tun = kzalloc(sizeof(*tun), GFP_KERNEL);
+       if (!tun)
+               return -ENOMEM;
+
+       skb_queue_head_init(&tun->queue);
+       init_waitqueue_head(&tun->readq);
+
+       tun->ep.xmit = qrtr_tun_send;
+
+       filp->private_data = tun;
+
+       ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+       if (ret) {
+               filp->private_data = NULL;
+               kfree(tun);
+       }
+
+       return ret;
+}
+
+static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct file *filp = iocb->ki_filp;
+       struct qrtr_tun *tun = filp->private_data;
+       struct sk_buff *skb;
+       int count;
+
+       while (!(skb = skb_dequeue(&tun->queue))) {
+               if (filp->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+
+               /* Wait until we get data or the endpoint goes away */
+               if (wait_event_interruptible(tun->readq,
+                                            !skb_queue_empty(&tun->queue)))
+                       return -ERESTARTSYS;
+       }
+
+       count = min_t(size_t, iov_iter_count(to), skb->len);
+       if (copy_to_iter(skb->data, count, to) != count)
+               count = -EFAULT;
+
+       kfree_skb(skb);
+
+       return count;
+}
+
+static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *filp = iocb->ki_filp;
+       struct qrtr_tun *tun = filp->private_data;
+       size_t len = iov_iter_count(from);
+       ssize_t ret;
+       void *kbuf;
+
+       kbuf = kzalloc(len, GFP_KERNEL);
+       if (!kbuf)
+               return -ENOMEM;
+
+       if (!copy_from_iter_full(kbuf, len, from)) {
+               kfree(kbuf);
+               return -EFAULT;
+       }
+
+       ret = qrtr_endpoint_post(&tun->ep, kbuf, len);
+
+       kfree(kbuf);
+       return ret < 0 ? ret : len;
+}
+
+static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait)
+{
+       struct qrtr_tun *tun = filp->private_data;
+       __poll_t mask = 0;
+
+       poll_wait(filp, &tun->readq, wait);
+
+       if (!skb_queue_empty(&tun->queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+
+       return mask;
+}
+
+static int qrtr_tun_release(struct inode *inode, struct file *filp)
+{
+       struct qrtr_tun *tun = filp->private_data;
+       struct sk_buff *skb;
+
+       qrtr_endpoint_unregister(&tun->ep);
+
+       /* Discard all SKBs */
+       while (!skb_queue_empty(&tun->queue)) {
+               skb = skb_dequeue(&tun->queue);
+               kfree_skb(skb);
+       }
+
+       kfree(tun);
+
+       return 0;
+}
+
+static const struct file_operations qrtr_tun_ops = {
+       .owner = THIS_MODULE,
+       .open = qrtr_tun_open,
+       .poll = qrtr_tun_poll,
+       .read_iter = qrtr_tun_read_iter,
+       .write_iter = qrtr_tun_write_iter,
+       .release = qrtr_tun_release,
+};
+
+static struct miscdevice qrtr_tun_miscdev = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name  = "qrtr-tun",
+       .fops  = &qrtr_tun_ops,
+};
+
+static int __init qrtr_tun_init(void)
+{
+       int ret;
+
+       ret = misc_register(&qrtr_tun_miscdev);
+       if (ret)
+               pr_err("failed to register Qualcomm IPC Router tun device\n");
+
+       return ret;
+}
+
+static void __exit qrtr_tun_exit(void)
+{
+       misc_deregister(&qrtr_tun_miscdev);
+}
+
+module_init(qrtr_tun_init);
+module_exit(qrtr_tun_exit);
+
+MODULE_DESCRIPTION("Qualcomm IPC Router TUN device");
+MODULE_LICENSE("GPL v2");
index eea1d8611b205d771c04cdb12c7c35dc2db403ff..13b38ad0fa4a4c9e70f4f593a5a1c2a058a0d446 100644 (file)
@@ -547,7 +547,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
                 ic->i_send_cq, ic->i_recv_cq);
 
-       return ret;
+       goto out;
 
 sends_out:
        vfree(ic->i_sends);
@@ -572,6 +572,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
                ic->i_send_cq = NULL;
 rds_ibdev_out:
        rds_ib_remove_conn(rds_ibdev, conn);
+out:
        rds_ib_dev_put(rds_ibdev);
 
        return ret;
index de50e2126e404aed541b8d268a28da08154bf08d..dc67458b52f0043c2328d4a77a43536e7c62b0ed 100644 (file)
@@ -558,6 +558,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
                struct rds_cmsg_rx_trace t;
                int i, j;
 
+               memset(&t, 0, sizeof(t));
                inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
                t.rx_traces =  rs->rs_rx_traces;
                for (i = 0; i < rs->rs_rx_traces; i++) {
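
The memset() closes a kernel stack infoleak: assigning the members individually leaves the structure's padding bytes holding stale stack data, and the whole object is later copied out to user space as a cmsg. A simplified, runnable sketch of the hazard; the layout below is illustrative, not the real struct rds_cmsg_rx_trace:

#include <stdio.h>
#include <string.h>

/* A small count followed by wide samples leaves padding that member
 * assignment never writes.
 */
struct trace_sketch {
	unsigned short rx_traces;
	/* compiler inserts padding here on typical 64-bit ABIs */
	unsigned long long rx_trace[4];
};

int main(void)
{
	struct trace_sketch t;

	memset(&t, 0, sizeof(t));	/* the fix: padding zeroed too */
	t.rx_traces = 2;
	t.rx_trace[0] = 123;

	printf("%zu bytes fully defined before copy to user space\n",
	       sizeof(t));
	return 0;
}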
index 41bd496531d45e8d43428584605bfb55ef249169..00192a996be0eab95be229f801ada687cca0f71a 100644 (file)
@@ -137,13 +137,18 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 
        ret = rfkill_register(rfkill->rfkill_dev);
        if (ret < 0)
-               return ret;
+               goto err_destroy;
 
        platform_set_drvdata(pdev, rfkill);
 
        dev_info(&pdev->dev, "%s device registered.\n", rfkill->name);
 
        return 0;
+
+err_destroy:
+       rfkill_destroy(rfkill->rfkill_dev);
+
+       return ret;
 }
 
 static int rfkill_gpio_remove(struct platform_device *pdev)
index 9a2c8e7c000e6ad21858b082fa2e8aa5a295114e..2b463047dd7ba93267feb584e1ffda280449a0b3 100644 (file)
@@ -313,7 +313,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
        memset(&cp, 0, sizeof(cp));
        cp.local                = rx->local;
        cp.key                  = key;
-       cp.security_level       = 0;
+       cp.security_level       = rx->min_sec_level;
        cp.exclusive            = false;
        cp.upgrade              = upgrade;
        cp.service_id           = srx->srx_service;
index 90d7079e0aa99327ced07c61d4c8e315fbe26f8d..19975d2ca9a20367d900f14ab8b776953e3d5ba1 100644 (file)
@@ -476,6 +476,7 @@ enum rxrpc_call_flag {
        RXRPC_CALL_SEND_PING,           /* A ping will need to be sent */
        RXRPC_CALL_PINGING,             /* Ping in process */
        RXRPC_CALL_RETRANS_TIMEOUT,     /* Retransmission due to timeout occurred */
+       RXRPC_CALL_BEGAN_RX_TIMER,      /* We began the expect_rx_by timer */
 };
 
 /*
index c717152070dff2906d15aa53bd1c488bd42cf2c8..1350f1be8037b7655a9ba9ebc46ff898e043ff5c 100644 (file)
@@ -40,7 +40,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
        } __attribute__((packed)) pkt;
        struct rxrpc_ackinfo ack_info;
        size_t len;
-       int ioc;
+       int ret, ioc;
        u32 serial, mtu, call_id, padding;
 
        _enter("%d", conn->debug_id);
@@ -135,10 +135,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
                break;
        }
 
-       kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
+       ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
        conn->params.peer->last_tx_at = ktime_get_real();
+       if (ret < 0)
+               trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_final_resend);
+
        _leave("");
-       return;
 }
 
 /*
@@ -236,6 +239,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
        if (ret < 0) {
+               trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+                                   rxrpc_tx_fail_conn_abort);
                _debug("sendmsg failed: %d", ret);
                return -EAGAIN;
        }
index 0410d2277ca28bcf0259df897581b947f707fac1..b5fd6381313d06b1b02ad6e6f6ca5c0dcc74b10d 100644 (file)
@@ -971,7 +971,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
        if (timo) {
                unsigned long now = jiffies, expect_rx_by;
 
-               expect_rx_by = jiffies + timo;
+               expect_rx_by = now + timo;
                WRITE_ONCE(call->expect_rx_by, expect_rx_by);
                rxrpc_reduce_call_timer(call, expect_rx_by, now,
                                        rxrpc_timer_set_for_normal);
index 93b5d910b4a130a8f2641f1a20f0d2e83b60ed56..8325f1b868404690d54a6e98d70b6896c3537f72 100644 (file)
@@ -71,7 +71,8 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
 
        ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
        if (ret < 0)
-               _debug("sendmsg failed: %d", ret);
+               trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+                                   rxrpc_tx_fail_version_reply);
 
        _leave("");
 }
index 8b54e9531d52b63c0afd0989b1fbb641d147e6f3..b493e6b6274043e07b15c5a0481f0e92a4478ea4 100644 (file)
@@ -134,22 +134,49 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
                }
        }
 
-       /* we want to receive ICMP errors */
-       opt = 1;
-       ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
-                               (char *) &opt, sizeof(opt));
-       if (ret < 0) {
-               _debug("setsockopt failed");
-               goto error;
-       }
+       switch (local->srx.transport.family) {
+       case AF_INET:
+               /* we want to receive ICMP errors */
+               opt = 1;
+               ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+                                       (char *) &opt, sizeof(opt));
+               if (ret < 0) {
+                       _debug("setsockopt failed");
+                       goto error;
+               }
 
-       /* we want to set the don't fragment bit */
-       opt = IP_PMTUDISC_DO;
-       ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
-                               (char *) &opt, sizeof(opt));
-       if (ret < 0) {
-               _debug("setsockopt failed");
-               goto error;
+               /* we want to set the don't fragment bit */
+               opt = IP_PMTUDISC_DO;
+               ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+                                       (char *) &opt, sizeof(opt));
+               if (ret < 0) {
+                       _debug("setsockopt failed");
+                       goto error;
+               }
+               break;
+
+       case AF_INET6:
+               /* we want to receive ICMP errors */
+               opt = 1;
+               ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
+                                       (char *) &opt, sizeof(opt));
+               if (ret < 0) {
+                       _debug("setsockopt failed");
+                       goto error;
+               }
+
+               /* we want to set the don't fragment bit */
+               opt = IPV6_PMTUDISC_DO;
+               ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER,
+                                       (char *) &opt, sizeof(opt));
+               if (ret < 0) {
+                       _debug("setsockopt failed");
+                       goto error;
+               }
+               break;
+
+       default:
+               BUG();
        }
 
        /* set the socket up */
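
Previously an AF_INET6 transport received only the IPv4-level options, which do not apply to an IPv6 socket, so ICMPv6 errors never reached rxrpc_error_report() and the don't-fragment bit was never set. A user-space analogue of the per-family setup (a sketch; the kernel code above uses kernel_setsockopt()):

#include <netinet/in.h>
#include <sys/socket.h>

/* Request ICMP error queueing and path-MTU discovery with the options
 * that match the socket's family.
 */
static int setup_err_and_df(int fd, int family)
{
	int on = 1, pmtu;

	switch (family) {
	case AF_INET:
		pmtu = IP_PMTUDISC_DO;
		if (setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on)) ||
		    setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
			       &pmtu, sizeof(pmtu)))
			return -1;
		return 0;
	case AF_INET6:
		pmtu = IPV6_PMTUDISC_DO;
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_RECVERR, &on, sizeof(on)) ||
		    setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
			       &pmtu, sizeof(pmtu)))
			return -1;
		return 0;
	default:
		return -1;
	}
}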
index 7f1fc04775b34e5abd8e24a450ae386e74bf194a..f03de1c59ba37678f36f3a5c0778f3f3f9274757 100644 (file)
@@ -210,6 +210,9 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
        if (ping)
                call->ping_time = now;
        conn->params.peer->last_tx_at = ktime_get_real();
+       if (ret < 0)
+               trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_ack);
 
        if (call->state < RXRPC_CALL_COMPLETE) {
                if (ret < 0) {
@@ -294,6 +297,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
        ret = kernel_sendmsg(conn->params.local->socket,
                             &msg, iov, 1, sizeof(pkt));
        conn->params.peer->last_tx_at = ktime_get_real();
+       if (ret < 0)
+               trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_abort);
 
        rxrpc_put_connection(conn);
        return ret;
@@ -387,6 +394,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
        conn->params.peer->last_tx_at = ktime_get_real();
 
        up_read(&conn->params.local->defrag_sem);
+       if (ret < 0)
+               trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_data_nofrag);
        if (ret == -EMSGSIZE)
                goto send_fragmentable;
 
@@ -414,6 +424,17 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
                                                        rxrpc_timer_set_for_lost_ack);
                        }
                }
+
+               if (sp->hdr.seq == 1 &&
+                   !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
+                                     &call->flags)) {
+                       unsigned long nowj = jiffies, expect_rx_by;
+
+                       expect_rx_by = nowj + call->next_rx_timo;
+                       WRITE_ONCE(call->expect_rx_by, expect_rx_by);
+                       rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
+                                               rxrpc_timer_set_for_normal);
+               }
        }
 
        rxrpc_set_keepalive(call);
@@ -465,6 +486,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 #endif
        }
 
+       if (ret < 0)
+               trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_data_frag);
+
        up_write(&conn->params.local->defrag_sem);
        goto done;
 }
@@ -482,6 +507,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
        struct kvec iov[2];
        size_t size;
        __be32 code;
+       int ret;
 
        _enter("%d", local->debug_id);
 
@@ -516,7 +542,10 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
                        whdr.flags      ^= RXRPC_CLIENT_INITIATED;
                        whdr.flags      &= RXRPC_CLIENT_INITIATED;
 
-                       kernel_sendmsg(local->socket, &msg, iov, 2, size);
+                       ret = kernel_sendmsg(local->socket, &msg, iov, 2, size);
+                       if (ret < 0)
+                               trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+                                                   rxrpc_tx_fail_reject);
                }
 
                rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
@@ -567,7 +596,8 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
 
        ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
        if (ret < 0)
-               _debug("sendmsg failed: %d", ret);
+               trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
+                                   rxrpc_tx_fail_version_keepalive);
 
        peer->last_tx_at = ktime_get_real();
        _leave("");
index 78c2f95d1f221c808d541f40121f75a3e5126091..0ed8b651cec293e121e40cf05282bddc8c3f1171 100644 (file)
@@ -28,39 +28,39 @@ static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
  * Find the peer associated with an ICMP packet.
  */
 static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
-                                                    const struct sk_buff *skb)
+                                                    const struct sk_buff *skb,
+                                                    struct sockaddr_rxrpc *srx)
 {
        struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
-       struct sockaddr_rxrpc srx;
 
        _enter("");
 
-       memset(&srx, 0, sizeof(srx));
-       srx.transport_type = local->srx.transport_type;
-       srx.transport_len = local->srx.transport_len;
-       srx.transport.family = local->srx.transport.family;
+       memset(srx, 0, sizeof(*srx));
+       srx->transport_type = local->srx.transport_type;
+       srx->transport_len = local->srx.transport_len;
+       srx->transport.family = local->srx.transport.family;
 
        /* Can we see an ICMP4 packet on an ICMP6 listening socket?  and vice
         * versa?
         */
-       switch (srx.transport.family) {
+       switch (srx->transport.family) {
        case AF_INET:
-               srx.transport.sin.sin_port = serr->port;
+               srx->transport.sin.sin_port = serr->port;
                switch (serr->ee.ee_origin) {
                case SO_EE_ORIGIN_ICMP:
                        _net("Rx ICMP");
-                       memcpy(&srx.transport.sin.sin_addr,
+                       memcpy(&srx->transport.sin.sin_addr,
                               skb_network_header(skb) + serr->addr_offset,
                               sizeof(struct in_addr));
                        break;
                case SO_EE_ORIGIN_ICMP6:
                        _net("Rx ICMP6 on v4 sock");
-                       memcpy(&srx.transport.sin.sin_addr,
+                       memcpy(&srx->transport.sin.sin_addr,
                               skb_network_header(skb) + serr->addr_offset + 12,
                               sizeof(struct in_addr));
                        break;
                default:
-                       memcpy(&srx.transport.sin.sin_addr, &ip_hdr(skb)->saddr,
+                       memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
                               sizeof(struct in_addr));
                        break;
                }
@@ -68,25 +68,25 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 
 #ifdef CONFIG_AF_RXRPC_IPV6
        case AF_INET6:
-               srx.transport.sin6.sin6_port = serr->port;
+               srx->transport.sin6.sin6_port = serr->port;
                switch (serr->ee.ee_origin) {
                case SO_EE_ORIGIN_ICMP6:
                        _net("Rx ICMP6");
-                       memcpy(&srx.transport.sin6.sin6_addr,
+                       memcpy(&srx->transport.sin6.sin6_addr,
                               skb_network_header(skb) + serr->addr_offset,
                               sizeof(struct in6_addr));
                        break;
                case SO_EE_ORIGIN_ICMP:
                        _net("Rx ICMP on v6 sock");
-                       srx.transport.sin6.sin6_addr.s6_addr32[0] = 0;
-                       srx.transport.sin6.sin6_addr.s6_addr32[1] = 0;
-                       srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-                       memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
+                       srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+                       srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+                       srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+                       memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
                               skb_network_header(skb) + serr->addr_offset,
                               sizeof(struct in_addr));
                        break;
                default:
-                       memcpy(&srx.transport.sin6.sin6_addr,
+                       memcpy(&srx->transport.sin6.sin6_addr,
                               &ipv6_hdr(skb)->saddr,
                               sizeof(struct in6_addr));
                        break;
@@ -98,7 +98,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
                BUG();
        }
 
-       return rxrpc_lookup_peer_rcu(local, &srx);
+       return rxrpc_lookup_peer_rcu(local, srx);
 }
 
 /*
@@ -146,6 +146,7 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *se
 void rxrpc_error_report(struct sock *sk)
 {
        struct sock_exterr_skb *serr;
+       struct sockaddr_rxrpc srx;
        struct rxrpc_local *local = sk->sk_user_data;
        struct rxrpc_peer *peer;
        struct sk_buff *skb;
@@ -166,7 +167,7 @@ void rxrpc_error_report(struct sock *sk)
        }
 
        rcu_read_lock();
-       peer = rxrpc_lookup_peer_icmp_rcu(local, skb);
+       peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
        if (peer && !rxrpc_get_peer_maybe(peer))
                peer = NULL;
        if (!peer) {
@@ -176,6 +177,8 @@ void rxrpc_error_report(struct sock *sk)
                return;
        }
 
+       trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
+
        if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
             serr->ee.ee_type == ICMP_DEST_UNREACH &&
             serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
@@ -209,9 +212,6 @@ static void rxrpc_store_error(struct rxrpc_peer *peer,
 
        ee = &serr->ee;
 
-       _net("Rx Error o=%d t=%d c=%d e=%d",
-            ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
-
        err = ee->ee_errno;
 
        switch (ee->ee_origin) {
index 588fea0dd3627e5ea6515963d9eabebca603831e..6c0ae27fff84e2312bdc6a7f84abc7ed288e05de 100644 (file)
@@ -664,7 +664,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
        if (ret < 0) {
-               _debug("sendmsg failed: %d", ret);
+               trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+                                   rxrpc_tx_fail_conn_challenge);
                return -EAGAIN;
        }
 
@@ -719,7 +720,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
 
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
        if (ret < 0) {
-               _debug("sendmsg failed: %d", ret);
+               trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+                                   rxrpc_tx_fail_conn_response);
                return -EAGAIN;
        }
 
index 206e802ccbdc1a588ad26e13ff5f7e66afa6c4bb..be01f9c5d963ddfc766fac811ace9a381b89a7f7 100644 (file)
@@ -223,6 +223,15 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 
        ret = rxrpc_send_data_packet(call, skb, false);
        if (ret < 0) {
+               switch (ret) {
+               case -ENETUNREACH:
+               case -EHOSTUNREACH:
+               case -ECONNREFUSED:
+                       rxrpc_set_call_completion(call,
+                                                 RXRPC_CALL_LOCAL_ERROR,
+                                                 0, ret);
+                       goto out;
+               }
                _debug("need instant resend %d", ret);
                rxrpc_instant_resend(call, ix);
        } else {
@@ -241,6 +250,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
                                        rxrpc_timer_set_for_send);
        }
 
+out:
        rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
        _leave("");
 }
index 7e28b2ce143763b106fda9a12b1f11bc8df53137..526a8e491626efb65fcda10d875e6f55ca2168e8 100644 (file)
@@ -648,6 +648,11 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static size_t tcf_csum_get_fill_size(const struct tc_action *act)
+{
+       return nla_total_size(sizeof(struct tc_csum));
+}
+
 static struct tc_action_ops act_csum_ops = {
        .kind           = "csum",
        .type           = TCA_ACT_CSUM,
@@ -658,6 +663,7 @@ static struct tc_action_ops act_csum_ops = {
        .cleanup        = tcf_csum_cleanup,
        .walk           = tcf_csum_walker,
        .lookup         = tcf_csum_search,
+       .get_fill_size  = tcf_csum_get_fill_size,
        .size           = sizeof(struct tcf_csum),
 };
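The new get_fill_size callback lets the act API presize the netlink dump buffer for each csum action instead of growing it on demand. As a rough worked example of what it reports (assuming the usual netlink constants, NLA_HDRLEN == 4 and 4-byte attribute alignment):

    /*
     *   nla_total_size(payload) = NLA_ALIGN(NLA_HDRLEN + payload)
     *
     * e.g. if sizeof(struct tc_csum) were 26:
     *   nla_total_size(26) = NLA_ALIGN(4 + 26) = 32 bytes reserved.
     */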
 
index a5994cf0512bd731f55a1dfa798d85ff658e18ef..8527cfdc446d9bb82e8fa9fe1364dc13249b1e03 100644 (file)
@@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
                }
        }
 
-       return 0;
+       return -ENOENT;
 }
 
 static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
@@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
                u16 mtype;
                u16 dlen;
 
-               curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL);
+               curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype,
+                                               &dlen, NULL);
+               if (!curr_data) {
+                       qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
+                       return TC_ACT_SHOT;
+               }
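Passing ifehdr_end into ife_tlv_meta_decode() makes the metadata walk bounds-checked, so a forged length can no longer read past the IFE header. A generic, hedged sketch of that pattern (hypothetical tlv_hdr/tlv_next names, not the IFE API):

    struct tlv_hdr {
            __be16 type;
            __be16 len;     /* attacker-controlled: validate before use */
    };

    /* Return a pointer to the TLV payload, or NULL if either the header
     * or the advertised payload would run past @end.
     */
    static u8 *tlv_next(u8 *tlv, const u8 *end, u16 *type, u16 *dlen)
    {
            struct tlv_hdr *h = (struct tlv_hdr *)tlv;

            if (tlv + sizeof(*h) > end)
                    return NULL;            /* header overruns buffer  */
            *type = ntohs(h->type);
            *dlen = ntohs(h->len);
            if (tlv + sizeof(*h) + *dlen > end)
                    return NULL;            /* payload overruns buffer */
            return tlv + sizeof(*h);
    }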
 
                if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
                        /* abuse overlimits to count when we receive metadata
index ddf69fc01bdf8913260b63adb6de0bbbdcf90b25..6138d1d71900b561f50578bf22110902bb488bf4 100644 (file)
@@ -121,7 +121,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                return 0;
 
        if (!flags) {
-               tcf_idr_release(*a, bind);
+               if (exists)
+                       tcf_idr_release(*a, bind);
                return -EINVAL;
        }
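The same fix recurs in the skbmod and vlan hunks below: a failing-validation path must drop the idr reference taken by the existence check, but only when the action actually existed. Condensed, the invariant is:

    /* Sketch: the lookup that set 'exists' also took a reference, so the
     * bail-out drops exactly that reference; releasing a never-found
     * action would underflow its refcount.
     */
    if (bad_config) {
            if (exists)
                    tcf_idr_release(*a, bind);
            return -EINVAL;
    }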
 
index bbcbdce732cc010f1a0cee47f1d2f42066dcb200..ad050d7d4b46a2d45f85e15bb7e68d28915f1d54 100644 (file)
@@ -131,8 +131,11 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
        if (exists && bind)
                return 0;
 
-       if (!lflags)
+       if (!lflags) {
+               if (exists)
+                       tcf_idr_release(*a, bind);
                return -EINVAL;
+       }
 
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
index 853604685965128dcd4af54ad05f784b4237e205..1fb39e1f9d077beb4fdb440459f18116b561f334 100644 (file)
@@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                        case htons(ETH_P_8021AD):
                                break;
                        default:
+                               if (exists)
+                                       tcf_idr_release(*a, bind);
                                return -EPROTONOSUPPORT;
                        }
                } else {
index b66754f52a9f1cfd18fbddb807a67f015b7c6f0c..963e4bf0aab8ae23d999305208c757fbc0f49ebe 100644 (file)
@@ -152,8 +152,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                        NL_SET_ERR_MSG(extack, "TC classifier not found");
                        err = -ENOENT;
                }
-               goto errout;
 #endif
+               goto errout;
        }
        tp->classify = tp->ops->classify;
        tp->protocol = protocol;
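Moving the goto below the #endif matters because on CONFIG_MODULES=n builds the old error path was compiled away entirely. Reduced to its shape, the pre-patch bug was:

    /* Pre-patch shape (reduced): with CONFIG_MODULES disabled the goto
     * did not exist, so a failed lookup fell through to a NULL deref.
     */
    tp->ops = tcf_proto_lookup_ops(kind, extack);
    if (!tp->ops) {
            err = -ENOENT;
    #ifdef CONFIG_MODULES
            /* ...optional request_module() retry... */
            goto errout;            /* only compiled in under the ifdef */
    #endif
    }
    tp->classify = tp->ops->classify;       /* NULL deref when !tp->ops */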
index d964e60c730eafb79a8a9437325e14c8d6288e40..eacaaf803914cdc77372d9e661d8d1bb63096be3 100644 (file)
@@ -61,16 +61,18 @@ struct fl_flow_mask_range {
 struct fl_flow_mask {
        struct fl_flow_key key;
        struct fl_flow_mask_range range;
-       struct rcu_head rcu;
+       struct rhash_head ht_node;
+       struct rhashtable ht;
+       struct rhashtable_params filter_ht_params;
+       struct flow_dissector dissector;
+       struct list_head filters;
+       struct rcu_head rcu;
+       struct list_head list;
 };
 
 struct cls_fl_head {
        struct rhashtable ht;
-       struct fl_flow_mask mask;
-       struct flow_dissector dissector;
-       bool mask_assigned;
-       struct list_head filters;
-       struct rhashtable_params ht_params;
+       struct list_head masks;
        union {
                struct work_struct work;
                struct rcu_head rcu;
@@ -79,6 +81,7 @@ struct cls_fl_head {
 };
 
 struct cls_fl_filter {
+       struct fl_flow_mask *mask;
        struct rhash_head ht_node;
        struct fl_flow_key mkey;
        struct tcf_exts exts;
@@ -94,6 +97,13 @@ struct cls_fl_filter {
        struct net_device *hw_dev;
 };
 
+static const struct rhashtable_params mask_ht_params = {
+       .key_offset = offsetof(struct fl_flow_mask, key),
+       .key_len = sizeof(struct fl_flow_key),
+       .head_offset = offsetof(struct fl_flow_mask, ht_node),
+       .automatic_shrinking = true,
+};
+
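These parameters hash the full byte image of a mask's fl_flow_key, giving flower a two-level lookup. Roughly, as read from the structures above:

    /*
     *   head->ht : all mask key bytes           -> struct fl_flow_mask
     *   mask->ht : masked skb key bytes in
     *              [range.start, range.end)     -> struct cls_fl_filter
     *
     * Two filters created with byte-identical masks therefore resolve to
     * the same fl_flow_mask and share one dissector and filter table.
     */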
 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
 {
        return mask->range.end - mask->range.start;
@@ -103,13 +113,19 @@ static void fl_mask_update_range(struct fl_flow_mask *mask)
 {
        const u8 *bytes = (const u8 *) &mask->key;
        size_t size = sizeof(mask->key);
-       size_t i, first = 0, last = size - 1;
+       size_t i, first = 0, last;
 
-       for (i = 0; i < sizeof(mask->key); i++) {
+       for (i = 0; i < size; i++) {
+               if (bytes[i]) {
+                       first = i;
+                       break;
+               }
+       }
+       last = first;
+       for (i = size - 1; i != first; i--) {
                if (bytes[i]) {
-                       if (!first && i)
-                               first = i;
                        last = i;
+                       break;
                }
        }
        mask->range.start = rounddown(first, sizeof(long));
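The rewritten scan finds the first and last nonzero mask bytes, then widens the window to long-aligned boundaries. A worked example, assuming 8-byte longs and that range.end is rounded up past the last nonzero byte as in the pre-existing line just below this hunk:

    /*
     * first nonzero byte at offset 5, last at offset 18:
     *
     *   range.start = rounddown(5, 8)    = 0
     *   range.end   = roundup(18 + 1, 8) = 24
     *
     * so hashing and masking touch only bytes [0, 24) rather than all of
     * sizeof(struct fl_flow_key).
     */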
@@ -140,12 +156,11 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
        memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
 }
 
-static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head,
+static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
                                       struct fl_flow_key *mkey)
 {
-       return rhashtable_lookup_fast(&head->ht,
-                                     fl_key_get_start(mkey, &head->mask),
-                                     head->ht_params);
+       return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
+                                     mask->filter_ht_params);
 }
 
 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -153,28 +168,28 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 {
        struct cls_fl_head *head = rcu_dereference_bh(tp->root);
        struct cls_fl_filter *f;
+       struct fl_flow_mask *mask;
        struct fl_flow_key skb_key;
        struct fl_flow_key skb_mkey;
 
-       if (!atomic_read(&head->ht.nelems))
-               return -1;
-
-       fl_clear_masked_range(&skb_key, &head->mask);
+       list_for_each_entry_rcu(mask, &head->masks, list) {
+               fl_clear_masked_range(&skb_key, mask);
 
-       skb_key.indev_ifindex = skb->skb_iif;
-       /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
-        * so do it rather here.
-        */
-       skb_key.basic.n_proto = skb->protocol;
-       skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key);
-       skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
+               skb_key.indev_ifindex = skb->skb_iif;
+               /* skb_flow_dissect() does not set n_proto for an unknown
+                * protocol, so set it here instead.
+                */
+               skb_key.basic.n_proto = skb->protocol;
+               skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
+               skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
 
-       fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+               fl_set_masked_key(&skb_mkey, &skb_key, mask);
 
-       f = fl_lookup(head, &skb_mkey);
-       if (f && !tc_skip_sw(f->flags)) {
-               *res = f->res;
-               return tcf_exts_exec(skb, &f->exts, res);
+               f = fl_lookup(mask, &skb_mkey);
+               if (f && !tc_skip_sw(f->flags)) {
+                       *res = f->res;
+                       return tcf_exts_exec(skb, &f->exts, res);
+               }
        }
        return -1;
 }
@@ -187,11 +202,28 @@ static int fl_init(struct tcf_proto *tp)
        if (!head)
                return -ENOBUFS;
 
-       INIT_LIST_HEAD_RCU(&head->filters);
+       INIT_LIST_HEAD_RCU(&head->masks);
        rcu_assign_pointer(tp->root, head);
        idr_init(&head->handle_idr);
 
-       return 0;
+       return rhashtable_init(&head->ht, &mask_ht_params);
+}
+
+static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
+                       bool async)
+{
+       if (!list_empty(&mask->filters))
+               return false;
+
+       rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
+       rhashtable_destroy(&mask->ht);
+       list_del_rcu(&mask->list);
+       if (async)
+               kfree_rcu(mask, rcu);
+       else
+               kfree(mask);
+
+       return true;
 }
 
 static void __fl_destroy_filter(struct cls_fl_filter *f)
@@ -234,8 +266,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
-                               struct flow_dissector *dissector,
-                               struct fl_flow_key *mask,
                                struct cls_fl_filter *f,
                                struct netlink_ext_ack *extack)
 {
@@ -247,8 +277,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
        tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
        cls_flower.command = TC_CLSFLOWER_REPLACE;
        cls_flower.cookie = (unsigned long) f;
-       cls_flower.dissector = dissector;
-       cls_flower.mask = mask;
+       cls_flower.dissector = &f->mask->dissector;
+       cls_flower.mask = &f->mask->key;
        cls_flower.key = &f->mkey;
        cls_flower.exts = &f->exts;
        cls_flower.classid = f->res.classid;
@@ -283,28 +313,31 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
                         &cls_flower, false);
 }
 
-static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
                        struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
+       bool async = tcf_exts_get_net(&f->exts);
+       bool last;
 
        idr_remove(&head->handle_idr, f->handle);
        list_del_rcu(&f->list);
+       last = fl_mask_put(head, f->mask, async);
        if (!tc_skip_hw(f->flags))
                fl_hw_destroy_filter(tp, f, extack);
        tcf_unbind_filter(tp, &f->res);
-       if (tcf_exts_get_net(&f->exts))
+       if (async)
                call_rcu(&f->rcu, fl_destroy_filter);
        else
                __fl_destroy_filter(f);
+
+       return last;
 }
 
 static void fl_destroy_sleepable(struct work_struct *work)
 {
        struct cls_fl_head *head = container_of(work, struct cls_fl_head,
                                                work);
-       if (head->mask_assigned)
-               rhashtable_destroy(&head->ht);
        kfree(head);
        module_put(THIS_MODULE);
 }
@@ -320,10 +353,15 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
 static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
+       struct fl_flow_mask *mask, *next_mask;
        struct cls_fl_filter *f, *next;
 
-       list_for_each_entry_safe(f, next, &head->filters, list)
-               __fl_delete(tp, f, extack);
+       list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
+               list_for_each_entry_safe(f, next, &mask->filters, list) {
+                       if (__fl_delete(tp, f, extack))
+                               break;
+               }
+       }
        idr_destroy(&head->handle_idr);
 
        __module_get(THIS_MODULE);
@@ -715,14 +753,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
        return ret;
 }
 
-static bool fl_mask_eq(struct fl_flow_mask *mask1,
-                      struct fl_flow_mask *mask2)
+static void fl_mask_copy(struct fl_flow_mask *dst,
+                        struct fl_flow_mask *src)
 {
-       const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
-       const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+       const void *psrc = fl_key_get_start(&src->key, src);
+       void *pdst = fl_key_get_start(&dst->key, src);
 
-       return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
-              !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+       memcpy(pdst, psrc, fl_mask_range(src));
+       dst->range = src->range;
 }
 
 static const struct rhashtable_params fl_ht_params = {
@@ -731,14 +769,13 @@ static const struct rhashtable_params fl_ht_params = {
        .automatic_shrinking = true,
 };
 
-static int fl_init_hashtable(struct cls_fl_head *head,
-                            struct fl_flow_mask *mask)
+static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
 {
-       head->ht_params = fl_ht_params;
-       head->ht_params.key_len = fl_mask_range(mask);
-       head->ht_params.key_offset += mask->range.start;
+       mask->filter_ht_params = fl_ht_params;
+       mask->filter_ht_params.key_len = fl_mask_range(mask);
+       mask->filter_ht_params.key_offset += mask->range.start;
 
-       return rhashtable_init(&head->ht, &head->ht_params);
+       return rhashtable_init(&mask->ht, &mask->filter_ht_params);
 }
 
 #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
@@ -761,8 +798,7 @@ static int fl_init_hashtable(struct cls_fl_head *head,
                        FL_KEY_SET(keys, cnt, id, member);                      \
        } while(0);
 
-static void fl_init_dissector(struct cls_fl_head *head,
-                             struct fl_flow_mask *mask)
+static void fl_init_dissector(struct fl_flow_mask *mask)
 {
        struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
        size_t cnt = 0;
@@ -802,31 +838,66 @@ static void fl_init_dissector(struct cls_fl_head *head,
        FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
                             FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
 
-       skb_flow_dissector_init(&head->dissector, keys, cnt);
+       skb_flow_dissector_init(&mask->dissector, keys, cnt);
+}
+
+static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
+                                              struct fl_flow_mask *mask)
+{
+       struct fl_flow_mask *newmask;
+       int err;
+
+       newmask = kzalloc(sizeof(*newmask), GFP_KERNEL);
+       if (!newmask)
+               return ERR_PTR(-ENOMEM);
+
+       fl_mask_copy(newmask, mask);
+
+       err = fl_init_mask_hashtable(newmask);
+       if (err)
+               goto errout_free;
+
+       fl_init_dissector(newmask);
+
+       INIT_LIST_HEAD_RCU(&newmask->filters);
+
+       err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
+                                    mask_ht_params);
+       if (err)
+               goto errout_destroy;
+
+       list_add_tail_rcu(&newmask->list, &head->masks);
+
+       return newmask;
+
+errout_destroy:
+       rhashtable_destroy(&newmask->ht);
+errout_free:
+       kfree(newmask);
+
+       return ERR_PTR(err);
 }
 
 static int fl_check_assign_mask(struct cls_fl_head *head,
+                               struct cls_fl_filter *fnew,
+                               struct cls_fl_filter *fold,
                                struct fl_flow_mask *mask)
 {
-       int err;
+       struct fl_flow_mask *newmask;
 
-       if (head->mask_assigned) {
-               if (!fl_mask_eq(&head->mask, mask))
+       fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
+       if (!fnew->mask) {
+               if (fold)
                        return -EINVAL;
-               else
-                       return 0;
-       }
 
-       /* Mask is not assigned yet. So assign it and init hashtable
-        * according to that.
-        */
-       err = fl_init_hashtable(head, mask);
-       if (err)
-               return err;
-       memcpy(&head->mask, mask, sizeof(head->mask));
-       head->mask_assigned = true;
+               newmask = fl_create_new_mask(head, mask);
+               if (IS_ERR(newmask))
+                       return PTR_ERR(newmask);
 
-       fl_init_dissector(head, mask);
+               fnew->mask = newmask;
+       } else if (fold && fold->mask == fnew->mask) {
+               return -EINVAL;
+       }
 
        return 0;
 }
@@ -924,30 +995,26 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        if (err)
                goto errout_idr;
 
-       err = fl_check_assign_mask(head, &mask);
+       err = fl_check_assign_mask(head, fnew, fold, &mask);
        if (err)
                goto errout_idr;
 
        if (!tc_skip_sw(fnew->flags)) {
-               if (!fold && fl_lookup(head, &fnew->mkey)) {
+               if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
                        err = -EEXIST;
-                       goto errout_idr;
+                       goto errout_mask;
                }
 
-               err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
-                                            head->ht_params);
+               err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
+                                            fnew->mask->filter_ht_params);
                if (err)
-                       goto errout_idr;
+                       goto errout_mask;
        }
 
        if (!tc_skip_hw(fnew->flags)) {
-               err = fl_hw_replace_filter(tp,
-                                          &head->dissector,
-                                          &mask.key,
-                                          fnew,
-                                          extack);
+               err = fl_hw_replace_filter(tp, fnew, extack);
                if (err)
-                       goto errout_idr;
+                       goto errout_mask;
        }
 
        if (!tc_in_hw(fnew->flags))
@@ -955,8 +1022,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 
        if (fold) {
                if (!tc_skip_sw(fold->flags))
-                       rhashtable_remove_fast(&head->ht, &fold->ht_node,
-                                              head->ht_params);
+                       rhashtable_remove_fast(&fold->mask->ht,
+                                              &fold->ht_node,
+                                              fold->mask->filter_ht_params);
                if (!tc_skip_hw(fold->flags))
                        fl_hw_destroy_filter(tp, fold, NULL);
        }
@@ -970,12 +1038,15 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
                tcf_exts_get_net(&fold->exts);
                call_rcu(&fold->rcu, fl_destroy_filter);
        } else {
-               list_add_tail_rcu(&fnew->list, &head->filters);
+               list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
        }
 
        kfree(tb);
        return 0;
 
+errout_mask:
+       fl_mask_put(head, fnew->mask, false);
+
 errout_idr:
        if (fnew->handle)
                idr_remove(&head->handle_idr, fnew->handle);
@@ -994,10 +1065,10 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
        struct cls_fl_filter *f = arg;
 
        if (!tc_skip_sw(f->flags))
-               rhashtable_remove_fast(&head->ht, &f->ht_node,
-                                      head->ht_params);
+               rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+                                      f->mask->filter_ht_params);
        __fl_delete(tp, f, extack);
-       *last = list_empty(&head->filters);
+       *last = list_empty(&head->masks);
        return 0;
 }
 
@@ -1005,16 +1076,19 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f;
-
-       list_for_each_entry_rcu(f, &head->filters, list) {
-               if (arg->count < arg->skip)
-                       goto skip;
-               if (arg->fn(tp, f, arg) < 0) {
-                       arg->stop = 1;
-                       break;
-               }
+       struct fl_flow_mask *mask;
+
+       list_for_each_entry_rcu(mask, &head->masks, list) {
+               list_for_each_entry_rcu(f, &mask->filters, list) {
+                       if (arg->count < arg->skip)
+                               goto skip;
+                       if (arg->fn(tp, f, arg) < 0) {
+                               arg->stop = 1;
+                               break;
+                       }
 skip:
-               arg->count++;
+                       arg->count++;
+               }
        }
 }
 
@@ -1150,7 +1224,6 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
                   struct sk_buff *skb, struct tcmsg *t)
 {
-       struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f = fh;
        struct nlattr *nest;
        struct fl_flow_key *key, *mask;
@@ -1169,7 +1242,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
                goto nla_put_failure;
 
        key = &f->key;
-       mask = &head->mask.key;
+       mask = &f->mask->key;
 
        if (mask->indev_ifindex) {
                struct net_device *dev;
index a366e4c9413ab4fe4dfb16f0255cb7632ade7f1c..4808713c73b988cc3e536cff866cf18de05375fa 100644 (file)
@@ -128,6 +128,28 @@ static bool fq_flow_is_detached(const struct fq_flow *f)
        return f->next == &detached;
 }
 
+static bool fq_flow_is_throttled(const struct fq_flow *f)
+{
+       return f->next == &throttled;
+}
+
+static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
+{
+       if (head->first)
+               head->last->next = flow;
+       else
+               head->first = flow;
+       head->last = flow;
+       flow->next = NULL;
+}
+
+static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+       rb_erase(&f->rate_node, &q->delayed);
+       q->throttled_flows--;
+       fq_flow_add_tail(&q->old_flows, f);
+}
+
 static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
 {
        struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
@@ -155,15 +177,6 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
 
 static struct kmem_cache *fq_flow_cachep __read_mostly;
 
-static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
-{
-       if (head->first)
-               head->last->next = flow;
-       else
-               head->first = flow;
-       head->last = flow;
-       flow->next = NULL;
-}
 
 /* limit number of collected flows per round */
 #define FQ_GC_MAX 8
@@ -267,6 +280,8 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
                                     f->socket_hash != sk->sk_hash)) {
                                f->credit = q->initial_quantum;
                                f->socket_hash = sk->sk_hash;
+                               if (fq_flow_is_throttled(f))
+                                       fq_flow_unset_throttled(q, f);
                                f->time_next_packet = 0ULL;
                        }
                        return f;
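fq_flow_unset_throttled() exists because q->delayed is an rb-tree keyed by time_next_packet: clearing that field while the flow is still linked (as the reused-socket path above now avoids) would silently corrupt the tree ordering. The rule, condensed:

    /* Keyed rb-tree nodes must be unlinked before their key is mutated. */
    rb_erase(&f->rate_node, &q->delayed);   /* leave the tree first   */
    q->throttled_flows--;
    fq_flow_add_tail(&q->old_flows, f);     /* park on a plain list   */
    f->time_next_packet = 0ULL;             /* now the key may change */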
@@ -438,9 +453,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
                        q->time_next_delayed_flow = f->time_next_packet;
                        break;
                }
-               rb_erase(p, &q->delayed);
-               q->throttled_flows--;
-               fq_flow_add_tail(&q->old_flows, f);
+               fq_flow_unset_throttled(q, f);
        }
 }
 
index 39c144b6ff987c38674005c821bbac5dadeee4d5..760ab1b09f8b1760cd17c6da3b56c119ef2186ff 100644 (file)
@@ -373,33 +373,24 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  */
 static inline bool qdisc_restart(struct Qdisc *q, int *packets)
 {
-       bool more, validate, nolock = q->flags & TCQ_F_NOLOCK;
        spinlock_t *root_lock = NULL;
        struct netdev_queue *txq;
        struct net_device *dev;
        struct sk_buff *skb;
+       bool validate;
 
        /* Dequeue packet */
-       if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
-               return false;
-
        skb = dequeue_skb(q, &validate, packets);
-       if (unlikely(!skb)) {
-               if (nolock)
-                       clear_bit(__QDISC_STATE_RUNNING, &q->state);
+       if (unlikely(!skb))
                return false;
-       }
 
-       if (!nolock)
+       if (!(q->flags & TCQ_F_NOLOCK))
                root_lock = qdisc_lock(q);
 
        dev = qdisc_dev(q);
        txq = skb_get_tx_queue(dev, skb);
 
-       more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
-       if (nolock)
-               clear_bit(__QDISC_STATE_RUNNING, &q->state);
-       return more;
+       return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
 }
 
 void __qdisc_run(struct Qdisc *q)
@@ -665,7 +656,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
                if (__skb_array_empty(q))
                        continue;
 
-               skb = skb_array_consume_bh(q);
+               skb = __skb_array_consume(q);
        }
        if (likely(skb)) {
                qdisc_qstats_cpu_backlog_dec(qdisc, skb);
@@ -706,7 +697,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
                if (!q->ring.queue)
                        continue;
 
-               while ((skb = skb_array_consume_bh(q)) != NULL)
+               while ((skb = __skb_array_consume(q)) != NULL)
                        kfree_skb(skb);
        }
 
@@ -867,6 +858,11 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        lockdep_set_class(&sch->busylock,
                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
+       /* seqlock has the same scope as busylock, for NOLOCK qdiscs */
+       spin_lock_init(&sch->seqlock);
+       lockdep_set_class(&sch->seqlock,
+                         dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
        seqcount_init(&sch->running);
        lockdep_set_class(&sch->running,
                          dev->qdisc_running_key ?: &qdisc_running_key);
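A plausible companion to the seqlock added above (a sketch inferred from this hunk, not shown in this excerpt): for NOLOCK qdiscs the per-qdisc seqlock can double as the run flag, so whichever CPU wins a trylock is the single dequeuer and the __QDISC_STATE_RUNNING juggling removed from qdisc_restart() earlier becomes unnecessary.

    static inline bool qdisc_run_begin(struct Qdisc *qdisc)
    {
            if (qdisc->flags & TCQ_F_NOLOCK)
                    return spin_trylock(&qdisc->seqlock);  /* winner runs */
            if (qdisc_is_running(qdisc))
                    return false;
            raw_write_seqcount_begin(&qdisc->running);
            return true;
    }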
@@ -1106,6 +1102,10 @@ static void dev_deactivate_queue(struct net_device *dev,
 
        qdisc = rtnl_dereference(dev_queue->qdisc);
        if (qdisc) {
+               bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+               if (nolock)
+                       spin_lock_bh(&qdisc->seqlock);
                spin_lock_bh(qdisc_lock(qdisc));
 
                if (!(qdisc->flags & TCQ_F_BUILTIN))
@@ -1115,6 +1115,8 @@ static void dev_deactivate_queue(struct net_device *dev,
                qdisc_reset(qdisc);
 
                spin_unlock_bh(qdisc_lock(qdisc));
+               if (nolock)
+                       spin_unlock_bh(&qdisc->seqlock);
        }
 }
 
@@ -1131,17 +1133,13 @@ static bool some_qdisc_is_busy(struct net_device *dev)
                dev_queue = netdev_get_tx_queue(dev, i);
                q = dev_queue->qdisc_sleeping;
 
-               if (q->flags & TCQ_F_NOLOCK) {
-                       val = test_bit(__QDISC_STATE_SCHED, &q->state);
-               } else {
-                       root_lock = qdisc_lock(q);
-                       spin_lock_bh(root_lock);
+               root_lock = qdisc_lock(q);
+               spin_lock_bh(root_lock);
 
-                       val = (qdisc_is_running(q) ||
-                              test_bit(__QDISC_STATE_SCHED, &q->state));
+               val = (qdisc_is_running(q) ||
+                      test_bit(__QDISC_STATE_SCHED, &q->state));
 
-                       spin_unlock_bh(root_lock);
-               }
+               spin_unlock_bh(root_lock);
 
                if (val)
                        return true;
index 16644b3d236271533b6e96a62705bc1903ab1d8a..56c181c3feeb27a428a56fb76b26d0bc3622ea33 100644 (file)
@@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);
-       }
 
-       if (child != &noop_qdisc)
+               /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
+       }
+
        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
index 03225a8df9730cee7e020331b42a805d42b6f25c..6f74a426f159e440f33755e13656c849408bbc18 100644 (file)
@@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                        err = PTR_ERR(child);
                        goto done;
                }
+
+               /* child is fifo, no need to check for noop_qdisc */
+               qdisc_hash_add(child, true);
        }
 
        sch_tree_lock(sch);
@@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                                          q->qdisc->qstats.backlog);
                qdisc_destroy(q->qdisc);
                q->qdisc = child;
-               if (child != &noop_qdisc)
-                       qdisc_hash_add(child, true);
        }
        q->limit = qopt->limit;
        if (tb[TCA_TBF_PBURST])
index 837806dd57990af4b39147045f8eb53b5f34ec61..5d5a16204d50516eca7d5322a60aac6511178c38 100644 (file)
@@ -652,33 +652,20 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
         */
        peer->param_flags = asoc->param_flags;
 
-       sctp_transport_route(peer, NULL, sp);
-
        /* Initialize the pmtu of the transport. */
-       if (peer->param_flags & SPP_PMTUD_DISABLE) {
-               if (asoc->pathmtu)
-                       peer->pathmtu = asoc->pathmtu;
-               else
-                       peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
-       }
+       sctp_transport_route(peer, NULL, sp);
 
        /* If this is the first transport addr on this association,
         * initialize the association PMTU to the peer's PMTU.
         * If not and the current association PMTU is higher than the new
         * peer's PMTU, reset the association PMTU to the new peer's PMTU.
         */
-       if (asoc->pathmtu)
-               asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu);
-       else
-               asoc->pathmtu = peer->pathmtu;
-
-       pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc,
-                asoc->pathmtu);
+       sctp_assoc_set_pmtu(asoc, asoc->pathmtu ?
+                                 min_t(int, peer->pathmtu, asoc->pathmtu) :
+                                 peer->pathmtu);
 
        peer->pmtu_pending = 0;
 
-       asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
-
        /* The asoc->peer.port might not be meaningful yet, but
         * initialize the packet structure anyway.
         */
@@ -988,31 +975,6 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc,
        return match;
 }
 
-/* Is this the association we are looking for? */
-struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
-                                          struct net *net,
-                                          const union sctp_addr *laddr,
-                                          const union sctp_addr *paddr)
-{
-       struct sctp_transport *transport;
-
-       if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
-           (htons(asoc->peer.port) == paddr->v4.sin_port) &&
-           net_eq(sock_net(asoc->base.sk), net)) {
-               transport = sctp_assoc_lookup_paddr(asoc, paddr);
-               if (!transport)
-                       goto out;
-
-               if (sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
-                                        sctp_sk(asoc->base.sk)))
-                       goto out;
-       }
-       transport = NULL;
-
-out:
-       return transport;
-}
-
 /* Do delayed input processing.  This is scheduled by sctp_rcv(). */
 static void sctp_assoc_bh_rcv(struct work_struct *work)
 {
@@ -1024,8 +986,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
        struct sctp_endpoint *ep;
        struct sctp_chunk *chunk;
        struct sctp_inq *inqueue;
-       int state;
+       int first_time = 1;     /* is this the first time through the loop */
        int error = 0;
+       int state;
 
        /* The association should be held so we should be safe. */
        ep = asoc->ep;
@@ -1036,6 +999,30 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
                state = asoc->state;
                subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
 
+               /* If the first chunk in the packet is AUTH, do the special
+                * processing specified in Section 6.3 of the SCTP-AUTH spec.
+                */
+               if (first_time && subtype.chunk == SCTP_CID_AUTH) {
+                       struct sctp_chunkhdr *next_hdr;
+
+                       next_hdr = sctp_inq_peek(inqueue);
+                       if (!next_hdr)
+                               goto normal;
+
+                       /* If the next chunk is COOKIE-ECHO, skip the AUTH
+                        * chunk while saving a pointer to it so we can do
+                        * Authentication later (during cookie-echo
+                        * processing).
+                        */
+                       if (next_hdr->type == SCTP_CID_COOKIE_ECHO) {
+                               chunk->auth_chunk = skb_clone(chunk->skb,
+                                                             GFP_ATOMIC);
+                               chunk->auth = 1;
+                               continue;
+                       }
+               }
+
+normal:
                /* SCTP-AUTH, Section 6.3:
                 *    The receiver has a list of chunk types which it expects
                 *    to be received only after an AUTH-chunk.  This list has
@@ -1074,6 +1061,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
                /* If there is an error on chunk, discard this packet. */
                if (error && chunk)
                        chunk->pdiscard = 1;
+
+               if (first_time)
+                       first_time = 0;
        }
        sctp_association_put(asoc);
 }
@@ -1406,6 +1396,31 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
        }
 }
 
+void sctp_assoc_update_frag_point(struct sctp_association *asoc)
+{
+       int frag = sctp_mtu_payload(sctp_sk(asoc->base.sk), asoc->pathmtu,
+                                   sctp_datachk_len(&asoc->stream));
+
+       if (asoc->user_frag)
+               frag = min_t(int, frag, asoc->user_frag);
+
+       frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN -
+                               sctp_datachk_len(&asoc->stream));
+
+       asoc->frag_point = SCTP_TRUNC4(frag);
+}
+
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu)
+{
+       if (asoc->pathmtu != pmtu) {
+               asoc->pathmtu = pmtu;
+               sctp_assoc_update_frag_point(asoc);
+       }
+
+       pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
+                asoc->pathmtu, asoc->frag_point);
+}
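A worked example of the new derivation (assumptions: IPv4 with no IP options, so sctp_mtu_payload() subtracts a 20-byte IP header plus the 12-byte SCTP common header; non-interleaved DATA, so sctp_datachk_len() is 16; no user_frag cap):

    /*
     *   pathmtu    = 1500
     *   frag       = sctp_mtu_payload(sp, 1500, 16)
     *              = 1500 - 20 - 12 - 16 = 1452
     *   frag_point = SCTP_TRUNC4(min(1452, SCTP_MAX_CHUNK_LEN - 16))
     *              = 1452   (SCTP_MAX_CHUNK_LEN is far larger)
     */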
+
 /* Update the association's pmtu and frag_point by going through all the
  * transports. This routine is called when a transport's PMTU has changed.
  */
@@ -1418,24 +1433,16 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
                return;
 
        /* Get the lowest pmtu of all the transports. */
-       list_for_each_entry(t, &asoc->peer.transport_addr_list,
-                               transports) {
+       list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
                if (t->pmtu_pending && t->dst) {
-                       sctp_transport_update_pmtu(
-                                       t, SCTP_TRUNC4(dst_mtu(t->dst)));
+                       sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst));
                        t->pmtu_pending = 0;
                }
                if (!pmtu || (t->pathmtu < pmtu))
                        pmtu = t->pathmtu;
        }
 
-       if (pmtu) {
-               asoc->pathmtu = pmtu;
-               asoc->frag_point = sctp_frag_point(asoc, pmtu);
-       }
-
-       pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
-                asoc->pathmtu, asoc->frag_point);
+       sctp_assoc_set_pmtu(asoc, pmtu);
 }
 
 /* Should we send a SACK to update our peer? */
index be296d633e951c8df322d560150973b5b2bdce5b..79daa98208c391c780440144d69bc7be875c3476 100644 (file)
@@ -172,8 +172,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
        struct list_head *pos, *temp;
        struct sctp_chunk *chunk;
        struct sctp_datamsg *msg;
-       struct sctp_sock *sp;
-       struct sctp_af *af;
        int err;
 
        msg = sctp_datamsg_new(GFP_KERNEL);
@@ -192,12 +190,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
        /* This is the biggest possible DATA chunk that can fit into
         * the packet
         */
-       sp = sctp_sk(asoc->base.sk);
-       af = sp->pf->af;
-       max_data = asoc->pathmtu - af->net_header_len -
-                  sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream) -
-                  af->ip_options_len(asoc->base.sk);
-       max_data = SCTP_TRUNC4(max_data);
+       max_data = asoc->frag_point;
 
        /* If the peer requested that we authenticate DATA chunks
         * we need to account for bundling of the AUTH chunks along with
@@ -222,9 +215,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
                }
        }
 
-       /* Check what's our max considering the above */
-       max_data = min_t(size_t, max_data, asoc->frag_point);
-
        /* Set first_len and then account for possible bundles on first frag */
        first_len = max_data;
 
index 23ebc5318edc47c51230a95256064f5b2974d2f4..eb93ffe2408bde973e92b29a3b1ebff85f5099a0 100644 (file)
@@ -217,7 +217,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
        skb_pull(chunk->skb, sizeof(*ch));
        chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
-       if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
+       if (chunk->chunk_end + sizeof(*ch) <= skb_tail_pointer(chunk->skb)) {
                /* This is not a singleton */
                chunk->singleton = 0;
        } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
index 2e3f7b75a8ece789fbbc722754200ad8505b5d64..42247110d842e75d61068ad20d577d8dc6dd4263 100644 (file)
@@ -895,6 +895,9 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
        if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2))
                return 1;
 
+       if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET)
+               return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr;
+
        return __sctp_v6_cmp_addr(addr1, addr2);
 }
 
index 690d8557bb7bfe20a0a209b6c937c85c764a3bd0..e672dee302c7092433a64ed3ed8bfcd183e1f9c8 100644 (file)
@@ -90,8 +90,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 {
        struct sctp_transport *tp = packet->transport;
        struct sctp_association *asoc = tp->asoc;
+       struct sctp_sock *sp = NULL;
        struct sock *sk;
-       size_t overhead = sizeof(struct ipv6hdr) + sizeof(struct sctphdr);
 
        pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
        packet->vtag = vtag;
@@ -102,28 +102,20 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 
        /* set packet max_size with pathmtu, then calculate overhead */
        packet->max_size = tp->pathmtu;
+
        if (asoc) {
-               struct sctp_sock *sp = sctp_sk(asoc->base.sk);
-               struct sctp_af *af = sp->pf->af;
-
-               overhead = af->net_header_len +
-                          af->ip_options_len(asoc->base.sk);
-               overhead += sizeof(struct sctphdr);
-               packet->overhead = overhead;
-               packet->size = overhead;
-       } else {
-               packet->overhead = overhead;
-               packet->size = overhead;
-               return;
+               sk = asoc->base.sk;
+               sp = sctp_sk(sk);
        }
+       packet->overhead = sctp_mtu_payload(sp, 0, 0);
+       packet->size = packet->overhead;
+
+       if (!asoc)
+               return;
 
        /* update dst or transport pathmtu if in need */
-       sk = asoc->base.sk;
        if (!sctp_transport_dst_check(tp)) {
-               sctp_transport_route(tp, NULL, sctp_sk(sk));
-               if (asoc->param_flags & SPP_PMTUD_ENABLE)
-                       sctp_assoc_sync_pmtu(asoc);
-       } else if (!sctp_transport_pmtu_check(tp)) {
+               sctp_transport_route(tp, NULL, sp);
                if (asoc->param_flags & SPP_PMTUD_ENABLE)
                        sctp_assoc_sync_pmtu(asoc);
        }
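This consolidation leans on a helper introduced earlier in the series; its assumed shape (a sketch, not a verbatim copy) shows why sctp_mtu_payload(sp, 0, 0) yields the bare per-packet overhead:

    static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
                                         __u32 mtu, __u32 extra)
    {
            __u32 overhead = sizeof(struct sctphdr) + extra;

            /* worst-case IPv6 overhead when no socket is known */
            overhead += sp ? sp->pf->af->net_header_len
                           : sizeof(struct ipv6hdr);

            /* mtu == 0 means "just report the overhead" */
            return mtu ? mtu - overhead : overhead;
    }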
index f211b3db6a3543073e113da121bb28518b0af491..d68aa33485a94e87858fed9b655f00a1b9748998 100644 (file)
@@ -601,14 +601,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 
 /*
  * Transmit DATA chunks on the retransmit queue.  Upon return from
- * sctp_outq_flush_rtx() the packet 'pkt' may contain chunks which
+ * __sctp_outq_flush_rtx() the packet 'pkt' may contain chunks which
  * need to be transmitted by the caller.
  * We assume that pkt->transport has already been set.
  *
  * The return value is a normal kernel error return value.
  */
-static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
-                              int rtx_timeout, int *start_timer)
+static int __sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
+                                int rtx_timeout, int *start_timer, gfp_t gfp)
 {
        struct sctp_transport *transport = pkt->transport;
        struct sctp_chunk *chunk, *chunk1;
@@ -684,12 +684,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
                                 * control chunks are already freed so there
                                 * is nothing we can do.
                                 */
-                               sctp_packet_transmit(pkt, GFP_ATOMIC);
+                               sctp_packet_transmit(pkt, gfp);
                                goto redo;
                        }
 
                        /* Send this packet.  */
-                       error = sctp_packet_transmit(pkt, GFP_ATOMIC);
+                       error = sctp_packet_transmit(pkt, gfp);
 
                        /* If we are retransmitting, we should only
                         * send a single packet.
@@ -705,7 +705,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
                case SCTP_XMIT_RWND_FULL:
                        /* Send this packet. */
-                       error = sctp_packet_transmit(pkt, GFP_ATOMIC);
+                       error = sctp_packet_transmit(pkt, gfp);
 
                        /* Stop sending DATA as there is no more room
                         * at the receiver.
@@ -715,7 +715,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
                case SCTP_XMIT_DELAY:
                        /* Send this packet. */
-                       error = sctp_packet_transmit(pkt, GFP_ATOMIC);
+                       error = sctp_packet_transmit(pkt, gfp);
 
                        /* Stop sending DATA because of nagle delay. */
                        done = 1;
@@ -776,68 +776,43 @@ void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
        sctp_outq_flush(q, 0, gfp);
 }
 
-
-/*
- * Try to flush an outqueue.
- *
- * Description: Send everything in q which we legally can, subject to
- * congestion limitations.
- * * Note: This function can be called from multiple contexts so appropriate
- * locking concerns must be made.  Today we use the sock lock to protect
- * this function.
- */
-static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
+static int sctp_packet_singleton(struct sctp_transport *transport,
+                                struct sctp_chunk *chunk, gfp_t gfp)
 {
-       struct sctp_packet *packet;
+       const struct sctp_association *asoc = transport->asoc;
+       const __u16 sport = asoc->base.bind_addr.port;
+       const __u16 dport = asoc->peer.port;
+       const __u32 vtag = asoc->peer.i.init_tag;
        struct sctp_packet singleton;
-       struct sctp_association *asoc = q->asoc;
-       __u16 sport = asoc->base.bind_addr.port;
-       __u16 dport = asoc->peer.port;
-       __u32 vtag = asoc->peer.i.init_tag;
-       struct sctp_transport *transport = NULL;
-       struct sctp_transport *new_transport;
-       struct sctp_chunk *chunk, *tmp;
-       enum sctp_xmit status;
-       int error = 0;
-       int start_timer = 0;
-       int one_packet = 0;
 
+       sctp_packet_init(&singleton, transport, sport, dport);
+       sctp_packet_config(&singleton, vtag, 0);
+       sctp_packet_append_chunk(&singleton, chunk);
+       return sctp_packet_transmit(&singleton, gfp);
+}
+
+/* Struct to hold the context during sctp outq flush */
+struct sctp_flush_ctx {
+       struct sctp_outq *q;
+       /* Current transport being used; NOT necessarily the association's active path */
+       struct sctp_transport *transport;
        /* These transports have chunks to send. */
        struct list_head transport_list;
-       struct list_head *ltransport;
-
-       INIT_LIST_HEAD(&transport_list);
-       packet = NULL;
-
-       /*
-        * 6.10 Bundling
-        *   ...
-        *   When bundling control chunks with DATA chunks, an
-        *   endpoint MUST place control chunks first in the outbound
-        *   SCTP packet.  The transmitter MUST transmit DATA chunks
-        *   within a SCTP packet in increasing order of TSN.
-        *   ...
-        */
-
-       list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
-               /* RFC 5061, 5.3
-                * F1) This means that until such time as the ASCONF
-                * containing the add is acknowledged, the sender MUST
-                * NOT use the new IP address as a source for ANY SCTP
-                * packet except on carrying an ASCONF Chunk.
-                */
-               if (asoc->src_out_of_asoc_ok &&
-                   chunk->chunk_hdr->type != SCTP_CID_ASCONF)
-                       continue;
-
-               list_del_init(&chunk->list);
+       struct sctp_association *asoc;
+       /* Packet on the current transport above */
+       struct sctp_packet *packet;
+       gfp_t gfp;
+};
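One plausible way the flush entry point seeds this context before calling the helpers below (field names from the struct above; the patch's actual initialization falls past the end of this excerpt):

    static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
    {
            struct sctp_flush_ctx ctx = {
                    .q = q,
                    .transport = NULL,
                    .transport_list = LIST_HEAD_INIT(ctx.transport_list),
                    .asoc = q->asoc,
                    .packet = NULL,
                    .gfp = gfp,
            };

            /* control chunks first (6.10 Bundling), then rtx and data */
            sctp_outq_flush_ctrl(&ctx);
            /* data/retransmit flushing continues in the full patch */
    }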
 
-               /* Pick the right transport to use. */
-               new_transport = chunk->transport;
+/* Pick, and if needed switch to, the transport @chunk should go out on */
+static void sctp_outq_select_transport(struct sctp_flush_ctx *ctx,
+                                      struct sctp_chunk *chunk)
+{
+       struct sctp_transport *new_transport = chunk->transport;
 
-               if (!new_transport) {
-                       /*
-                        * If we have a prior transport pointer, see if
+       if (!new_transport) {
+               if (!sctp_chunk_is_data(chunk)) {
+                       /* If we have a prior transport pointer, see if
                         * the destination address of the chunk
                         * matches the destination address of the
                         * current transport.  If not a match, then
@@ -846,22 +821,26 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                         * after processing ASCONFs, we may have new
                         * transports created.
                         */
-                       if (transport &&
-                           sctp_cmp_addr_exact(&chunk->dest,
-                                               &transport->ipaddr))
-                                       new_transport = transport;
+                       if (ctx->transport && sctp_cmp_addr_exact(&chunk->dest,
+                                                       &ctx->transport->ipaddr))
+                               new_transport = ctx->transport;
                        else
-                               new_transport = sctp_assoc_lookup_paddr(asoc,
-                                                               &chunk->dest);
+                               new_transport = sctp_assoc_lookup_paddr(ctx->asoc,
+                                                                 &chunk->dest);
+               }
 
-                       /* if we still don't have a new transport, then
-                        * use the current active path.
-                        */
-                       if (!new_transport)
-                               new_transport = asoc->peer.active_path;
-               } else if ((new_transport->state == SCTP_INACTIVE) ||
-                          (new_transport->state == SCTP_UNCONFIRMED) ||
-                          (new_transport->state == SCTP_PF)) {
+               /* if we still don't have a new transport, then
+                * use the current active path.
+                */
+               if (!new_transport)
+                       new_transport = ctx->asoc->peer.active_path;
+       } else {
+               __u8 type;
+
+               switch (new_transport->state) {
+               case SCTP_INACTIVE:
+               case SCTP_UNCONFIRMED:
+               case SCTP_PF:
                        /* If the chunk is Heartbeat or Heartbeat Ack,
                         * send it to chunk->transport, even if it's
                         * inactive.
@@ -875,29 +854,64 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                         *
                         * ASCONF_ACKs also must be sent to the source.
                         */
-                       if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT &&
-                           chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK &&
-                           chunk->chunk_hdr->type != SCTP_CID_ASCONF_ACK)
-                               new_transport = asoc->peer.active_path;
+                       type = chunk->chunk_hdr->type;
+                       if (type != SCTP_CID_HEARTBEAT &&
+                           type != SCTP_CID_HEARTBEAT_ACK &&
+                           type != SCTP_CID_ASCONF_ACK)
+                               new_transport = ctx->asoc->peer.active_path;
+                       break;
+               default:
+                       break;
                }
+       }
+
+       /* Are we switching transports? Take care of transport locks. */
+       if (new_transport != ctx->transport) {
+               ctx->transport = new_transport;
+               ctx->packet = &ctx->transport->packet;
 
-               /* Are we switching transports?
-                * Take care of transport locks.
+               if (list_empty(&ctx->transport->send_ready))
+                       list_add_tail(&ctx->transport->send_ready,
+                                     &ctx->transport_list);
+
+               sctp_packet_config(ctx->packet,
+                                  ctx->asoc->peer.i.init_tag,
+                                  ctx->asoc->peer.ecn_capable);
+               /* We've switched transports, so apply the
+                * Burst limit to the new transport.
                 */
-               if (new_transport != transport) {
-                       transport = new_transport;
-                       if (list_empty(&transport->send_ready)) {
-                               list_add_tail(&transport->send_ready,
-                                             &transport_list);
-                       }
-                       packet = &transport->packet;
-                       sctp_packet_config(packet, vtag,
-                                          asoc->peer.ecn_capable);
-               }
+               sctp_transport_burst_limited(ctx->transport);
+       }
+}
+
+static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
+{
+       struct sctp_chunk *chunk, *tmp;
+       enum sctp_xmit status;
+       int one_packet, error;
+
+       list_for_each_entry_safe(chunk, tmp, &ctx->q->control_chunk_list, list) {
+               one_packet = 0;
+
+               /* RFC 5061, 5.3
+                * F1) This means that until such time as the ASCONF
+                * containing the add is acknowledged, the sender MUST
+                * NOT use the new IP address as a source for ANY SCTP
+                * packet except on carrying an ASCONF Chunk.
+                */
+               if (ctx->asoc->src_out_of_asoc_ok &&
+                   chunk->chunk_hdr->type != SCTP_CID_ASCONF)
+                       continue;
+
+               list_del_init(&chunk->list);
+
+               /* Pick the right transport to use. The first chunk always
+                * switches transports, since ctx->transport starts out NULL.
+                */
+               sctp_outq_select_transport(ctx, chunk);
 
                switch (chunk->chunk_hdr->type) {
-               /*
-                * 6.10 Bundling
+               /* 6.10 Bundling
                 *   ...
                 *   An endpoint MUST NOT bundle INIT, INIT ACK or SHUTDOWN
                 *   COMPLETE with any other chunks.  [Send them immediately.]
@@ -905,20 +919,19 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                case SCTP_CID_INIT:
                case SCTP_CID_INIT_ACK:
                case SCTP_CID_SHUTDOWN_COMPLETE:
-                       sctp_packet_init(&singleton, transport, sport, dport);
-                       sctp_packet_config(&singleton, vtag, 0);
-                       sctp_packet_append_chunk(&singleton, chunk);
-                       error = sctp_packet_transmit(&singleton, gfp);
+                       error = sctp_packet_singleton(ctx->transport, chunk,
+                                                     ctx->gfp);
                        if (error < 0) {
-                               asoc->base.sk->sk_err = -error;
+                               ctx->asoc->base.sk->sk_err = -error;
                                return;
                        }
                        break;
 
                case SCTP_CID_ABORT:
                        if (sctp_test_T_bit(chunk))
-                               packet->vtag = asoc->c.my_vtag;
+                               ctx->packet->vtag = ctx->asoc->c.my_vtag;
                        /* fallthru */
+
                /* The following chunks are "response" chunks, i.e.
                 * they are generated in response to something we
                 * received.  If we are sending these, then we can
@@ -942,27 +955,27 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                case SCTP_CID_FWD_TSN:
                case SCTP_CID_I_FWD_TSN:
                case SCTP_CID_RECONF:
-                       status = sctp_packet_transmit_chunk(packet, chunk,
-                                                           one_packet, gfp);
-                       if (status  != SCTP_XMIT_OK) {
+                       status = sctp_packet_transmit_chunk(ctx->packet, chunk,
+                                                           one_packet, ctx->gfp);
+                       if (status != SCTP_XMIT_OK) {
                                /* put the chunk back */
-                               list_add(&chunk->list, &q->control_chunk_list);
+                               list_add(&chunk->list, &ctx->q->control_chunk_list);
                                break;
                        }
 
-                       asoc->stats.octrlchunks++;
+                       ctx->asoc->stats.octrlchunks++;
                        /* PR-SCTP C5) If a FORWARD TSN is sent, the
                         * sender MUST assure that at least one T3-rtx
                         * timer is running.
                         */
                        if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN ||
                            chunk->chunk_hdr->type == SCTP_CID_I_FWD_TSN) {
-                               sctp_transport_reset_t3_rtx(transport);
-                               transport->last_time_sent = jiffies;
+                               sctp_transport_reset_t3_rtx(ctx->transport);
+                               ctx->transport->last_time_sent = jiffies;
                        }
 
-                       if (chunk == asoc->strreset_chunk)
-                               sctp_transport_reset_reconf_timer(transport);
+                       if (chunk == ctx->asoc->strreset_chunk)
+                               sctp_transport_reset_reconf_timer(ctx->transport);
 
                        break;
 
@@ -971,232 +984,186 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                        BUG();
                }
        }
+}
 
-       if (q->asoc->src_out_of_asoc_ok)
-               goto sctp_flush_out;
+/* Returns false if new data shouldn't be sent */
+static bool sctp_outq_flush_rtx(struct sctp_flush_ctx *ctx,
+                               int rtx_timeout)
+{
+       int error, start_timer = 0;
+
+       if (ctx->asoc->peer.retran_path->state == SCTP_UNCONFIRMED)
+               return false;
+
+       if (ctx->transport != ctx->asoc->peer.retran_path) {
+               /* Switch transports & prepare the packet.  */
+               ctx->transport = ctx->asoc->peer.retran_path;
+               ctx->packet = &ctx->transport->packet;
+
+               if (list_empty(&ctx->transport->send_ready))
+                       list_add_tail(&ctx->transport->send_ready,
+                                     &ctx->transport_list);
+
+               sctp_packet_config(ctx->packet, ctx->asoc->peer.i.init_tag,
+                                  ctx->asoc->peer.ecn_capable);
+       }
+
+       error = __sctp_outq_flush_rtx(ctx->q, ctx->packet, rtx_timeout,
+                                     &start_timer, ctx->gfp);
+       if (error < 0)
+               ctx->asoc->base.sk->sk_err = -error;
+
+       if (start_timer) {
+               sctp_transport_reset_t3_rtx(ctx->transport);
+               ctx->transport->last_time_sent = jiffies;
+       }
+
+       /* This can happen on COOKIE-ECHO resend.  Only
+        * one chunk can get bundled with a COOKIE-ECHO.
+        */
+       if (ctx->packet->has_cookie_echo)
+               return false;
+
+       /* Don't send new data if there is still data
+        * waiting to retransmit.
+        */
+       if (!list_empty(&ctx->q->retransmit))
+               return false;
+
+       return true;
+}
+
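The boolean return folds two of the old goto sctp_flush_out paths into one decision. A condensed restatement of the contract (our summary, not code from the patch):

	/* New data may follow the retransmit flush only when all hold:
	 *  - the retransmit path is confirmed (not SCTP_UNCONFIRMED),
	 *  - no COOKIE-ECHO is pending in the packet (only one chunk
	 *    may be bundled with a COOKIE-ECHO), and
	 *  - the retransmit queue fully drained.
	 */
	bool may_send_new = sctp_outq_flush_rtx(ctx, rtx_timeout);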
+static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx,
+                                int rtx_timeout)
+{
+       struct sctp_chunk *chunk;
+       enum sctp_xmit status;
 
        /* Is it OK to send data chunks?  */
-       switch (asoc->state) {
+       switch (ctx->asoc->state) {
        case SCTP_STATE_COOKIE_ECHOED:
                /* Only allow bundling when this packet has a COOKIE-ECHO
                 * chunk.
                 */
-               if (!packet || !packet->has_cookie_echo)
-                       break;
+               if (!ctx->packet || !ctx->packet->has_cookie_echo)
+                       return;
 
                /* fallthru */
        case SCTP_STATE_ESTABLISHED:
        case SCTP_STATE_SHUTDOWN_PENDING:
        case SCTP_STATE_SHUTDOWN_RECEIVED:
-               /*
-                * RFC 2960 6.1  Transmission of DATA Chunks
-                *
-                * C) When the time comes for the sender to transmit,
-                * before sending new DATA chunks, the sender MUST
-                * first transmit any outstanding DATA chunks which
-                * are marked for retransmission (limited by the
-                * current cwnd).
-                */
-               if (!list_empty(&q->retransmit)) {
-                       if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED)
-                               goto sctp_flush_out;
-                       if (transport == asoc->peer.retran_path)
-                               goto retran;
-
-                       /* Switch transports & prepare the packet.  */
-
-                       transport = asoc->peer.retran_path;
+               break;
 
-                       if (list_empty(&transport->send_ready)) {
-                               list_add_tail(&transport->send_ready,
-                                             &transport_list);
-                       }
+       default:
+               /* Do nothing. */
+               return;
+       }
 
-                       packet = &transport->packet;
-                       sctp_packet_config(packet, vtag,
-                                          asoc->peer.ecn_capable);
-               retran:
-                       error = sctp_outq_flush_rtx(q, packet,
-                                                   rtx_timeout, &start_timer);
-                       if (error < 0)
-                               asoc->base.sk->sk_err = -error;
+       /* RFC 2960 6.1  Transmission of DATA Chunks
+        *
+        * C) When the time comes for the sender to transmit,
+        * before sending new DATA chunks, the sender MUST
+        * first transmit any outstanding DATA chunks which
+        * are marked for retransmission (limited by the
+        * current cwnd).
+        */
+       if (!list_empty(&ctx->q->retransmit) &&
+           !sctp_outq_flush_rtx(ctx, rtx_timeout))
+               return;
 
-                       if (start_timer) {
-                               sctp_transport_reset_t3_rtx(transport);
-                               transport->last_time_sent = jiffies;
-                       }
+       /* Apply Max.Burst limitation to the current transport in
+        * case it will be used for new data.  We are going to
+        * reset it before we return, but we want to apply the limit
+        * to the currently queued data.
+        */
+       if (ctx->transport)
+               sctp_transport_burst_limited(ctx->transport);
 
-                       /* This can happen on COOKIE-ECHO resend.  Only
-                        * one chunk can get bundled with a COOKIE-ECHO.
-                        */
-                       if (packet->has_cookie_echo)
-                               goto sctp_flush_out;
+       /* Finally, transmit new packets.  */
+       while ((chunk = sctp_outq_dequeue_data(ctx->q)) != NULL) {
+               __u32 sid = ntohs(chunk->subh.data_hdr->stream);
 
-                       /* Don't send new data if there is still data
-                        * waiting to retransmit.
-                        */
-                       if (!list_empty(&q->retransmit))
-                               goto sctp_flush_out;
+               /* Has this chunk expired? */
+               if (sctp_chunk_abandoned(chunk)) {
+                       sctp_sched_dequeue_done(ctx->q, chunk);
+                       sctp_chunk_fail(chunk, 0);
+                       sctp_chunk_free(chunk);
+                       continue;
                }
 
-               /* Apply Max.Burst limitation to the current transport in
-                * case it will be used for new data.  We are going to
-                * rest it before we return, but we want to apply the limit
-                * to the currently queued data.
-                */
-               if (transport)
-                       sctp_transport_burst_limited(transport);
-
-               /* Finally, transmit new packets.  */
-               while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
-                       __u32 sid = ntohs(chunk->subh.data_hdr->stream);
-
-                       /* Has this chunk expired? */
-                       if (sctp_chunk_abandoned(chunk)) {
-                               sctp_sched_dequeue_done(q, chunk);
-                               sctp_chunk_fail(chunk, 0);
-                               sctp_chunk_free(chunk);
-                               continue;
-                       }
+               if (ctx->asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) {
+                       sctp_outq_head_data(ctx->q, chunk);
+                       break;
+               }
 
-                       if (asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) {
-                               sctp_outq_head_data(q, chunk);
-                               goto sctp_flush_out;
-                       }
+               sctp_outq_select_transport(ctx, chunk);
 
-                       /* If there is a specified transport, use it.
-                        * Otherwise, we want to use the active path.
+               pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p skb->users:%d\n",
+                        __func__, ctx->q, chunk, chunk && chunk->chunk_hdr ?
+                        sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
+                        "illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
+                        chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
+                        refcount_read(&chunk->skb->users) : -1);
+
+               /* Add the chunk to the packet.  */
+               status = sctp_packet_transmit_chunk(ctx->packet, chunk, 0,
+                                                   ctx->gfp);
+               if (status != SCTP_XMIT_OK) {
+                       /* We could not append this chunk, so put
+                        * the chunk back on the output queue.
                         */
-                       new_transport = chunk->transport;
-                       if (!new_transport ||
-                           ((new_transport->state == SCTP_INACTIVE) ||
-                            (new_transport->state == SCTP_UNCONFIRMED) ||
-                            (new_transport->state == SCTP_PF)))
-                               new_transport = asoc->peer.active_path;
-                       if (new_transport->state == SCTP_UNCONFIRMED) {
-                               WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
-                               sctp_sched_dequeue_done(q, chunk);
-                               sctp_chunk_fail(chunk, 0);
-                               sctp_chunk_free(chunk);
-                               continue;
-                       }
-
-                       /* Change packets if necessary.  */
-                       if (new_transport != transport) {
-                               transport = new_transport;
+                       pr_debug("%s: could not transmit tsn:0x%x, status:%d\n",
+                                __func__, ntohl(chunk->subh.data_hdr->tsn),
+                                status);
 
-                               /* Schedule to have this transport's
-                                * packet flushed.
-                                */
-                               if (list_empty(&transport->send_ready)) {
-                                       list_add_tail(&transport->send_ready,
-                                                     &transport_list);
-                               }
-
-                               packet = &transport->packet;
-                               sctp_packet_config(packet, vtag,
-                                                  asoc->peer.ecn_capable);
-                               /* We've switched transports, so apply the
-                                * Burst limit to the new transport.
-                                */
-                               sctp_transport_burst_limited(transport);
-                       }
-
-                       pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p "
-                                "skb->users:%d\n",
-                                __func__, q, chunk, chunk && chunk->chunk_hdr ?
-                                sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
-                                "illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
-                                chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
-                                refcount_read(&chunk->skb->users) : -1);
-
-                       /* Add the chunk to the packet.  */
-                       status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
-
-                       switch (status) {
-                       case SCTP_XMIT_PMTU_FULL:
-                       case SCTP_XMIT_RWND_FULL:
-                       case SCTP_XMIT_DELAY:
-                               /* We could not append this chunk, so put
-                                * the chunk back on the output queue.
-                                */
-                               pr_debug("%s: could not transmit tsn:0x%x, status:%d\n",
-                                        __func__, ntohl(chunk->subh.data_hdr->tsn),
-                                        status);
-
-                               sctp_outq_head_data(q, chunk);
-                               goto sctp_flush_out;
-
-                       case SCTP_XMIT_OK:
-                               /* The sender is in the SHUTDOWN-PENDING state,
-                                * The sender MAY set the I-bit in the DATA
-                                * chunk header.
-                                */
-                               if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING)
-                                       chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM;
-                               if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
-                                       asoc->stats.ouodchunks++;
-                               else
-                                       asoc->stats.oodchunks++;
-
-                               /* Only now it's safe to consider this
-                                * chunk as sent, sched-wise.
-                                */
-                               sctp_sched_dequeue_done(q, chunk);
-
-                               break;
+                       sctp_outq_head_data(ctx->q, chunk);
+                       break;
+               }
 
-                       default:
-                               BUG();
-                       }
+               /* If the sender is in the SHUTDOWN-PENDING state, it
+                * MAY set the I-bit in the DATA chunk header.
+                */
+               if (ctx->asoc->state == SCTP_STATE_SHUTDOWN_PENDING)
+                       chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM;
+               if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+                       ctx->asoc->stats.ouodchunks++;
+               else
+                       ctx->asoc->stats.oodchunks++;
 
-                       /* BUG: We assume that the sctp_packet_transmit()
-                        * call below will succeed all the time and add the
-                        * chunk to the transmitted list and restart the
-                        * timers.
-                        * It is possible that the call can fail under OOM
-                        * conditions.
-                        *
-                        * Is this really a problem?  Won't this behave
-                        * like a lost TSN?
-                        */
-                       list_add_tail(&chunk->transmitted_list,
-                                     &transport->transmitted);
+               /* Only now it's safe to consider this
+                * chunk as sent, sched-wise.
+                */
+               sctp_sched_dequeue_done(ctx->q, chunk);
 
-                       sctp_transport_reset_t3_rtx(transport);
-                       transport->last_time_sent = jiffies;
+               list_add_tail(&chunk->transmitted_list,
+                             &ctx->transport->transmitted);
 
-                       /* Only let one DATA chunk get bundled with a
-                        * COOKIE-ECHO chunk.
-                        */
-                       if (packet->has_cookie_echo)
-                               goto sctp_flush_out;
-               }
-               break;
+               sctp_transport_reset_t3_rtx(ctx->transport);
+               ctx->transport->last_time_sent = jiffies;
 
-       default:
-               /* Do nothing.  */
-               break;
+               /* Only let one DATA chunk get bundled with a
+                * COOKIE-ECHO chunk.
+                */
+               if (ctx->packet->has_cookie_echo)
+                       break;
        }
+}
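Note the pairing in the loop above: a chunk taken with sctp_outq_dequeue_data() is reported to the stream scheduler via sctp_sched_dequeue_done() only once it is abandoned or actually appended; a failed append goes back on the queue instead. A condensed restatement of that life cycle (our summary of the patched code):

	chunk = sctp_outq_dequeue_data(ctx->q);
	if (sctp_chunk_abandoned(chunk)) {
		sctp_sched_dequeue_done(ctx->q, chunk);	/* expired: drop it */
		sctp_chunk_fail(chunk, 0);
		sctp_chunk_free(chunk);
	} else if (sctp_packet_transmit_chunk(ctx->packet, chunk, 0,
					      ctx->gfp) != SCTP_XMIT_OK) {
		sctp_outq_head_data(ctx->q, chunk);	/* requeue for later */
	} else {
		sctp_sched_dequeue_done(ctx->q, chunk);	/* now really sent */
	}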
 
-sctp_flush_out:
+static void sctp_outq_flush_transports(struct sctp_flush_ctx *ctx)
+{
+       struct list_head *ltransport;
+       struct sctp_packet *packet;
+       struct sctp_transport *t;
+       int error = 0;
 
-       /* Before returning, examine all the transports touched in
-        * this call.  Right now, we bluntly force clear all the
-        * transports.  Things might change after we implement Nagle.
-        * But such an examination is still required.
-        *
-        * --xguo
-        */
-       while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL) {
-               struct sctp_transport *t = list_entry(ltransport,
-                                                     struct sctp_transport,
-                                                     send_ready);
+       while ((ltransport = sctp_list_dequeue(&ctx->transport_list)) != NULL) {
+               t = list_entry(ltransport, struct sctp_transport, send_ready);
                packet = &t->packet;
                if (!sctp_packet_empty(packet)) {
-                       error = sctp_packet_transmit(packet, gfp);
+                       error = sctp_packet_transmit(packet, ctx->gfp);
                        if (error < 0)
-                               asoc->base.sk->sk_err = -error;
+                               ctx->q->asoc->base.sk->sk_err = -error;
                }
 
                /* Clear the burst limited state, if any */
@@ -1204,6 +1171,47 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
        }
 }
 
+/* Try to flush an outqueue.
+ *
+ * Description: Send everything in q which we legally can, subject to
+ * congestion limitations.
+ * Note: This function can be called from multiple contexts so appropriate
+ * locking is required.  Today we use the sock lock to protect
+ * this function.
+ */
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
+{
+       struct sctp_flush_ctx ctx = {
+               .q = q,
+               .transport = NULL,
+               .transport_list = LIST_HEAD_INIT(ctx.transport_list),
+               .asoc = q->asoc,
+               .packet = NULL,
+               .gfp = gfp,
+       };
+
+       /* 6.10 Bundling
+        *   ...
+        *   When bundling control chunks with DATA chunks, an
+        *   endpoint MUST place control chunks first in the outbound
+        *   SCTP packet.  The transmitter MUST transmit DATA chunks
+        *   within a SCTP packet in increasing order of TSN.
+        *   ...
+        */
+
+       sctp_outq_flush_ctrl(&ctx);
+
+       if (q->asoc->src_out_of_asoc_ok)
+               goto sctp_flush_out;
+
+       sctp_outq_flush_data(&ctx, rtx_timeout);
+
+sctp_flush_out:
+
+       sctp_outq_flush_transports(&ctx);
+}
+
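After the split, sctp_outq_flush() is a thin orchestrator: control chunks first (per the 6.10 ordering rule quoted above), then data, then one transmit pass per touched transport. Its callers elsewhere in outqueue.c are untouched by this patch; for reference, the uncork entry point plausibly still reads (reproduced from the surrounding file, approximate):

	void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
	{
		/* Re-enable sending and flush whatever queued while corked. */
		if (q->cork)
			q->cork = 0;
		sctp_outq_flush(q, 0, gfp);
	}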
 /* Update unack_data based on the incoming SACK chunk */
 static void sctp_sack_update_unack_data(struct sctp_association *assoc,
                                        struct sctp_sackhdr *sack)
@@ -1457,7 +1465,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
                         * the outstanding bytes for this chunk, so only
                         * count bytes associated with a transport.
                         */
-                       if (transport) {
+                       if (transport && !tchunk->tsn_gap_acked) {
                                /* If this chunk is being used for RTT
                                 * measurement, calculate the RTT and update
                                 * the RTO using this value.
@@ -1469,14 +1477,34 @@ static void sctp_check_transmitted(struct sctp_outq *q,
                                 * first instance of the packet or a later
                                 * instance).
                                 */
-                               if (!tchunk->tsn_gap_acked &&
-                                   !sctp_chunk_retransmitted(tchunk) &&
+                               if (!sctp_chunk_retransmitted(tchunk) &&
                                    tchunk->rtt_in_progress) {
                                        tchunk->rtt_in_progress = 0;
                                        rtt = jiffies - tchunk->sent_at;
                                        sctp_transport_update_rto(transport,
                                                                  rtt);
                                }
+
+                               if (TSN_lte(tsn, sack_ctsn)) {
+                                       /*
+                                        * SFR-CACC algorithm:
+                                        * 2) If the SACK contains gap acks
+                                        * and the flag CHANGEOVER_ACTIVE is
+                                        * set the receiver of the SACK MUST
+                                        * take the following action:
+                                        *
+                                        * B) For each TSN t being acked that
+                                        * has not been acked in any SACK so
+                                        * far, set cacc_saw_newack to 1 for
+                                        * the destination that the TSN was
+                                        * sent to.
+                                        */
+                                       if (sack->num_gap_ack_blocks &&
+                                           q->asoc->peer.primary_path->cacc.
+                                           changeover_active)
+                                               transport->cacc.cacc_saw_newack
+                                                       = 1;
+                               }
                        }
 
                        /* If the chunk hasn't been marked as ACKED,
@@ -1508,28 +1536,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
                                restart_timer = 1;
                                forward_progress = true;
 
-                               if (!tchunk->tsn_gap_acked) {
-                                       /*
-                                        * SFR-CACC algorithm:
-                                        * 2) If the SACK contains gap acks
-                                        * and the flag CHANGEOVER_ACTIVE is
-                                        * set the receiver of the SACK MUST
-                                        * take the following action:
-                                        *
-                                        * B) For each TSN t being acked that
-                                        * has not been acked in any SACK so
-                                        * far, set cacc_saw_newack to 1 for
-                                        * the destination that the TSN was
-                                        * sent to.
-                                        */
-                                       if (transport &&
-                                           sack->num_gap_ack_blocks &&
-                                           q->asoc->peer.primary_path->cacc.
-                                           changeover_active)
-                                               transport->cacc.cacc_saw_newack
-                                                       = 1;
-                               }
-
                                list_add_tail(&tchunk->transmitted_list,
                                              &q->sacked);
                        } else {
@@ -1756,7 +1762,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
        if (TSN_lte(tsn, ctsn))
                goto pass;
 
-       /* 3.3.4 Selective Acknowledgement (SACK) (3):
+       /* 3.3.4 Selective Acknowledgment (SACK) (3):
         *
         * Gap Ack Blocks:
         *  These fields contain the Gap Ack Blocks. They are repeated
index 5a4fb1dc8400a0316177ce65be8126857297eb5e..4a4fd19712552b9ac3429897cf9f78e65db6214d 100644 (file)
@@ -81,8 +81,6 @@ static int sctp_process_param(struct sctp_association *asoc,
                              gfp_t gfp);
 static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
                              const void *data);
-static void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
-                                    const void *data);
 
 /* Control chunk destructor */
 static void sctp_control_release_owner(struct sk_buff *skb)
@@ -154,12 +152,11 @@ static const struct sctp_paramhdr prsctp_param = {
        cpu_to_be16(sizeof(struct sctp_paramhdr)),
 };
 
-/* A helper to initialize an op error inside a
- * provided chunk, as most cause codes will be embedded inside an
- * abort chunk.
+/* A helper to initialize an op error inside a provided chunk, as most
+ * cause codes will be embedded inside an abort chunk.
  */
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
-                    size_t paylen)
+int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
+                   size_t paylen)
 {
        struct sctp_errhdr err;
        __u16 len;
@@ -167,33 +164,16 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
        /* Cause code constants are now defined in network order.  */
        err.cause = cause_code;
        len = sizeof(err) + paylen;
-       err.length  = htons(len);
-       chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
-}
-
-/* A helper to initialize an op error inside a
- * provided chunk, as most cause codes will be embedded inside an
- * abort chunk.  Differs from sctp_init_cause in that it won't oops
- * if there isn't enough space in the op error chunk
- */
-static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
-                                size_t paylen)
-{
-       struct sctp_errhdr err;
-       __u16 len;
-
-       /* Cause code constants are now defined in network order.  */
-       err.cause = cause_code;
-       len = sizeof(err) + paylen;
-       err.length  = htons(len);
+       err.length = htons(len);
 
        if (skb_tailroom(chunk->skb) < len)
                return -ENOSPC;
 
-       chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+       chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
 
        return 0;
 }
+
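With the fixed and non-fixed variants merged, sctp_init_cause() now reports -ENOSPC instead of relying on a separate sctp_init_cause_fixed(); callers that care check the return value, as the unknown-parameter handler later in this patch does:

	if (!sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
			     ntohs(param.p->length)))
		sctp_addto_chunk(*errp, ntohs(param.p->length), param.v);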
 /* 3.3.2 Initiation (INIT) (1)
  *
  * This chunk is used to initiate a SCTP association between two
@@ -779,10 +759,9 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
  * association.  This reports on which TSN's we've seen to date,
  * including duplicates and gaps.
  */
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc)
 {
        struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
-       struct sctp_association *aptr = (struct sctp_association *)asoc;
        struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
        __u16 num_gabs, num_dup_tsns;
        struct sctp_transport *trans;
@@ -857,7 +836,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 
        /* Add the duplicate TSN information.  */
        if (num_dup_tsns) {
-               aptr->stats.idupchunks += num_dup_tsns;
+               asoc->stats.idupchunks += num_dup_tsns;
                sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
                                 sctp_tsnmap_get_dups(map));
        }
@@ -869,11 +848,11 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
         * association so no transport will match after a wrap event like
         * this, until the next SACK.
         */
-       if (++aptr->peer.sack_generation == 0) {
+       if (++asoc->peer.sack_generation == 0) {
                list_for_each_entry(trans, &asoc->peer.transport_addr_list,
                                    transports)
                        trans->sack_generation = 0;
-               aptr->peer.sack_generation = 1;
+               asoc->peer.sack_generation = 1;
        }
 nodata:
        return retval;
@@ -1152,7 +1131,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans(
                                        const struct sctp_association *asoc,
                                        const struct sctp_chunk *chunk)
 {
-       static const char error[] = "Association exceeded its max_retans count";
+       static const char error[] = "Association exceeded its max_retrans count";
        size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
        struct sctp_chunk *retval;
 
@@ -1258,20 +1237,26 @@ static struct sctp_chunk *sctp_make_op_error_space(
        return retval;
 }
 
-/* Create an Operation Error chunk of a fixed size,
- * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
- * This is a helper function to allocate an error chunk for
- * for those invalid parameter codes in which we may not want
- * to report all the errors, if the incoming chunk is large
+/* Create an Operation Error chunk of a fixed size, specifically,
+ * min(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) - overheads.
+ * This is a helper function to allocate an error chunk for those
+ * invalid parameter codes in which we may not want to report all the
+ * errors, if the incoming chunk is large. If it can't fit in a single
+ * packet, we ignore it.
  */
-static inline struct sctp_chunk *sctp_make_op_error_fixed(
+static inline struct sctp_chunk *sctp_make_op_error_limited(
                                        const struct sctp_association *asoc,
                                        const struct sctp_chunk *chunk)
 {
-       size_t size = asoc ? asoc->pathmtu : 0;
+       size_t size = SCTP_DEFAULT_MAXSEGMENT;
+       struct sctp_sock *sp = NULL;
 
-       if (!size)
-               size = SCTP_DEFAULT_MAXSEGMENT;
+       if (asoc) {
+               size = min_t(size_t, size, asoc->pathmtu);
+               sp = sctp_sk(asoc->base.sk);
+       }
+
+       size = sctp_mtu_payload(sp, size, sizeof(struct sctp_errhdr));
 
        return sctp_make_op_error_space(asoc, chunk, size);
 }
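sctp_mtu_payload() comes from earlier in this series and is not shown here. A rough reconstruction of the computation (ours; the helper and the fields it touches are real, the body is inferred, and variable-length IP options are ignored):

	static inline __u32 sctp_mtu_payload_sketch(const struct sctp_sock *sp,
						    __u32 mtu, __u32 extra)
	{
		/* Payload space = MTU minus SCTP common header, the
		 * caller's extra overhead (here the error header), and
		 * the network header; assume IPv6 when no socket given.
		 */
		__u32 overhead = sizeof(struct sctphdr) + extra;

		overhead += sp ? sp->pf->af->net_header_len
			       : sizeof(struct ipv6hdr);
		return mtu - overhead;
	}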
@@ -1523,18 +1508,6 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
        return target;
 }
 
-/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
- * space in the chunk
- */
-static void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
-                                   int len, const void *data)
-{
-       if (skb_tailroom(chunk->skb) >= len)
-               return sctp_addto_chunk(chunk, len, data);
-       else
-               return NULL;
-}
-
 /* Append bytes from user space to the end of a chunk.  Will panic if
  * chunk is not big enough.
  * Returns a kernel err value.
@@ -1829,6 +1802,9 @@ struct sctp_association *sctp_unpack_cookie(
                kt = ktime_get_real();
 
        if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
+               suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
+               __be32 n = htonl(usecs);
+
                /*
                 * Section 3.3.10.3 Stale Cookie Error (3)
                 *
@@ -1837,17 +1813,12 @@ struct sctp_association *sctp_unpack_cookie(
                 * Stale Cookie Error:  Indicates the receipt of a valid State
                 * Cookie that has expired.
                 */
-               len = ntohs(chunk->chunk_hdr->length);
-               *errp = sctp_make_op_error_space(asoc, chunk, len);
-               if (*errp) {
-                       suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
-                       __be32 n = htonl(usecs);
-
-                       sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
-                                       sizeof(n));
-                       sctp_addto_chunk(*errp, sizeof(n), &n);
+               *errp = sctp_make_op_error(asoc, chunk,
+                                          SCTP_ERROR_STALE_COOKIE, &n,
+                                          sizeof(n), 0);
+               if (*errp)
                        *error = -SCTP_IERROR_STALE_COOKIE;
-               } else
+               else
                        *error = -SCTP_IERROR_NOMEM;
 
                goto fail;
@@ -1998,12 +1969,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
        if (*errp)
                sctp_chunk_free(*errp);
 
-       *errp = sctp_make_op_error_space(asoc, chunk, len);
-
-       if (*errp) {
-               sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len);
-               sctp_addto_chunk(*errp, len, param.v);
-       }
+       *errp = sctp_make_op_error(asoc, chunk, SCTP_ERROR_DNS_FAILED,
+                                  param.v, len, 0);
 
        /* Stop processing this chunk. */
        return 0;
@@ -2128,23 +2095,23 @@ static enum sctp_ierror sctp_process_unk_param(
                /* Make an ERROR chunk, preparing enough room for
                 * returning multiple unknown parameters.
                 */
-               if (NULL == *errp)
-                       *errp = sctp_make_op_error_fixed(asoc, chunk);
-
-               if (*errp) {
-                       if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
-                                       SCTP_PAD4(ntohs(param.p->length))))
-                               sctp_addto_chunk_fixed(*errp,
-                                               SCTP_PAD4(ntohs(param.p->length)),
-                                               param.v);
-               } else {
-                       /* If there is no memory for generating the ERROR
-                        * report as specified, an ABORT will be triggered
-                        * to the peer and the association won't be
-                        * established.
-                        */
-                       retval = SCTP_IERROR_NOMEM;
+               if (!*errp) {
+                       *errp = sctp_make_op_error_limited(asoc, chunk);
+                       if (!*errp) {
+                               /* If there is no memory for generating the
+                                * ERROR report as specified, an ABORT will be
+                                * triggered to the peer and the association
+                                * won't be established.
+                                */
+                               retval = SCTP_IERROR_NOMEM;
+                               break;
+                       }
                }
+
+               if (!sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+                                    ntohs(param.p->length)))
+                       sctp_addto_chunk(*errp, ntohs(param.p->length),
+                                        param.v);
                break;
        default:
                break;
@@ -2220,10 +2187,10 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
                 * MUST be aborted.  The ABORT chunk SHOULD contain the error
                 * cause 'Protocol Violation'.
                 */
-               if (SCTP_AUTH_RANDOM_LENGTH !=
-                       ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
+               if (SCTP_AUTH_RANDOM_LENGTH != ntohs(param.p->length) -
+                                              sizeof(struct sctp_paramhdr)) {
                        sctp_process_inv_paramlength(asoc, param.p,
-                                                       chunk, err_chunk);
+                                                    chunk, err_chunk);
                        retval = SCTP_IERROR_ABORT;
                }
                break;
index dd0594a109610eac4b92c4932e9d81299d0d957e..c9ae3404b1bb11572e34255cb3eae86ca1dd8131 100644 (file)
@@ -153,10 +153,7 @@ static enum sctp_disposition sctp_sf_violation_chunk(
                                        struct sctp_cmd_seq *commands);
 
 static enum sctp_ierror sctp_sf_authenticate(
-                                       struct net *net,
-                                       const struct sctp_endpoint *ep,
                                        const struct sctp_association *asoc,
-                                       const union sctp_subtype type,
                                        struct sctp_chunk *chunk);
 
 static enum sctp_disposition __sctp_sf_do_9_1_abort(
@@ -626,6 +623,38 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
        return SCTP_DISPOSITION_CONSUME;
 }
 
+static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk,
+                                  const struct sctp_association *asoc)
+{
+       struct sctp_chunk auth;
+
+       if (!chunk->auth_chunk)
+               return true;
+
+       /* SCTP-AUTH:  auth_chunk pointer is only set when the cookie-echo
+        * is supposed to be authenticated and we have to do delayed
+        * authentication.  We've just recreated the association using
+        * the information in the cookie and now it's much easier to
+        * do the authentication.
+        */
+
+       /* Make sure that we and the peer are AUTH capable */
+       if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+               return false;
+
+       /* set-up our fake chunk so that we can process it */
+       auth.skb = chunk->auth_chunk;
+       auth.asoc = chunk->asoc;
+       auth.sctp_hdr = chunk->sctp_hdr;
+       auth.chunk_hdr = (struct sctp_chunkhdr *)
+                               skb_push(chunk->auth_chunk,
+                                        sizeof(struct sctp_chunkhdr));
+       skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
+       auth.transport = chunk->transport;
+
+       return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR;
+}
+
 /*
  * Respond to a normal COOKIE ECHO chunk.
  * We are the side that is being asked for an association.
@@ -763,37 +792,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
        if (error)
                goto nomem_init;
 
-       /* SCTP-AUTH:  auth_chunk pointer is only set when the cookie-echo
-        * is supposed to be authenticated and we have to do delayed
-        * authentication.  We've just recreated the association using
-        * the information in the cookie and now it's much easier to
-        * do the authentication.
-        */
-       if (chunk->auth_chunk) {
-               struct sctp_chunk auth;
-               enum sctp_ierror ret;
-
-               /* Make sure that we and the peer are AUTH capable */
-               if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
-                       sctp_association_free(new_asoc);
-                       return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-               }
-
-               /* set-up our fake chunk so that we can process it */
-               auth.skb = chunk->auth_chunk;
-               auth.asoc = chunk->asoc;
-               auth.sctp_hdr = chunk->sctp_hdr;
-               auth.chunk_hdr = (struct sctp_chunkhdr *)
-                                       skb_push(chunk->auth_chunk,
-                                                sizeof(struct sctp_chunkhdr));
-               skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
-               auth.transport = chunk->transport;
-
-               ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
-               if (ret != SCTP_IERROR_NO_ERROR) {
-                       sctp_association_free(new_asoc);
-                       return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-               }
+       if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) {
+               sctp_association_free(new_asoc);
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
        }
 
        repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1794,13 +1795,18 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
                               GFP_ATOMIC))
                goto nomem;
 
+       if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
+               goto nomem;
+
+       if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+               return SCTP_DISPOSITION_DISCARD;
+
        /* Make sure no new addresses are being added during the
         * restart.  Though this is a pretty complicated attack
         * since you'd have to get inside the cookie.
         */
-       if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) {
+       if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands))
                return SCTP_DISPOSITION_CONSUME;
-       }
 
        /* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes
         * the peer has restarted (Action A), it MUST NOT setup a new
@@ -1906,6 +1912,12 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
                               GFP_ATOMIC))
                goto nomem;
 
+       if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
+               goto nomem;
+
+       if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+               return SCTP_DISPOSITION_DISCARD;
+
        /* Update the content of current association.  */
        sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
        sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
@@ -2003,6 +2015,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
         * a COOKIE ACK.
         */
 
+       if (!sctp_auth_chunk_verify(net, chunk, asoc))
+               return SCTP_DISPOSITION_DISCARD;
+
        /* Don't accidentally move back into established state. */
        if (asoc->state < SCTP_STATE_ESTABLISHED) {
                sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -2050,7 +2065,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
                }
        }
 
-       repl = sctp_make_cookie_ack(new_asoc, chunk);
+       repl = sctp_make_cookie_ack(asoc, chunk);
        if (!repl)
                goto nomem;
 
@@ -4165,10 +4180,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
  * The return value is the disposition of the chunk.
  */
 static enum sctp_ierror sctp_sf_authenticate(
-                                       struct net *net,
-                                       const struct sctp_endpoint *ep,
                                        const struct sctp_association *asoc,
-                                       const union sctp_subtype type,
                                        struct sctp_chunk *chunk)
 {
        struct sctp_shared_key *sh_key = NULL;
@@ -4269,7 +4281,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
                                                  commands);
 
        auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
-       error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
+       error = sctp_sf_authenticate(asoc, chunk);
        switch (error) {
        case SCTP_IERROR_AUTH_BAD_HMAC:
                /* Generate the ERROR chunk and discard the rest
index 80835ac26d2c3ce6559f75aaaa0b315fb77d9adf..1b4593b842b001903f518e90484c763d9d3698f3 100644 (file)
@@ -644,16 +644,15 @@ static int sctp_send_asconf_add_ip(struct sock            *sk,
 
                        list_for_each_entry(trans,
                            &asoc->peer.transport_addr_list, transports) {
-                               /* Clear the source and route cache */
-                               sctp_transport_dst_release(trans);
                                trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
                                    2*asoc->pathmtu, 4380));
                                trans->ssthresh = asoc->peer.i.a_rwnd;
                                trans->rto = asoc->rto_initial;
                                sctp_max_rto(asoc, trans);
                                trans->rtt = trans->srtt = trans->rttvar = 0;
+                               /* Clear the source and route cache */
                                sctp_transport_route(trans, NULL,
-                                   sctp_sk(asoc->base.sk));
+                                                    sctp_sk(asoc->base.sk));
                        }
                }
                retval = sctp_send_asconf(asoc, chunk);
@@ -896,7 +895,6 @@ static int sctp_send_asconf_del_ip(struct sock              *sk,
                 */
                list_for_each_entry(transport, &asoc->peer.transport_addr_list,
                                        transports) {
-                       sctp_transport_dst_release(transport);
                        sctp_transport_route(transport, NULL,
                                             sctp_sk(asoc->base.sk));
                }
@@ -1895,6 +1893,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
                                struct sctp_sndrcvinfo *sinfo)
 {
        struct sock *sk = asoc->base.sk;
+       struct sctp_sock *sp = sctp_sk(sk);
        struct net *net = sock_net(sk);
        struct sctp_datamsg *datamsg;
        bool wait_connect = false;
@@ -1913,13 +1912,16 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
                        goto err;
        }
 
-       if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+       if (sp->disable_fragments && msg_len > asoc->frag_point) {
                err = -EMSGSIZE;
                goto err;
        }
 
-       if (asoc->pmtu_pending)
-               sctp_assoc_pending_pmtu(asoc);
+       if (asoc->pmtu_pending) {
+               if (sp->param_flags & SPP_PMTUD_ENABLE)
+                       sctp_assoc_sync_pmtu(asoc);
+               asoc->pmtu_pending = 0;
+       }
 
        if (sctp_wspace(asoc) < msg_len)
                sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
@@ -1936,7 +1938,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
                if (err)
                        goto err;
 
-               if (sctp_sk(sk)->strm_interleave) {
+               if (sp->strm_interleave) {
                        timeo = sock_sndtimeo(sk, 0);
                        err = sctp_wait_for_connect(asoc, &timeo);
                        if (err)
@@ -2539,7 +2541,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
                        trans->pathmtu = params->spp_pathmtu;
                        sctp_assoc_sync_pmtu(asoc);
                } else if (asoc) {
-                       asoc->pathmtu = params->spp_pathmtu;
+                       sctp_assoc_set_pmtu(asoc, params->spp_pathmtu);
                } else {
                        sp->pathmtu = params->spp_pathmtu;
                }
@@ -3209,7 +3211,6 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
 static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
 {
        struct sctp_sock *sp = sctp_sk(sk);
-       struct sctp_af *af = sp->pf->af;
        struct sctp_assoc_value params;
        struct sctp_association *asoc;
        int val;
@@ -3231,30 +3232,24 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
                return -EINVAL;
        }
 
+       asoc = sctp_id2assoc(sk, params.assoc_id);
+
        if (val) {
                int min_len, max_len;
+               __u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
+                                sizeof(struct sctp_data_chunk);
 
-               min_len = SCTP_DEFAULT_MINSEGMENT - af->net_header_len;
-               min_len -= af->ip_options_len(sk);
-               min_len -= sizeof(struct sctphdr) +
-                          sizeof(struct sctp_data_chunk);
-
-               max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+               min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
+                                          datasize);
+               max_len = SCTP_MAX_CHUNK_LEN - datasize;
 
                if (val < min_len || val > max_len)
                        return -EINVAL;
        }
 
-       asoc = sctp_id2assoc(sk, params.assoc_id);
        if (asoc) {
-               if (val == 0) {
-                       val = asoc->pathmtu - af->net_header_len;
-                       val -= af->ip_options_len(sk);
-                       val -= sizeof(struct sctphdr) +
-                              sctp_datachk_len(&asoc->stream);
-               }
                asoc->user_frag = val;
-               asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
+               sctp_assoc_update_frag_point(asoc);
        } else {
                if (params.assoc_id && sctp_style(sk, UDP))
                        return -EINVAL;
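Worked example of the new bounds (our numbers, assuming IPv4 with no IP options and plain DATA chunks): with a 20-byte IP header, 12-byte SCTP common header and 16-byte DATA chunk header, datasize = 16 and min_len = sctp_mtu_payload(sp, 512, 16) = 512 - 20 - 12 - 16 = 464 bytes, while max_len = SCTP_MAX_CHUNK_LEN - 16; any user_frag outside [min_len, max_len] is rejected with -EINVAL. With interleaving enabled, sctp_datachk_len() returns the larger I-DATA header instead, shrinking both bounds.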
index f799043abec9a48a26ba152cfa3aaa63b7df47ea..f1f1d1b232ba3e75ed28bf60f5eb111c56fa4852 100644 (file)
@@ -240,6 +240,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
 
        new->out = NULL;
        new->in  = NULL;
+       new->outcnt = 0;
+       new->incnt  = 0;
 }
 
 static int sctp_send_reconf(struct sctp_association *asoc,
index 47f82bd794d915188bad037463c2aa14175a55ef..4a95e260b674b3dffe2ffc4c54cf9a330dfac18c 100644 (file)
@@ -242,9 +242,18 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
                                                &transport->fl, sk);
        }
 
-       if (transport->dst) {
-               transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
-       } else
+       if (transport->param_flags & SPP_PMTUD_DISABLE) {
+               struct sctp_association *asoc = transport->asoc;
+
+               if (!transport->pathmtu && asoc && asoc->pathmtu)
+                       transport->pathmtu = asoc->pathmtu;
+               if (transport->pathmtu)
+                       return;
+       }
+
+       if (transport->dst)
+               transport->pathmtu = sctp_dst_mtu(transport->dst);
+       else
                transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
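sctp_dst_mtu() is introduced elsewhere in this series. Unlike the raw SCTP_TRUNC4(dst_mtu(...)) it replaces, it also clamps the route MTU to the SCTP minimum; a plausible reconstruction (ours):

	static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
	{
		/* Never trust a route MTU below the SCTP minimum segment,
		 * and truncate to a 4-byte multiple as before.
		 */
		return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
					 SCTP_DEFAULT_MINSEGMENT));
	}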
 
@@ -290,6 +299,7 @@ void sctp_transport_route(struct sctp_transport *transport,
        struct sctp_association *asoc = transport->asoc;
        struct sctp_af *af = transport->af_specific;
 
+       sctp_transport_dst_release(transport);
        af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));
 
        if (saddr)
@@ -297,21 +307,14 @@ void sctp_transport_route(struct sctp_transport *transport,
        else
                af->get_saddr(opt, transport, &transport->fl);
 
-       if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
-               return;
-       }
-       if (transport->dst) {
-               transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
+       sctp_transport_pmtu(transport, sctp_opt2sk(opt));
 
-               /* Initialize sk->sk_rcv_saddr, if the transport is the
-                * association's active path for getsockname().
-                */
-               if (asoc && (!asoc->peer.primary_path ||
-                               (transport == asoc->peer.active_path)))
-                       opt->pf->to_sk_saddr(&transport->saddr,
-                                            asoc->base.sk);
-       } else
-               transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+       /* Initialize sk->sk_rcv_saddr, if the transport is the
+        * association's active path for getsockname().
+        */
+       if (transport->dst && asoc &&
+           (!asoc->peer.primary_path || transport == asoc->peer.active_path))
+               opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk);
 }
 
 /* Hold a reference to a transport.  */
index 84207ad33e8e9270d0fa1ddf61a514cc9315656d..8cb7d9858270a617e46e32e988babf86196ef84c 100644 (file)
@@ -715,7 +715,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
        return event;
 
 fail_mark:
-       sctp_chunk_put(chunk);
        kfree_skb(skb);
 fail:
        return NULL;
index f5d4b69dbabc888e85245ae5edbefcbd4cd9dc81..48530dab5c9411e40ad983da406bde1179afd6ea 100644 (file)
@@ -29,6 +29,7 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/smc.h>
+#include <asm/ioctls.h>
 
 #include "smc.h"
 #include "smc_clc.h"
@@ -45,11 +46,6 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
                                                 * creation
                                                 */
 
-struct smc_lgr_list smc_lgr_list = {           /* established link groups */
-       .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
-       .list = LIST_HEAD_INIT(smc_lgr_list.list),
-};
-
 static void smc_tcp_listen_work(struct work_struct *);
 
 static void smc_set_keepalive(struct sock *sk, int val)
@@ -192,8 +188,10 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
        sk->sk_protocol = protocol;
        smc = smc_sk(sk);
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
+       INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
        INIT_LIST_HEAD(&smc->accept_q);
        spin_lock_init(&smc->accept_q_lock);
+       spin_lock_init(&smc->conn.send_lock);
        sk->sk_prot->hash(sk);
        sk_refcnt_debug_inc(sk);
 
@@ -292,8 +290,28 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
+/* register a new rmb, optionally send confirm_rkey msg to register with peer */
+static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
+                      bool conf_rkey)
+{
+       /* register memory region for new rmb */
+       if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
+               rmb_desc->regerr = 1;
+               return -EFAULT;
+       }
+       if (!conf_rkey)
+               return 0;
+       /* exchange confirm_rkey msg with peer */
+       if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
+               rmb_desc->regerr = 1;
+               return -EFAULT;
+       }
+       return 0;
+}
+
 static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
+       struct net *net = sock_net(smc->clcsock->sk);
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
        int rest;
@@ -321,9 +339,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 
        smc_wr_remember_qp_attr(link);
 
-       rc = smc_wr_reg_send(link,
-                            smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
-       if (rc)
+       if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
                return SMC_CLC_DECL_INTERR;
 
        /* send CONFIRM LINK response over RoCE fabric */
@@ -353,7 +369,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
        if (rc < 0)
                return SMC_CLC_DECL_TCL;
 
-       link->state = SMC_LNK_ACTIVE;
+       smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
        return 0;
 }
@@ -361,10 +377,13 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 static void smc_conn_save_peer_info(struct smc_sock *smc,
                                    struct smc_clc_msg_accept_confirm *clc)
 {
-       smc->conn.peer_conn_idx = clc->conn_idx;
+       int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
+
+       smc->conn.peer_rmbe_idx = clc->rmbe_idx;
        smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
-       smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
+       smc->conn.peer_rmbe_size = bufsize;
        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
+       smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 }
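
A worked example of the new tx_off computation, with hypothetical values: rmbe_size travels in compressed notation where size = 1 << (compressed + 14), per the smc_uncompress_bufsize() helper moved later in this diff. Compressed 2 therefore denotes a 64 KiB peer RMBE, and peer_rmbe_idx 3 puts this connection's writes at offset 64 KiB * (3 - 1) = 128 KiB into the peer RMB. A standalone check of that arithmetic:

#include <assert.h>

static int uncompress_bufsize(unsigned char compressed)
{
	return 1 << ((int)compressed + 14);	/* minimum 16 KiB at 0 */
}

int main(void)
{
	int bufsize = uncompress_bufsize(2);	/* 65536 */
	int tx_off = bufsize * (3 - 1);		/* 131072 */

	assert(bufsize == 65536 && tx_off == 131072);
	return 0;
}
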
 
 static void smc_link_save_peer_info(struct smc_link *link,
@@ -377,165 +396,186 @@ static void smc_link_save_peer_info(struct smc_link *link,
        link->peer_mtu = clc->qp_mtu;
 }
 
-/* setup for RDMA connection of client */
-static int smc_connect_rdma(struct smc_sock *smc)
+/* fall back during connect */
+static int smc_connect_fallback(struct smc_sock *smc)
 {
-       struct smc_clc_msg_accept_confirm aclc;
-       int local_contact = SMC_FIRST_CONTACT;
-       struct smc_ib_device *smcibdev;
-       struct smc_link *link;
-       u8 srv_first_contact;
-       int reason_code = 0;
-       int rc = 0;
-       u8 ibport;
+       smc->use_fallback = true;
+       smc_copy_sock_settings_to_clc(smc);
+       if (smc->sk.sk_state == SMC_INIT)
+               smc->sk.sk_state = SMC_ACTIVE;
+       return 0;
+}
 
-       sock_hold(&smc->sk); /* sock put in passive closing */
+/* decline and fall back during connect */
+static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
+{
+       int rc;
 
-       if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
-               /* peer has not signalled SMC-capability */
-               smc->use_fallback = true;
-               goto out_connected;
+       if (reason_code < 0) /* error, fallback is not possible */
+               return reason_code;
+       if (reason_code != SMC_CLC_DECL_REPLY) {
+               rc = smc_clc_send_decline(smc, reason_code);
+               if (rc < 0)
+                       return rc;
        }
+       return smc_connect_fallback(smc);
+}
 
-       /* IPSec connections opt out of SMC-R optimizations */
-       if (using_ipsec(smc)) {
-               reason_code = SMC_CLC_DECL_IPSEC;
-               goto decline_rdma;
-       }
+/* abort connecting */
+static int smc_connect_abort(struct smc_sock *smc, int reason_code,
+                            int local_contact)
+{
+       if (local_contact == SMC_FIRST_CONTACT)
+               smc_lgr_forget(smc->conn.lgr);
+       mutex_unlock(&smc_create_lgr_pending);
+       smc_conn_free(&smc->conn);
+       if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
+               sock_put(&smc->sk); /* passive closing */
+       return reason_code;
+}
+
+/* check if there is an RDMA device available for this connection. */
+/* called for connect and listen */
+static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
+                         u8 *ibport)
+{
+       int reason_code = 0;
 
        /* PNET table look up: search active ib_device and port
         * within same PNETID that also contains the ethernet device
         * used for the internal TCP socket
         */
-       smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
-       if (!smcibdev) {
+       smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
+       if (!(*ibdev))
                reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-               goto decline_rdma;
-       }
+
+       return reason_code;
+}
+
+/* CLC handshake during connect */
+static int smc_connect_clc(struct smc_sock *smc,
+                          struct smc_clc_msg_accept_confirm *aclc,
+                          struct smc_ib_device *ibdev, u8 ibport)
+{
+       int rc = 0;
 
        /* do inband token exchange */
-       reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
-       if (reason_code < 0) {
-               rc = reason_code;
-               goto out_err;
-       }
-       if (reason_code > 0) /* configuration error */
-               goto decline_rdma;
+       rc = smc_clc_send_proposal(smc, ibdev, ibport);
+       if (rc)
+               return rc;
        /* receive SMC Accept CLC message */
-       reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
-                                      SMC_CLC_ACCEPT);
-       if (reason_code < 0) {
-               rc = reason_code;
-               goto out_err;
-       }
-       if (reason_code > 0)
-               goto decline_rdma;
+       return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
+}
+
+/* setup for RDMA connection of client */
+static int smc_connect_rdma(struct smc_sock *smc,
+                           struct smc_clc_msg_accept_confirm *aclc,
+                           struct smc_ib_device *ibdev, u8 ibport)
+{
+       int local_contact = SMC_FIRST_CONTACT;
+       struct smc_link *link;
+       int reason_code = 0;
 
-       srv_first_contact = aclc.hdr.flag;
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
-                                       srv_first_contact);
+       local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
+                                       aclc->hdr.flag);
        if (local_contact < 0) {
-               rc = local_contact;
-               if (rc == -ENOMEM)
+               if (local_contact == -ENOMEM)
                        reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-               else if (rc == -ENOLINK)
+               else if (local_contact == -ENOLINK)
                        reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
-               goto decline_rdma_unlock;
+               else
+                       reason_code = SMC_CLC_DECL_INTERR; /* other error */
+               return smc_connect_abort(smc, reason_code, 0);
        }
        link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
-       smc_conn_save_peer_info(smc, &aclc);
+       smc_conn_save_peer_info(smc, aclc);
 
        /* create send buffer and rmb */
-       rc = smc_buf_create(smc);
-       if (rc) {
-               reason_code = SMC_CLC_DECL_MEM;
-               goto decline_rdma_unlock;
-       }
+       if (smc_buf_create(smc))
+               return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
 
        if (local_contact == SMC_FIRST_CONTACT)
-               smc_link_save_peer_info(link, &aclc);
+               smc_link_save_peer_info(link, aclc);
 
-       rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
-       if (rc) {
-               reason_code = SMC_CLC_DECL_INTERR;
-               goto decline_rdma_unlock;
-       }
+       if (smc_rmb_rtoken_handling(&smc->conn, aclc))
+               return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                                        local_contact);
 
        smc_close_init(smc);
        smc_rx_init(smc);
 
        if (local_contact == SMC_FIRST_CONTACT) {
-               rc = smc_ib_ready_link(link);
-               if (rc) {
-                       reason_code = SMC_CLC_DECL_INTERR;
-                       goto decline_rdma_unlock;
-               }
+               if (smc_ib_ready_link(link))
+                       return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                                                local_contact);
        } else {
-               struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
-
-               if (!buf_desc->reused) {
-                       /* register memory region for new rmb */
-                       rc = smc_wr_reg_send(link,
-                                            buf_desc->mr_rx[SMC_SINGLE_LINK]);
-                       if (rc) {
-                               reason_code = SMC_CLC_DECL_INTERR;
-                               goto decline_rdma_unlock;
-                       }
-               }
+               if (!smc->conn.rmb_desc->reused &&
+                   smc_reg_rmb(link, smc->conn.rmb_desc, true))
+                       return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                                                local_contact);
        }
        smc_rmb_sync_sg_for_device(&smc->conn);
 
-       rc = smc_clc_send_confirm(smc);
-       if (rc)
-               goto out_err_unlock;
+       reason_code = smc_clc_send_confirm(smc);
+       if (reason_code)
+               return smc_connect_abort(smc, reason_code, local_contact);
+
+       smc_tx_init(smc);
 
        if (local_contact == SMC_FIRST_CONTACT) {
                /* QP confirmation over RoCE fabric */
                reason_code = smc_clnt_conf_first_link(smc);
-               if (reason_code < 0) {
-                       rc = reason_code;
-                       goto out_err_unlock;
-               }
-               if (reason_code > 0)
-                       goto decline_rdma_unlock;
+               if (reason_code)
+                       return smc_connect_abort(smc, reason_code,
+                                                local_contact);
        }
-
        mutex_unlock(&smc_create_lgr_pending);
-       smc_tx_init(smc);
 
-out_connected:
        smc_copy_sock_settings_to_clc(smc);
        if (smc->sk.sk_state == SMC_INIT)
                smc->sk.sk_state = SMC_ACTIVE;
 
-       return rc ? rc : local_contact;
+       return 0;
+}
 
-decline_rdma_unlock:
-       if (local_contact == SMC_FIRST_CONTACT)
-               smc_lgr_forget(smc->conn.lgr);
-       mutex_unlock(&smc_create_lgr_pending);
-       smc_conn_free(&smc->conn);
-decline_rdma:
-       /* RDMA setup failed, switch back to TCP */
-       smc->use_fallback = true;
-       if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-               rc = smc_clc_send_decline(smc, reason_code);
-               if (rc < 0)
-                       goto out_err;
-       }
-       goto out_connected;
+/* perform steps before actually connecting */
+static int __smc_connect(struct smc_sock *smc)
+{
+       struct smc_clc_msg_accept_confirm aclc;
+       struct smc_ib_device *ibdev;
+       int rc = 0;
+       u8 ibport;
 
-out_err_unlock:
-       if (local_contact == SMC_FIRST_CONTACT)
-               smc_lgr_forget(smc->conn.lgr);
-       mutex_unlock(&smc_create_lgr_pending);
-       smc_conn_free(&smc->conn);
-out_err:
-       if (smc->sk.sk_state == SMC_INIT)
-               sock_put(&smc->sk); /* passive closing */
-       return rc;
+       sock_hold(&smc->sk); /* sock put in passive closing */
+
+       if (smc->use_fallback)
+               return smc_connect_fallback(smc);
+
+       /* if peer has not signalled SMC-capability, fall back */
+       if (!tcp_sk(smc->clcsock->sk)->syn_smc)
+               return smc_connect_fallback(smc);
+
+       /* IPSec connections opt out of SMC-R optimizations */
+       if (using_ipsec(smc))
+               return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
+
+       /* check if a RDMA device is available; if not, fall back */
+       if (smc_check_rdma(smc, &ibdev, &ibport))
+               return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+
+       /* perform CLC handshake */
+       rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
+       if (rc)
+               return smc_connect_decline_fallback(smc, rc);
+
+       /* connect using rdma */
+       rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+       if (rc)
+               return smc_connect_decline_fallback(smc, rc);
+
+       return 0;
 }
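
__smc_connect() makes AF_SMC degrade gracefully: every decline path ends in a plain TCP connection rather than an error. A hypothetical userspace caller, assuming AF_SMC (43) from linux/socket.h and protocol 0 (SMCPROTO_SMC); illustrative only:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_SMC
#define AF_SMC 43	/* from linux/socket.h */
#endif

static int smc_connect_example(const char *ip, unsigned short port)
{
	struct sockaddr_in sa = { .sin_family = AF_INET };
	int fd = socket(AF_SMC, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	sa.sin_port = htons(port);
	if (inet_pton(AF_INET, ip, &sa.sin_addr) != 1 ||
	    connect(fd, (struct sockaddr *)&sa, sizeof(sa))) {
		close(fd);
		return -1;
	}
	return fd;	/* usable whether SMC-R was negotiated or not */
}
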
 
 static int smc_connect(struct socket *sock, struct sockaddr *addr,
@@ -571,8 +611,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
        if (rc)
                goto out;
 
-       /* setup RDMA connection */
-       rc = smc_connect_rdma(smc);
+       rc = __smc_connect(smc);
        if (rc < 0)
                goto out;
        else
@@ -712,6 +751,7 @@ void smc_close_non_accepted(struct sock *sk)
 
 static int smc_serv_conf_first_link(struct smc_sock *smc)
 {
+       struct net *net = sock_net(smc->clcsock->sk);
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
        int rest;
@@ -719,9 +759,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 
        link = &lgr->lnk[SMC_SINGLE_LINK];
 
-       rc = smc_wr_reg_send(link,
-                            smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
-       if (rc)
+       if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
                return SMC_CLC_DECL_INTERR;
 
        /* send CONFIRM LINK request to client over the RoCE fabric */
@@ -766,189 +804,244 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
                return rc;
        }
 
-       link->state = SMC_LNK_ACTIVE;
+       smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
        return 0;
 }
 
-/* setup for RDMA connection of server */
-static void smc_listen_work(struct work_struct *work)
+/* listen worker: finish */
+static void smc_listen_out(struct smc_sock *new_smc)
 {
-       struct smc_sock *new_smc = container_of(work, struct smc_sock,
-                                               smc_listen_work);
-       struct smc_clc_msg_proposal_prefix *pclc_prfx;
-       struct socket *newclcsock = new_smc->clcsock;
        struct smc_sock *lsmc = new_smc->listen_smc;
-       struct smc_clc_msg_accept_confirm cclc;
-       int local_contact = SMC_REUSE_CONTACT;
        struct sock *newsmcsk = &new_smc->sk;
-       struct smc_clc_msg_proposal *pclc;
-       struct smc_ib_device *smcibdev;
-       u8 buf[SMC_CLC_MAX_LEN];
-       struct smc_link *link;
-       int reason_code = 0;
-       int rc = 0;
-       u8 ibport;
 
-       /* check if peer is smc capable */
-       if (!tcp_sk(newclcsock->sk)->syn_smc) {
-               new_smc->use_fallback = true;
-               goto out_connected;
+       lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
+       if (lsmc->sk.sk_state == SMC_LISTEN) {
+               smc_accept_enqueue(&lsmc->sk, newsmcsk);
+       } else { /* no longer listening */
+               smc_close_non_accepted(newsmcsk);
        }
+       release_sock(&lsmc->sk);
 
-       /* do inband token exchange -
-        *wait for and receive SMC Proposal CLC message
-        */
-       reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
-                                      SMC_CLC_PROPOSAL);
-       if (reason_code < 0)
-               goto out_err;
-       if (reason_code > 0)
-               goto decline_rdma;
+       /* Wake up accept */
+       lsmc->sk.sk_data_ready(&lsmc->sk);
+       sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
+}
 
-       /* IPSec connections opt out of SMC-R optimizations */
-       if (using_ipsec(new_smc)) {
-               reason_code = SMC_CLC_DECL_IPSEC;
-               goto decline_rdma;
-       }
+/* listen worker: finish in state connected */
+static void smc_listen_out_connected(struct smc_sock *new_smc)
+{
+       struct sock *newsmcsk = &new_smc->sk;
 
-       /* PNET table look up: search active ib_device and port
-        * within same PNETID that also contains the ethernet device
-        * used for the internal TCP socket
-        */
-       smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
-       if (!smcibdev) {
-               reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-               goto decline_rdma;
+       sk_refcnt_debug_inc(newsmcsk);
+       if (newsmcsk->sk_state == SMC_INIT)
+               newsmcsk->sk_state = SMC_ACTIVE;
+
+       smc_listen_out(new_smc);
+}
+
+/* listen worker: finish in error state */
+static void smc_listen_out_err(struct smc_sock *new_smc)
+{
+       struct sock *newsmcsk = &new_smc->sk;
+
+       if (newsmcsk->sk_state == SMC_INIT)
+               sock_put(&new_smc->sk); /* passive closing */
+       newsmcsk->sk_state = SMC_CLOSED;
+       smc_conn_free(&new_smc->conn);
+
+       smc_listen_out(new_smc);
+}
+
+/* listen worker: decline and fall back if possible */
+static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
+                              int local_contact)
+{
+       /* RDMA setup failed, switch back to TCP */
+       if (local_contact == SMC_FIRST_CONTACT)
+               smc_lgr_forget(new_smc->conn.lgr);
+       if (reason_code < 0) { /* error, no fallback possible */
+               smc_listen_out_err(new_smc);
+               return;
+       }
+       smc_conn_free(&new_smc->conn);
+       new_smc->use_fallback = true;
+       if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
+               if (smc_clc_send_decline(new_smc, reason_code) < 0) {
+                       smc_listen_out_err(new_smc);
+                       return;
+               }
        }
+       smc_listen_out_connected(new_smc);
+}
+
+/* listen worker: check prefixes */
+static int smc_listen_rdma_check(struct smc_sock *new_smc,
+                                struct smc_clc_msg_proposal *pclc)
+{
+       struct smc_clc_msg_proposal_prefix *pclc_prfx;
+       struct socket *newclcsock = new_smc->clcsock;
 
-       pclc = (struct smc_clc_msg_proposal *)&buf;
        pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+       if (smc_clc_prfx_match(newclcsock, pclc_prfx))
+               return SMC_CLC_DECL_CNFERR;
 
-       rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
-       if (rc) {
-               reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-               goto decline_rdma;
-       }
+       return 0;
+}
 
+/* listen worker: initialize connection and buffers */
+static int smc_listen_rdma_init(struct smc_sock *new_smc,
+                               struct smc_clc_msg_proposal *pclc,
+                               struct smc_ib_device *ibdev, u8 ibport,
+                               int *local_contact)
+{
        /* allocate connection / link group */
-       mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
-                                       0);
-       if (local_contact < 0) {
-               rc = local_contact;
-               if (rc == -ENOMEM)
-                       reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-               goto decline_rdma_unlock;
+       *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
+       if (*local_contact < 0) {
+               if (*local_contact == -ENOMEM)
+                       return SMC_CLC_DECL_MEM; /* insufficient memory */
+               return SMC_CLC_DECL_INTERR; /* other error */
        }
-       link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
        /* create send buffer and rmb */
-       rc = smc_buf_create(new_smc);
-       if (rc) {
-               reason_code = SMC_CLC_DECL_MEM;
-               goto decline_rdma_unlock;
-       }
+       if (smc_buf_create(new_smc))
+               return SMC_CLC_DECL_MEM;
 
-       smc_close_init(new_smc);
-       smc_rx_init(new_smc);
+       return 0;
+}
+
+/* listen worker: register buffers */
+static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
+{
+       struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
        if (local_contact != SMC_FIRST_CONTACT) {
-               struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
-
-               if (!buf_desc->reused) {
-                       /* register memory region for new rmb */
-                       rc = smc_wr_reg_send(link,
-                                            buf_desc->mr_rx[SMC_SINGLE_LINK]);
-                       if (rc) {
-                               reason_code = SMC_CLC_DECL_INTERR;
-                               goto decline_rdma_unlock;
-                       }
+               if (!new_smc->conn.rmb_desc->reused) {
+                       if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
+                               return SMC_CLC_DECL_INTERR;
                }
        }
        smc_rmb_sync_sg_for_device(&new_smc->conn);
 
-       rc = smc_clc_send_accept(new_smc, local_contact);
-       if (rc)
-               goto out_err_unlock;
+       return 0;
+}
+
+/* listen worker: finish RDMA setup */
+static void smc_listen_rdma_finish(struct smc_sock *new_smc,
+                                  struct smc_clc_msg_accept_confirm *cclc,
+                                  int local_contact)
+{
+       struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+       int reason_code = 0;
 
-       /* receive SMC Confirm CLC message */
-       reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
-                                      SMC_CLC_CONFIRM);
-       if (reason_code < 0)
-               goto out_err_unlock;
-       if (reason_code > 0)
-               goto decline_rdma_unlock;
-       smc_conn_save_peer_info(new_smc, &cclc);
        if (local_contact == SMC_FIRST_CONTACT)
-               smc_link_save_peer_info(link, &cclc);
+               smc_link_save_peer_info(link, cclc);
 
-       rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
-       if (rc) {
+       if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
                reason_code = SMC_CLC_DECL_INTERR;
-               goto decline_rdma_unlock;
+               goto decline;
        }
 
        if (local_contact == SMC_FIRST_CONTACT) {
-               rc = smc_ib_ready_link(link);
-               if (rc) {
+               if (smc_ib_ready_link(link)) {
                        reason_code = SMC_CLC_DECL_INTERR;
-                       goto decline_rdma_unlock;
+                       goto decline;
                }
                /* QP confirmation over RoCE fabric */
                reason_code = smc_serv_conf_first_link(new_smc);
-               if (reason_code < 0)
-                       /* peer is not aware of a problem */
-                       goto out_err_unlock;
-               if (reason_code > 0)
-                       goto decline_rdma_unlock;
+               if (reason_code)
+                       goto decline;
        }
+       return;
 
-       smc_tx_init(new_smc);
+decline:
        mutex_unlock(&smc_create_lgr_pending);
+       smc_listen_decline(new_smc, reason_code, local_contact);
+}
 
-out_connected:
-       sk_refcnt_debug_inc(newsmcsk);
-       if (newsmcsk->sk_state == SMC_INIT)
-               newsmcsk->sk_state = SMC_ACTIVE;
-enqueue:
-       lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
-       if (lsmc->sk.sk_state == SMC_LISTEN) {
-               smc_accept_enqueue(&lsmc->sk, newsmcsk);
-       } else { /* no longer listening */
-               smc_close_non_accepted(newsmcsk);
+/* setup for RDMA connection of server */
+static void smc_listen_work(struct work_struct *work)
+{
+       struct smc_sock *new_smc = container_of(work, struct smc_sock,
+                                               smc_listen_work);
+       struct socket *newclcsock = new_smc->clcsock;
+       struct smc_clc_msg_accept_confirm cclc;
+       struct smc_clc_msg_proposal *pclc;
+       struct smc_ib_device *ibdev;
+       u8 buf[SMC_CLC_MAX_LEN];
+       int local_contact = 0;
+       int reason_code = 0;
+       int rc = 0;
+       u8 ibport;
+
+       if (new_smc->use_fallback) {
+               smc_listen_out_connected(new_smc);
+               return;
        }
-       release_sock(&lsmc->sk);
 
-       /* Wake up accept */
-       lsmc->sk.sk_data_ready(&lsmc->sk);
-       sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
-       return;
+       /* check if peer is smc capable */
+       if (!tcp_sk(newclcsock->sk)->syn_smc) {
+               new_smc->use_fallback = true;
+               smc_listen_out_connected(new_smc);
+               return;
+       }
 
-decline_rdma_unlock:
-       if (local_contact == SMC_FIRST_CONTACT)
-               smc_lgr_forget(new_smc->conn.lgr);
-       mutex_unlock(&smc_create_lgr_pending);
-decline_rdma:
-       /* RDMA setup failed, switch back to TCP */
-       smc_conn_free(&new_smc->conn);
-       new_smc->use_fallback = true;
-       if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-               if (smc_clc_send_decline(new_smc, reason_code) < 0)
-                       goto out_err;
+       /* do inband token exchange -
+        * wait for and receive SMC Proposal CLC message
+        */
+       pclc = (struct smc_clc_msg_proposal *)&buf;
+       reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
+                                      SMC_CLC_PROPOSAL);
+       if (reason_code) {
+               smc_listen_decline(new_smc, reason_code, 0);
+               return;
        }
-       goto out_connected;
 
-out_err_unlock:
-       if (local_contact == SMC_FIRST_CONTACT)
-               smc_lgr_forget(new_smc->conn.lgr);
+       /* IPSec connections opt out of SMC-R optimizations */
+       if (using_ipsec(new_smc)) {
+               smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
+               return;
+       }
+
+       mutex_lock(&smc_create_lgr_pending);
+       smc_close_init(new_smc);
+       smc_rx_init(new_smc);
+       smc_tx_init(new_smc);
+
+       /* check if RDMA is available */
+       if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
+           smc_listen_rdma_check(new_smc, pclc) ||
+           smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
+                                &local_contact) ||
+           smc_listen_rdma_reg(new_smc, local_contact)) {
+               /* SMC not supported, decline */
+               mutex_unlock(&smc_create_lgr_pending);
+               smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
+               return;
+       }
+
+       /* send SMC Accept CLC message */
+       rc = smc_clc_send_accept(new_smc, local_contact);
+       if (rc) {
+               mutex_unlock(&smc_create_lgr_pending);
+               smc_listen_decline(new_smc, rc, local_contact);
+               return;
+       }
+
+       /* receive SMC Confirm CLC message */
+       reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
+                                      SMC_CLC_CONFIRM);
+       if (reason_code) {
+               mutex_unlock(&smc_create_lgr_pending);
+               smc_listen_decline(new_smc, reason_code, local_contact);
+               return;
+       }
+
+       /* finish worker */
+       smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+       smc_conn_save_peer_info(new_smc, &cclc);
        mutex_unlock(&smc_create_lgr_pending);
-out_err:
-       if (newsmcsk->sk_state == SMC_INIT)
-               sock_put(&new_smc->sk); /* passive closing */
-       newsmcsk->sk_state = SMC_CLOSED;
-       smc_conn_free(&new_smc->conn);
-       goto enqueue; /* queue new sock with sk_err set */
+       smc_listen_out_connected(new_smc);
 }
 
 static void smc_tcp_listen_work(struct work_struct *work)
@@ -968,7 +1061,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
                        continue;
 
                new_smc->listen_smc = lsmc;
-               new_smc->use_fallback = false; /* assume rdma capability first*/
+               new_smc->use_fallback = lsmc->use_fallback;
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
@@ -978,10 +1071,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
        }
 
 out:
-       if (lsmc->clcsock) {
-               sock_release(lsmc->clcsock);
-               lsmc->clcsock = NULL;
-       }
        release_sock(lsk);
        sock_put(&lsmc->sk); /* sock_hold in smc_listen */
 }
@@ -1008,7 +1097,8 @@ static int smc_listen(struct socket *sock, int backlog)
         * them to the clc socket -- copy smc socket options to clc socket
         */
        smc_copy_sock_settings_to_clc(smc);
-       tcp_sk(smc->clcsock->sk)->syn_smc = 1;
+       if (!smc->use_fallback)
+               tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 
        rc = kernel_listen(smc->clcsock, backlog);
        if (rc)
@@ -1041,6 +1131,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
 
        if (lsmc->sk.sk_state != SMC_LISTEN) {
                rc = -EINVAL;
+               release_sock(sk);
                goto out;
        }
 
@@ -1068,9 +1159,29 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
 
        if (!rc)
                rc = sock_error(nsk);
+       release_sock(sk);
+       if (rc)
+               goto out;
+
+       if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
+               /* wait till data arrives on the socket */
+               timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
+                                                               MSEC_PER_SEC);
+               if (smc_sk(nsk)->use_fallback) {
+                       struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
+
+                       lock_sock(clcsk);
+                       if (skb_queue_empty(&clcsk->sk_receive_queue))
+                               sk_wait_data(clcsk, &timeo, NULL);
+                       release_sock(clcsk);
+               } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
+                       lock_sock(nsk);
+                       smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
+                       release_sock(nsk);
+               }
+       }
 
 out:
-       release_sock(sk);
        sock_put(sk); /* sock_hold above */
        return rc;
 }
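
The sockopt_defer_accept logic above gives AF_SMC listeners TCP_DEFER_ACCEPT semantics: accept() additionally waits for data, on the CLC socket for fallback connections, otherwise via smc_rx_wait() on the RMBE. A hypothetical server-side setup (AF_SMC value and the 5-second timeout are illustrative):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef AF_SMC
#define AF_SMC 43	/* from linux/socket.h */
#endif

static int smc_listen_defer(unsigned short port)
{
	int one = 1, secs = 5;	/* defer accept for up to 5 seconds */
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(port) };
	int fd = socket(AF_SMC, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	/* stored by smc_setsockopt() in smc->sockopt_defer_accept */
	setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, sizeof(secs));
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) || listen(fd, 128))
		return -1;
	return fd;
}
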
@@ -1101,6 +1212,16 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
            (sk->sk_state != SMC_INIT))
                goto out;
+
+       if (msg->msg_flags & MSG_FASTOPEN) {
+               if (sk->sk_state == SMC_INIT) {
+                       smc->use_fallback = true;
+               } else {
+                       rc = -EINVAL;
+                       goto out;
+               }
+       }
+
        if (smc->use_fallback)
                rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
        else
@@ -1129,10 +1250,12 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                goto out;
        }
 
-       if (smc->use_fallback)
+       if (smc->use_fallback) {
                rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
-       else
-               rc = smc_rx_recvmsg(smc, msg, len, flags);
+       } else {
+               msg->msg_namelen = 0;
+               rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
+       }
 
 out:
        release_sock(sk);
@@ -1170,14 +1293,16 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
                /* delegate to CLC child sock */
                release_sock(sk);
                mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
-               /* if non-blocking connect finished ... */
                lock_sock(sk);
-               if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) {
-                       sk->sk_err = smc->clcsock->sk->sk_err;
-                       if (sk->sk_err) {
-                               mask |= EPOLLERR;
-                       } else {
-                               rc = smc_connect_rdma(smc);
+               sk->sk_err = smc->clcsock->sk->sk_err;
+               if (sk->sk_err) {
+                       mask |= EPOLLERR;
+               } else {
+                       /* if non-blocking connect finished ... */
+                       if (sk->sk_state == SMC_INIT &&
+                           mask & EPOLLOUT &&
+                           smc->clcsock->sk->sk_state != TCP_CLOSE) {
+                               rc = __smc_connect(smc);
                                if (rc < 0)
                                        mask |= EPOLLERR;
                                /* success cases including fallback */
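
With the poll() rework above, a non-blocking connect is completed from the poll path itself: once the CLC socket reports writability, __smc_connect() runs, and failures surface as EPOLLERR. A userspace sketch of waiting for such a connect (hypothetical helper, 5 s timeout assumed):

#include <poll.h>

static int wait_nonblocking_connect(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };

	if (poll(&pfd, 1, 5000) <= 0)
		return -1;	/* error or timeout */
	if (pfd.revents & (POLLERR | POLLHUP))
		return -1;	/* connect (or SMC setup) failed */
	return 0;		/* writable: handshake finished */
}
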
@@ -1278,14 +1403,64 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 {
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
+       int val, rc;
 
        smc = smc_sk(sk);
 
        /* generic setsockopts reaching us here always apply to the
         * CLC socket
         */
-       return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
-                                            optval, optlen);
+       rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
+                                          optval, optlen);
+       if (smc->clcsock->sk->sk_err) {
+               sk->sk_err = smc->clcsock->sk->sk_err;
+               sk->sk_error_report(sk);
+       }
+       if (rc)
+               return rc;
+
+       if (optlen < sizeof(int))
+               return rc;
+       get_user(val, (int __user *)optval);
+
+       lock_sock(sk);
+       switch (optname) {
+       case TCP_ULP:
+       case TCP_FASTOPEN:
+       case TCP_FASTOPEN_CONNECT:
+       case TCP_FASTOPEN_KEY:
+       case TCP_FASTOPEN_NO_COOKIE:
+               /* option not supported by SMC */
+               if (sk->sk_state == SMC_INIT) {
+                       smc->use_fallback = true;
+               } else {
+                       if (!smc->use_fallback)
+                               rc = -EINVAL;
+               }
+               break;
+       case TCP_NODELAY:
+               if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+                       if (val && !smc->use_fallback)
+                               mod_delayed_work(system_wq, &smc->conn.tx_work,
+                                                0);
+               }
+               break;
+       case TCP_CORK:
+               if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+                       if (!val && !smc->use_fallback)
+                               mod_delayed_work(system_wq, &smc->conn.tx_work,
+                                                0);
+               }
+               break;
+       case TCP_DEFER_ACCEPT:
+               smc->sockopt_defer_accept = val;
+               break;
+       default:
+               break;
+       }
+       release_sock(sk);
+
+       return rc;
 }
 
 static int smc_getsockopt(struct socket *sock, int level, int optname,
@@ -1303,12 +1478,38 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
                     unsigned long arg)
 {
        struct smc_sock *smc;
+       int answ;
 
        smc = smc_sk(sock->sk);
-       if (smc->use_fallback)
+       if (smc->use_fallback) {
+               if (!smc->clcsock)
+                       return -EBADF;
                return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
-       else
-               return sock_no_ioctl(sock, cmd, arg);
+       }
+       switch (cmd) {
+       case SIOCINQ: /* same as FIONREAD */
+               if (smc->sk.sk_state == SMC_LISTEN)
+                       return -EINVAL;
+               answ = atomic_read(&smc->conn.bytes_to_rcv);
+               break;
+       case SIOCOUTQ:
+               /* output queue size (not sent + not acked) */
+               if (smc->sk.sk_state == SMC_LISTEN)
+                       return -EINVAL;
+               answ = smc->conn.sndbuf_desc->len -
+                                       atomic_read(&smc->conn.sndbuf_space);
+               break;
+       case SIOCOUTQNSD:
+               /* output queue size (not sent only) */
+               if (smc->sk.sk_state == SMC_LISTEN)
+                       return -EINVAL;
+               answ = smc_tx_prepared_sends(&smc->conn);
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       return put_user(answ, (int __user *)arg);
 }
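
smc_ioctl() now answers the classic TCP queue-size queries on the RDMA path as well. A userspace sketch using SIOCINQ and SIOCOUTQ from linux/sockios.h:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ, SIOCOUTQNSD */

static int print_queue_sizes(int fd)
{
	int inq = 0, outq = 0;

	if (ioctl(fd, SIOCINQ, &inq) < 0 || ioctl(fd, SIOCOUTQ, &outq) < 0)
		return -1;	/* fails with EINVAL on a listening socket */
	printf("rx pending: %d bytes, tx queued: %d bytes\n", inq, outq);
	return 0;
}
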
 
 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
@@ -1320,8 +1521,11 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
 
        smc = smc_sk(sk);
        lock_sock(sk);
-       if (sk->sk_state != SMC_ACTIVE)
+       if (sk->sk_state != SMC_ACTIVE) {
+               release_sock(sk);
                goto out;
+       }
+       release_sock(sk);
        if (smc->use_fallback)
                rc = kernel_sendpage(smc->clcsock, page, offset,
                                     size, flags);
@@ -1329,13 +1533,18 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
                rc = sock_no_sendpage(sock, page, offset, size, flags);
 
 out:
-       release_sock(sk);
        return rc;
 }
 
+/* Map the affected portions of the rmbe into an spd, note the number of bytes
+ * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
+ * are delayed until the respective page has been fully processed.
+ * Note that subsequent recv() calls have to wait until all splice() processing
+ * has completed.
+ */
 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
                               struct pipe_inode_info *pipe, size_t len,
-                                   unsigned int flags)
+                              unsigned int flags)
 {
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
@@ -1343,16 +1552,34 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
 
        smc = smc_sk(sk);
        lock_sock(sk);
-       if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
+
+       if (sk->sk_state == SMC_INIT ||
+           sk->sk_state == SMC_LISTEN ||
+           sk->sk_state == SMC_CLOSED)
+               goto out;
+
+       if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
+               rc = 0;
                goto out;
+       }
+
        if (smc->use_fallback) {
                rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
                                                    pipe, len, flags);
        } else {
-               rc = -EOPNOTSUPP;
+               if (*ppos) {
+                       rc = -ESPIPE;
+                       goto out;
+               }
+               if (flags & SPLICE_F_NONBLOCK)
+                       flags = MSG_DONTWAIT;
+               else
+                       flags = 0;
+               rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
        }
 out:
        release_sock(sk);
+
        return rc;
 }
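
With smc_rx_recvmsg() accepting an optional pipe, splice() now works on the RDMA path too. A minimal userspace sketch; the NULL offsets matter because smc_splice_read() rejects a nonzero *ppos with -ESPIPE:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static ssize_t drain_to_pipe(int sockfd, int pipe_wr, size_t len)
{
	/* sockets are not seekable: both offset pointers must be NULL */
	return splice(sockfd, NULL, pipe_wr, NULL, len, SPLICE_F_NONBLOCK);
}
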
 
@@ -1485,18 +1712,7 @@ static int __init smc_init(void)
 
 static void __exit smc_exit(void)
 {
-       struct smc_link_group *lgr, *lg;
-       LIST_HEAD(lgr_freeing_list);
-
-       spin_lock_bh(&smc_lgr_list.lock);
-       if (!list_empty(&smc_lgr_list.list))
-               list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
-       spin_unlock_bh(&smc_lgr_list.lock);
-       list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
-               list_del_init(&lgr->list);
-               cancel_delayed_work_sync(&lgr->free_work);
-               smc_lgr_free(lgr); /* free link group */
-       }
+       smc_core_exit();
        static_branch_disable(&tcp_have_smc);
        smc_ib_unregister_client();
        sock_unregister(PF_SMC);
index e4829a2f46baf8fb7c129ba4ffeca24c5189134e..a1467e411645c00c0ee847caa70d23d386532160 100644 (file)
@@ -118,7 +118,7 @@ struct smc_connection {
        struct rb_node          alert_node;
        struct smc_link_group   *lgr;           /* link group of connection */
        u32                     alert_token_local; /* unique conn. id */
-       u8                      peer_conn_idx;  /* from tcp handshake */
+       u8                      peer_rmbe_idx;  /* from tcp handshake */
        int                     peer_rmbe_size; /* size of peer rx buffer */
        atomic_t                peer_rmbe_space;/* remaining free bytes in peer
                                                 * rmbe
@@ -126,9 +126,7 @@ struct smc_connection {
        int                     rtoken_idx;     /* idx to peer RMB rkey/addr */
 
        struct smc_buf_desc     *sndbuf_desc;   /* send buffer descriptor */
-       int                     sndbuf_size;    /* sndbuf size <== sock wmem */
        struct smc_buf_desc     *rmb_desc;      /* RMBE descriptor */
-       int                     rmbe_size;      /* RMBE size <== sock rmem */
        int                     rmbe_size_short;/* compressed notation */
        int                     rmbe_update_limit;
                                                /* lower limit for consumer
@@ -153,6 +151,7 @@ struct smc_connection {
        u16                     tx_cdc_seq;     /* sequence # for CDC send */
        spinlock_t              send_lock;      /* protect wr_sends */
        struct delayed_work     tx_work;        /* retry of smc_cdc_msg_send */
+       u32                     tx_off;         /* base offset in peer rmb */
 
        struct smc_host_cdc_msg local_rx_ctrl;  /* filled during event_handl.
                                                 * .prod cf. TCP rcv_nxt
@@ -164,6 +163,9 @@ struct smc_connection {
        atomic_t                bytes_to_rcv;   /* arrived data,
                                                 * not yet received
                                                 */
+       atomic_t                splice_pending; /* number of spliced bytes
+                                                * pending processing
+                                                */
 #ifndef KERNEL_HAS_ATOMIC64
        spinlock_t              acurs_lock;     /* protect cursors */
 #endif
@@ -180,6 +182,10 @@ struct smc_sock {                          /* smc sock container */
        struct list_head        accept_q;       /* sockets to be accepted */
        spinlock_t              accept_q_lock;  /* protects accept_q */
        bool                    use_fallback;   /* fallback to tcp */
+       int                     sockopt_defer_accept;
+                                               /* sockopt TCP_DEFER_ACCEPT
+                                                * value
+                                                */
        u8                      wait_close_tx_prepared : 1;
                                                /* shutdown wr or close
                                                 * started, waiting for unsent
@@ -214,41 +220,6 @@ static inline u32 ntoh24(u8 *net)
        return be32_to_cpu(t);
 }
 
-#define SMC_BUF_MIN_SIZE 16384         /* minimum size of an RMB */
-
-#define SMC_RMBE_SIZES 16      /* number of distinct sizes for an RMBE */
-/* theoretically, the RFC states that largest size would be 512K,
- * i.e. compressed 5 and thus 6 sizes (0..5), despite
- * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
- */
-
-/* convert the RMB size into the compressed notation - minimum 16K.
- * In contrast to plain ilog2, this rounds towards the next power of 2,
- * so the socket application gets at least its desired sndbuf / rcvbuf size.
- */
-static inline u8 smc_compress_bufsize(int size)
-{
-       u8 compressed;
-
-       if (size <= SMC_BUF_MIN_SIZE)
-               return 0;
-
-       size = (size - 1) >> 14;
-       compressed = ilog2(size) + 1;
-       if (compressed >= SMC_RMBE_SIZES)
-               compressed = SMC_RMBE_SIZES - 1;
-       return compressed;
-}
-
-/* convert the RMB size from compressed notation into integer */
-static inline int smc_uncompress_bufsize(u8 compressed)
-{
-       u32 size;
-
-       size = 0x00000001 << (((int)compressed) + 14);
-       return (int)size;
-}
-
 #ifdef CONFIG_XFRM
 static inline bool using_ipsec(struct smc_sock *smc)
 {
@@ -262,12 +233,6 @@ static inline bool using_ipsec(struct smc_sock *smc)
 }
 #endif
 
-struct smc_clc_msg_local;
-
-void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc,
-                   struct smc_ib_device *smcibdev, u8 ibport,
-                   struct smc_clc_msg_local *lcl, int srv_first_contact);
 struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
 void smc_close_non_accepted(struct sock *sk);
 
index b42395d24cba50b0e30c39ab6275cdaa1eca3235..8d2c079c87b070cf23b30a02e9a2c5f8cfca8345 100644 (file)
@@ -44,13 +44,13 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
        smc = container_of(cdcpend->conn, struct smc_sock, conn);
        bh_lock_sock(&smc->sk);
        if (!wc_status) {
-               diff = smc_curs_diff(cdcpend->conn->sndbuf_size,
+               diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
                                     &cdcpend->conn->tx_curs_fin,
                                     &cdcpend->cursor);
                /* sndbuf_space is decreased in smc_sendmsg */
                smp_mb__before_atomic();
                atomic_add(diff, &cdcpend->conn->sndbuf_space);
-               /* guarantee 0 <= sndbuf_space <= sndbuf_size */
+               /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
                smp_mb__after_atomic();
                smc_curs_write(&cdcpend->conn->tx_curs_fin,
                               smc_curs_read(&cdcpend->cursor, cdcpend->conn),
@@ -82,7 +82,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
                sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
                "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
        BUILD_BUG_ON_MSG(
-               offsetof(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
+               sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
                "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
        BUILD_BUG_ON_MSG(
                sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -165,19 +165,12 @@ static inline bool smc_cdc_before(u16 seq1, u16 seq2)
 }
 
 static void smc_cdc_msg_recv_action(struct smc_sock *smc,
-                                   struct smc_link *link,
                                    struct smc_cdc_msg *cdc)
 {
        union smc_host_cursor cons_old, prod_old;
        struct smc_connection *conn = &smc->conn;
        int diff_cons, diff_prod;
 
-       if (!cdc->prod_flags.failover_validation) {
-               if (smc_cdc_before(ntohs(cdc->seqno),
-                                  conn->local_rx_ctrl.seqno))
-                       /* received seqno is old */
-                       return;
-       }
        smc_curs_write(&prod_old,
                       smc_curs_read(&conn->local_rx_ctrl.prod, conn),
                       conn);
@@ -198,13 +191,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
                smp_mb__after_atomic();
        }
 
-       diff_prod = smc_curs_diff(conn->rmbe_size, &prod_old,
+       diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
                                  &conn->local_rx_ctrl.prod);
        if (diff_prod) {
                /* bytes_to_rcv is decreased in smc_recvmsg */
                smp_mb__before_atomic();
                atomic_add(diff_prod, &conn->bytes_to_rcv);
-               /* guarantee 0 <= bytes_to_rcv <= rmbe_size */
+               /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
                smp_mb__after_atomic();
                smc->sk.sk_data_ready(&smc->sk);
        } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
@@ -236,26 +229,11 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
 }
 
 /* called under tasklet context */
-static inline void smc_cdc_msg_recv(struct smc_cdc_msg *cdc,
-                                   struct smc_link *link, u64 wr_id)
+static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
 {
-       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
-                                                 lnk[SMC_SINGLE_LINK]);
-       struct smc_connection *connection;
-       struct smc_sock *smc;
-
-       /* lookup connection */
-       read_lock_bh(&lgr->conns_lock);
-       connection = smc_lgr_find_conn(ntohl(cdc->token), lgr);
-       if (!connection) {
-               read_unlock_bh(&lgr->conns_lock);
-               return;
-       }
-       smc = container_of(connection, struct smc_sock, conn);
        sock_hold(&smc->sk);
-       read_unlock_bh(&lgr->conns_lock);
        bh_lock_sock(&smc->sk);
-       smc_cdc_msg_recv_action(smc, link, cdc);
+       smc_cdc_msg_recv_action(smc, cdc);
        bh_unlock_sock(&smc->sk);
        sock_put(&smc->sk); /* no free sk in softirq-context */
 }
@@ -266,12 +244,31 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
 {
        struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
        struct smc_cdc_msg *cdc = buf;
+       struct smc_connection *conn;
+       struct smc_link_group *lgr;
+       struct smc_sock *smc;
 
        if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
                return; /* short message */
        if (cdc->len != SMC_WR_TX_SIZE)
                return; /* invalid message */
-       smc_cdc_msg_recv(cdc, link, wc->wr_id);
+
+       /* lookup connection */
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+       read_lock_bh(&lgr->conns_lock);
+       conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
+       read_unlock_bh(&lgr->conns_lock);
+       if (!conn)
+               return;
+       smc = container_of(conn, struct smc_sock, conn);
+
+       if (!cdc->prod_flags.failover_validation) {
+               if (smc_cdc_before(ntohs(cdc->seqno),
+                                  conn->local_rx_ctrl.seqno))
+                       /* received seqno is old */
+                       return;
+       }
+       smc_cdc_msg_recv(smc, cdc);
 }
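
The stale-seqno check now runs in smc_cdc_rx_handler() before any socket locking. smc_cdc_before() has to compare 16-bit sequence numbers modulo wraparound; its body is not part of this hunk, so the following standalone sketch is an assumption about its shape:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool seq16_before(uint16_t a, uint16_t b)
{
	return (int16_t)(a - b) < 0;	/* signed distance handles wrap */
}

int main(void)
{
	assert(seq16_before(1, 2));
	assert(seq16_before(65535, 0));	/* 65535 precedes 0 after wrap */
	assert(!seq16_before(2, 1));
	return 0;
}
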
 
 static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = {
index ab240b37ad11459ba552422e6b87078b358a008c..d2012fd221001030662dbe796ee4e0b9c8dfbedd 100644 (file)
@@ -48,7 +48,7 @@ struct smc_cdc_msg {
        struct smc_cdc_producer_flags   prod_flags;
        struct smc_cdc_conn_state_flags conn_state_flags;
        u8                              reserved[18];
-} __aligned(8);
+} __packed;                                    /* format defined in RFC7609 */
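
Marking struct smc_cdc_msg __packed pins the wire format: together with the BUILD_BUG_ON change earlier in this diff, sizeof() must now equal SMC_WR_TX_SIZE exactly. A userspace analogue with _Static_assert; the field layout is condensed from this diff and 44 bytes is the assumed SMC_WR_TX_SIZE:

#include <stdint.h>

struct cdc_msg_sketch {
	uint8_t		type;
	uint8_t		len;		/* 44 */
	uint16_t	seqno;
	uint32_t	token;
	uint8_t		prod_cursor[8];
	uint8_t		cons_cursor[8];
	uint8_t		prod_flags;
	uint8_t		conn_state_flags;
	uint8_t		reserved[18];
} __attribute__((packed));

_Static_assert(sizeof(struct cdc_msg_sketch) == 44,
	       "wire format must match SMC_WR_TX_SIZE");
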
 
 static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
 {
index 3a988c22f627fe4180b6720de677e183ff06253b..717449b1da0b73d924488d43cd04ed0871607d1b 100644 (file)
@@ -316,7 +316,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
        if (clcm->type == SMC_CLC_DECLINE) {
                reason_code = SMC_CLC_DECL_REPLY;
                if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
-                       smc->conn.lgr->sync_err = true;
+                       smc->conn.lgr->sync_err = 1;
                        smc_lgr_terminate(smc->conn.lgr);
                }
        }
@@ -442,7 +442,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
        hton24(cclc.qpn, link->roce_qp->qp_num);
        cclc.rmb_rkey =
                htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
-       cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
+       cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
        cclc.rmbe_alert_token = htonl(conn->alert_token_local);
        cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
        cclc.rmbe_size = conn->rmbe_size_short;
@@ -494,7 +494,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
        hton24(aclc.qpn, link->roce_qp->qp_num);
        aclc.rmb_rkey =
                htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
-       aclc.conn_idx = 1;                      /* as long as 1 RMB = 1 RMBE */
+       aclc.rmbe_idx = 1;                      /* as long as 1 RMB = 1 RMBE */
        aclc.rmbe_alert_token = htonl(conn->alert_token_local);
        aclc.qp_mtu = link->path_mtu;
        aclc.rmbe_size = conn->rmbe_size_short,
index 63bf1dc2c1f9731d97374e68ed7f804b3bfaba2f..41ff9ea96139ced3b2a6760af2f310d40e6f58a7 100644 (file)
@@ -97,7 +97,7 @@ struct smc_clc_msg_accept_confirm {   /* clc accept / confirm message */
        struct smc_clc_msg_local lcl;
        u8 qpn[3];              /* QP number */
        __be32 rmb_rkey;        /* RMB rkey */
-       u8 conn_idx;            /* Connection index, which RMBE in RMB */
+       u8 rmbe_idx;            /* Index of RMBE in RMB */
        __be32 rmbe_alert_token;/* unique connection id */
 #if defined(__BIG_ENDIAN_BITFIELD)
        u8 rmbe_size : 4,       /* RMBE buf size (compressed notation) */
index f44f6803f7ff2c8585caf555b7b4aea4168b680b..1e5c0e90a706bdfa85af52efcb6aa6b43f9436eb 100644 (file)
 #define SMC_LGR_FREE_DELAY_SERV                (600 * HZ)
 #define SMC_LGR_FREE_DELAY_CLNT                (SMC_LGR_FREE_DELAY_SERV + 10)
 
-static u32 smc_lgr_num;                        /* unique link group number */
+static struct smc_lgr_list smc_lgr_list = {    /* established link groups */
+       .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
+       .list = LIST_HEAD_INIT(smc_lgr_list.list),
+       .num = 0,
+};
+
+static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
+                        struct smc_buf_desc *buf_desc);
 
 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 {
@@ -145,8 +152,11 @@ static void smc_lgr_free_work(struct work_struct *work)
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
 free:
        spin_unlock_bh(&smc_lgr_list.lock);
-       if (!delayed_work_pending(&lgr->free_work))
+       if (!delayed_work_pending(&lgr->free_work)) {
+               if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
+                       smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
                smc_lgr_free(lgr);
+       }
 }
 
 /* create a new SMC link group */
@@ -166,7 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc,
                goto out;
        }
        lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
-       lgr->sync_err = false;
+       lgr->sync_err = 0;
        memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
@@ -175,8 +185,8 @@ static int smc_lgr_create(struct smc_sock *smc,
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
-       smc_lgr_num += SMC_LGR_NUM_INCR;
-       memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
+       smc_lgr_list.num += SMC_LGR_NUM_INCR;
+       memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        lgr->conns_all = RB_ROOT;
 
@@ -191,9 +201,12 @@ static int smc_lgr_create(struct smc_sock *smc,
                smc_ib_setup_per_ibdev(smcibdev);
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
-       rc = smc_wr_alloc_link_mem(lnk);
+       rc = smc_llc_link_init(lnk);
        if (rc)
                goto free_lgr;
+       rc = smc_wr_alloc_link_mem(lnk);
+       if (rc)
+               goto clear_llc_lnk;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
@@ -203,10 +216,6 @@ static int smc_lgr_create(struct smc_sock *smc,
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
-       init_completion(&lnk->llc_confirm);
-       init_completion(&lnk->llc_confirm_resp);
-       init_completion(&lnk->llc_add);
-       init_completion(&lnk->llc_add_resp);
 
        smc->conn.lgr = lgr;
        rwlock_init(&lgr->conns_lock);
@@ -221,6 +230,8 @@ static int smc_lgr_create(struct smc_sock *smc,
        smc_ib_dealloc_protection_domain(lnk);
 free_link_mem:
        smc_wr_free_link_mem(lnk);
+clear_llc_lnk:
+       smc_llc_link_clear(lnk);
 free_lgr:
        kfree(lgr);
 out:
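The reordered setup makes smc_llc_link_init() the first per-link step, so the unwind ladder gains a clear_llc_lnk label between free_link_mem and free_lgr: a failure in any later step releases exactly the resources acquired before it, in reverse order. A minimal userspace sketch of this goto-unwind idiom (resource names are illustrative, not from the patch):

#include <stdlib.h>

static int setup_a(void **a) { *a = malloc(16); return *a ? 0 : -1; }
static int setup_b(void **b) { *b = malloc(16); return *b ? 0 : -1; }

static int create(void **a, void **b)
{
        int rc;

        rc = setup_a(a);
        if (rc)
                goto out;               /* nothing acquired yet */
        rc = setup_b(b);
        if (rc)
                goto undo_a;            /* release step A only */
        return 0;                       /* success: caller owns *a and *b */

undo_a:
        free(*a);
out:
        return rc;
}

int main(void)
{
        void *a, *b;

        if (!create(&a, &b)) {
                free(b);
                free(a);
        }
        return 0;
}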
@@ -229,14 +240,22 @@ static int smc_lgr_create(struct smc_sock *smc,
 
 static void smc_buf_unuse(struct smc_connection *conn)
 {
-       if (conn->sndbuf_desc) {
+       if (conn->sndbuf_desc)
                conn->sndbuf_desc->used = 0;
-               conn->sndbuf_size = 0;
-       }
        if (conn->rmb_desc) {
-               conn->rmb_desc->reused = true;
-               conn->rmb_desc->used = 0;
-               conn->rmbe_size = 0;
+               if (!conn->rmb_desc->regerr) {
+                       conn->rmb_desc->reused = 1;
+                       conn->rmb_desc->used = 0;
+               } else {
+                       /* buf registration failed, reuse not possible */
+                       struct smc_link_group *lgr = conn->lgr;
+
+                       write_lock_bh(&lgr->rmbs_lock);
+                       list_del(&conn->rmb_desc->list);
+                       write_unlock_bh(&lgr->rmbs_lock);
+
+                       smc_buf_free(lgr, true, conn->rmb_desc);
+               }
        }
 }
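The new regerr branch makes smc_buf_unuse() unlink an RMB whose registration failed from the link group's rmbs list and free it right away, instead of parking it with used = 0: a buffer whose registration never completed cannot safely be recycled into a future connection.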
 
@@ -253,6 +272,7 @@ void smc_conn_free(struct smc_connection *conn)
 static void smc_link_clear(struct smc_link *lnk)
 {
        lnk->peer_qpn = 0;
+       smc_llc_link_clear(lnk);
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
@@ -260,9 +280,11 @@ static void smc_link_clear(struct smc_link *lnk)
        smc_wr_free_link_mem(lnk);
 }
 
-static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
-                        bool is_rmb)
+static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
+                        struct smc_buf_desc *buf_desc)
 {
+       struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
        if (is_rmb) {
                if (buf_desc->mr_rx[SMC_SINGLE_LINK])
                        smc_ib_put_memory_region(
@@ -274,14 +296,13 @@ static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
                                    DMA_TO_DEVICE);
        }
        sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
-       if (buf_desc->cpu_addr)
-               free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
+       if (buf_desc->pages)
+               __free_pages(buf_desc->pages, buf_desc->order);
        kfree(buf_desc);
 }
 
 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
 {
-       struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;
@@ -294,7 +315,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
-                       smc_buf_free(buf_desc, lnk, is_rmb);
+                       smc_buf_free(lgr, is_rmb, buf_desc);
                }
        }
 }
@@ -331,7 +352,11 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        struct smc_sock *smc;
        struct rb_node *node;
 
+       if (lgr->terminating)
+               return; /* lgr already terminating */
+       lgr->terminating = 1;
        smc_lgr_forget(lgr);
+       smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
@@ -352,13 +377,26 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        smc_lgr_schedule_free_work(lgr);
 }
 
+/* Called when IB port is terminated */
+void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       struct smc_link_group *lgr, *l;
+
+       list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+               if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+                   lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+                       smc_lgr_terminate(lgr);
+       }
+}
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
 static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
 {
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
-       int rc = 0;
+       struct net_device *ndev;
+       int i, nest_lvl, rc = 0;
 
        *vlan_id = 0;
        if (!dst) {
@@ -370,8 +408,27 @@ static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
                goto out_rel;
        }
 
-       if (is_vlan_dev(dst->dev))
-               *vlan_id = vlan_dev_vlan_id(dst->dev);
+       ndev = dst->dev;
+       if (is_vlan_dev(ndev)) {
+               *vlan_id = vlan_dev_vlan_id(ndev);
+               goto out_rel;
+       }
+
+       rtnl_lock();
+       nest_lvl = dev_get_nest_level(ndev);
+       for (i = 0; i < nest_lvl; i++) {
+               struct list_head *lower = &ndev->adj_list.lower;
+
+               if (list_empty(lower))
+                       break;
+               lower = lower->next;
+               ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
+               if (is_vlan_dev(ndev)) {
+                       *vlan_id = vlan_dev_vlan_id(ndev);
+                       break;
+               }
+       }
+       rtnl_unlock();
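The added walk handles stacked setups where the VLAN device is not the egress device itself, e.g. a bonding master on top of a VLAN device (an illustrative topology): the loop descends the adj_list.lower chain from dst->dev, bounded by dev_get_nest_level(), and the first VLAN device found supplies the id.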
 
 out_rel:
        dst_release(dst);
@@ -416,10 +473,10 @@ int smc_conn_create(struct smc_sock *smc,
                    struct smc_clc_msg_local *lcl, int srv_first_contact)
 {
        struct smc_connection *conn = &smc->conn;
+       int local_contact = SMC_FIRST_CONTACT;
        struct smc_link_group *lgr;
        unsigned short vlan_id;
        enum smc_lgr_role role;
-       int local_contact = SMC_FIRST_CONTACT;
        int rc = 0;
 
        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
@@ -485,14 +542,39 @@ int smc_conn_create(struct smc_sock *smc,
        return rc ? rc : local_contact;
 }
 
+/* convert the RMB size into the compressed notation - minimum 16K.
+ * In contrast to plain ilog2, this rounds towards the next power of 2,
+ * so the socket application gets at least its desired sndbuf / rcvbuf size.
+ */
+static u8 smc_compress_bufsize(int size)
+{
+       u8 compressed;
+
+       if (size <= SMC_BUF_MIN_SIZE)
+               return 0;
+
+       size = (size - 1) >> 14;
+       compressed = ilog2(size) + 1;
+       if (compressed >= SMC_RMBE_SIZES)
+               compressed = SMC_RMBE_SIZES - 1;
+       return compressed;
+}
+
+/* convert the RMB size from compressed notation into integer */
+int smc_uncompress_bufsize(u8 compressed)
+{
+       u32 size;
+
+       size = 0x00000001 << (((int)compressed) + 14);
+       return (int)size;
+}
+
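A worked example of this round trip (standalone re-implementation for illustration; only the arithmetic comes from the patch, ilog2 is open-coded): asking for 70000 bytes gives (70000 - 1) >> 14 = 4 and ilog2(4) + 1 = 3, which uncompresses to 1 << (3 + 14) = 131072, so the application never gets less than it asked for.

#include <assert.h>
#include <stdint.h>

#define BUF_MIN_SIZE 16384      /* mirrors SMC_BUF_MIN_SIZE */
#define RMBE_SIZES   16         /* mirrors SMC_RMBE_SIZES */

static uint8_t compress_bufsize(int size)
{
        uint8_t compressed = 0;

        if (size <= BUF_MIN_SIZE)
                return 0;
        size = (size - 1) >> 14;
        while (size >>= 1)              /* open-coded ilog2() */
                compressed++;
        compressed++;                   /* round up, not down */
        if (compressed >= RMBE_SIZES)
                compressed = RMBE_SIZES - 1;
        return compressed;
}

static int uncompress_bufsize(uint8_t compressed)
{
        return 1 << (compressed + 14);
}

int main(void)
{
        assert(compress_bufsize(16384) == 0);
        assert(uncompress_bufsize(compress_bufsize(70000)) == 131072);
        assert(uncompress_bufsize(compress_bufsize(16385)) == 32768);
        return 0;
}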
 /* try to reuse a sndbuf or rmb description slot for a certain
  * buffer size; if not available, return NULL
  */
-static inline
-struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
-                                     int compressed_bufsize,
-                                     rwlock_t *lock,
-                                     struct list_head *buf_list)
+static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
+                                            rwlock_t *lock,
+                                            struct list_head *buf_list)
 {
        struct smc_buf_desc *buf_slot;
 
@@ -528,23 +610,23 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);
 
-       buf_desc->cpu_addr =
-               (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
-                                        __GFP_NOMEMALLOC |
-                                        __GFP_NORETRY | __GFP_ZERO,
-                                        get_order(bufsize));
-       if (!buf_desc->cpu_addr) {
+       buf_desc->order = get_order(bufsize);
+       buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
+                                     __GFP_NOMEMALLOC | __GFP_COMP |
+                                     __GFP_NORETRY | __GFP_ZERO,
+                                     buf_desc->order);
+       if (!buf_desc->pages) {
                kfree(buf_desc);
                return ERR_PTR(-EAGAIN);
        }
-       buf_desc->order = get_order(bufsize);
+       buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
 
        /* build the sg table from the pages */
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
                            GFP_KERNEL);
        if (rc) {
-               smc_buf_free(buf_desc, lnk, is_rmb);
+               smc_buf_free(lgr, is_rmb, buf_desc);
                return ERR_PTR(rc);
        }
        sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
@@ -555,7 +637,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
                               is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        /* SMC protocol depends on mapping to one DMA address only */
        if (rc != 1)  {
-               smc_buf_free(buf_desc, lnk, is_rmb);
+               smc_buf_free(lgr, is_rmb, buf_desc);
                return ERR_PTR(-EAGAIN);
        }
 
@@ -566,19 +648,20 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
                                              IB_ACCESS_LOCAL_WRITE,
                                              buf_desc);
                if (rc) {
-                       smc_buf_free(buf_desc, lnk, is_rmb);
+                       smc_buf_free(lgr, is_rmb, buf_desc);
                        return ERR_PTR(rc);
                }
        }
 
+       buf_desc->len = bufsize;
        return buf_desc;
 }
 
 static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 {
+       struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
-       struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct list_head *buf_list;
        int bufsize, bufsize_short;
        int sk_buf_size;
@@ -606,7 +689,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
                        continue;
 
                /* check for reusable slot in the link group */
-               buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
+               buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
                if (buf_desc) {
                        memset(buf_desc->cpu_addr, 0, bufsize);
                        break; /* found reusable slot */
@@ -630,14 +713,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 
        if (is_rmb) {
                conn->rmb_desc = buf_desc;
-               conn->rmbe_size = bufsize;
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
        } else {
                conn->sndbuf_desc = buf_desc;
-               conn->sndbuf_size = bufsize;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
@@ -693,8 +774,7 @@ int smc_buf_create(struct smc_sock *smc)
        /* create rmb */
        rc = __smc_buf_create(smc, true);
        if (rc)
-               smc_buf_free(smc->conn.sndbuf_desc,
-                            &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
+               smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
        return rc;
 }
 
@@ -761,3 +841,21 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
                return conn->rtoken_idx;
        return 0;
 }
+
+/* Called (from smc_exit) when module is removed */
+void smc_core_exit(void)
+{
+       struct smc_link_group *lgr, *lg;
+       LIST_HEAD(lgr_freeing_list);
+
+       spin_lock_bh(&smc_lgr_list.lock);
+       if (!list_empty(&smc_lgr_list.list))
+               list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
+       spin_unlock_bh(&smc_lgr_list.lock);
+       list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
+               list_del_init(&lgr->list);
+               smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+               cancel_delayed_work_sync(&lgr->free_work);
+               smc_lgr_free(lgr); /* free link group */
+       }
+}
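smc_core_exit() uses the classic splice-then-free shape: detach the whole list in O(1) while holding the spinlock, then walk the private copy unlocked, because cancel_delayed_work_sync() and the freeing itself may sleep. A compact userspace sketch of the pattern (hand-rolled list, pthread mutex standing in for the spinlock):

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *global_list;

static void drain_and_free(void)
{
        struct node *freeing, *n;

        pthread_mutex_lock(&list_lock);
        freeing = global_list;          /* "splice": steal the entire list */
        global_list = NULL;
        pthread_mutex_unlock(&list_lock);

        while ((n = freeing)) {         /* free without holding the lock */
                freeing = n->next;
                free(n);
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                n->next = global_list;
                global_list = n;
        }
        drain_and_free();
        return 0;
}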
index 07e2a393e6d98b672d4160ff1486c0bd6e43df4b..93cb3523bf5093dbe4e30d34bd02e05eab98c5c3 100644 (file)
 struct smc_lgr_list {                  /* list of link group definition */
        struct list_head        list;
        spinlock_t              lock;   /* protects list of link groups */
+       u32                     num;    /* unique link group number */
 };
 
-extern struct smc_lgr_list     smc_lgr_list; /* list of link groups */
-
 enum smc_lgr_role {            /* possible roles of a link group */
        SMC_CLNT,       /* client */
        SMC_SERV        /* server */
@@ -79,6 +78,7 @@ struct smc_link {
        dma_addr_t              wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
        u64                     wr_rx_id;       /* seq # of last recv WR */
        u32                     wr_rx_cnt;      /* number of WR recv buffers */
+       unsigned long           wr_rx_tstamp;   /* jiffies when last buf rx */
 
        struct ib_reg_wr        wr_reg;         /* WR register memory region */
        wait_queue_head_t       wr_reg_wait;    /* wait for wr_reg result */
@@ -95,12 +95,18 @@ struct smc_link {
        u8                      link_id;        /* unique # within link group */
 
        enum smc_link_state     state;          /* state of link */
+       struct workqueue_struct *llc_wq;        /* single thread work queue */
        struct completion       llc_confirm;    /* wait for rx of conf link */
        struct completion       llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
        int                     llc_confirm_rc; /* rc from confirm link msg */
        int                     llc_confirm_resp_rc; /* rc from conf_resp msg */
        struct completion       llc_add;        /* wait for rx of add link */
        struct completion       llc_add_resp;   /* wait for rx of add link rsp*/
+       struct delayed_work     llc_testlink_wrk; /* testlink worker */
+       struct completion       llc_testlink_resp; /* wait for rx of testlink */
+       int                     llc_testlink_time; /* testlink interval */
+       struct completion       llc_confirm_rkey; /* wait 4 rx of cnf rkey */
+       int                     llc_confirm_rkey_rc; /* rc from cnf rkey msg */
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
@@ -116,6 +122,8 @@ struct smc_link {
 struct smc_buf_desc {
        struct list_head        list;
        void                    *cpu_addr;      /* virtual address of buffer */
+       struct page             *pages;
+       int                     len;            /* length of buffer */
        struct sg_table         sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
        struct ib_mr            *mr_rx[SMC_LINKS_PER_LGR_MAX];
                                                /* for rmb only: memory region
@@ -123,7 +131,8 @@ struct smc_buf_desc {
                                                 */
        u32                     order;          /* allocation order */
        u32                     used;           /* currently used / unused */
-       bool                    reused;         /* new created / reused */
+       u8                      reused  : 1;    /* new created / reused */
+       u8                      regerr  : 1;    /* err during registration */
 };
 
 struct smc_rtoken {                            /* address/key of remote RMB */
@@ -132,6 +141,12 @@ struct smc_rtoken {                                /* address/key of remote RMB */
 };
 
 #define SMC_LGR_ID_SIZE                4
+#define SMC_BUF_MIN_SIZE       16384   /* minimum size of an RMB */
+#define SMC_RMBE_SIZES         16      /* number of distinct RMBE sizes */
+/* theoretically, the RFC states that the largest size would be 512K,
+ * i.e. compressed 5 and thus 6 sizes (0..5), even though
+ * struct smc_clc_msg_accept_confirm.rmbe_size is a 4-bit value (0..15)
+ */
 
 struct smc_link_group {
        struct list_head        list;
@@ -157,7 +172,8 @@ struct smc_link_group {
 
        u8                      id[SMC_LGR_ID_SIZE];    /* unique lgr id */
        struct delayed_work     free_work;      /* delayed freeing of an lgr */
-       bool                    sync_err;       /* lgr no longer fits to peer */
+       u8                      sync_err : 1;   /* lgr no longer fits to peer */
+       u8                      terminating : 1;/* lgr is terminating */
 };
 
 /* Find the connection associated with the given alert token in the link group.
@@ -195,11 +211,14 @@ static inline struct smc_connection *smc_lgr_find_conn(
 
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
+struct smc_clc_msg_local;
 
 void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
+void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
 int smc_buf_create(struct smc_sock *smc);
+int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc);
 int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
@@ -208,4 +227,9 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
+void smc_conn_free(struct smc_connection *conn);
+int smc_conn_create(struct smc_sock *smc,
+                   struct smc_ib_device *smcibdev, u8 ibport,
+                   struct smc_clc_msg_local *lcl, int srv_first_contact);
+void smc_core_exit(void);
 #endif
index 427b91c1c964f05e01c1cf221a397dc8d6d46628..839354402215a836556fd881c350ca0ddb6b1c1b 100644 (file)
@@ -38,17 +38,27 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
 {
        struct smc_sock *smc = smc_sk(sk);
 
-       r->diag_family = sk->sk_family;
        if (!smc->clcsock)
                return;
        r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
        r->id.idiag_dport = smc->clcsock->sk->sk_dport;
        r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
        sock_diag_save_cookie(sk, r->id.idiag_cookie);
-       memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
-       memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
-       r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
-       r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
+       if (sk->sk_protocol == SMCPROTO_SMC) {
+               r->diag_family = PF_INET;
+               memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+               memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+               r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
+               r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (sk->sk_protocol == SMCPROTO_SMC6) {
+               r->diag_family = PF_INET6;
+               memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr,
+                      sizeof(smc->clcsock->sk->sk_v6_rcv_saddr));
+               memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr,
+                      sizeof(smc->clcsock->sk->sk_v6_daddr));
+#endif
+       }
 }
 
 static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
@@ -91,8 +101,9 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                struct smc_connection *conn = &smc->conn;
                struct smc_diag_conninfo cinfo = {
                        .token = conn->alert_token_local,
-                       .sndbuf_size = conn->sndbuf_size,
-                       .rmbe_size = conn->rmbe_size,
+                       .sndbuf_size = conn->sndbuf_desc ?
+                               conn->sndbuf_desc->len : 0,
+                       .rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0,
                        .peer_rmbe_size = conn->peer_rmbe_size,
 
                        .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap,
@@ -153,7 +164,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
        return -EMSGSIZE;
 }
 
-static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb,
+                              struct netlink_callback *cb)
 {
        struct net *net = sock_net(skb->sk);
        struct nlattr *bc = NULL;
@@ -161,8 +173,8 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
        struct sock *sk;
        int rc = 0;
 
-       read_lock(&smc_proto.h.smc_hash->lock);
-       head = &smc_proto.h.smc_hash->ht;
+       read_lock(&prot->h.smc_hash->lock);
+       head = &prot->h.smc_hash->ht;
        if (hlist_empty(head))
                goto out;
 
@@ -175,7 +187,17 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
        }
 
 out:
-       read_unlock(&smc_proto.h.smc_hash->lock);
+       read_unlock(&prot->h.smc_hash->lock);
+       return rc;
+}
+
+static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       int rc = 0;
+
+       rc = smc_diag_dump_proto(&smc_proto, skb, cb);
+       if (!rc)
+               rc = smc_diag_dump_proto(&smc_proto6, skb, cb);
        return rc;
 }
 
index 26df554f7588d665b704438a014c693e18fc69ac..0eed7ab9f28b54c77010d85558d1b26b8e65b208 100644 (file)
@@ -143,17 +143,6 @@ int smc_ib_ready_link(struct smc_link *lnk)
        return rc;
 }
 
-static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
-{
-       struct smc_link_group *lgr, *l;
-
-       list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-               if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
-                   lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
-                       smc_lgr_terminate(lgr);
-       }
-}
-
 /* process context wrapper for might_sleep smc_ib_remember_port_attr */
 static void smc_ib_port_event_work(struct work_struct *work)
 {
@@ -165,7 +154,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
                smc_ib_remember_port_attr(smcibdev, port_idx + 1);
                clear_bit(port_idx, &smcibdev->port_event_mask);
                if (!smc_ib_port_active(smcibdev, port_idx + 1))
-                       smc_ib_port_terminate(smcibdev, port_idx + 1);
+                       smc_port_terminate(smcibdev, port_idx + 1);
        }
 }
 
index ea4b21981b4bd13d510bbc452398e868db36cfbc..5800a6b43d830018ffc91a4272190508e8c9f233 100644 (file)
@@ -214,12 +214,11 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        return rc;
 }
 
-/* send ADD LINK request or response */
-int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
-                         union ib_gid *gid,
-                         enum smc_llc_reqresp reqresp)
+/* send LLC confirm rkey request */
+static int smc_llc_send_confirm_rkey(struct smc_link *link,
+                                    struct smc_buf_desc *rmb_desc)
 {
-       struct smc_llc_msg_add_link *addllc;
+       struct smc_llc_msg_confirm_rkey *rkeyllc;
        struct smc_wr_tx_pend_priv *pend;
        struct smc_wr_buf *wr_buf;
        int rc;
@@ -227,7 +226,25 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
        rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
        if (rc)
                return rc;
-       addllc = (struct smc_llc_msg_add_link *)wr_buf;
+       rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
+       memset(rkeyllc, 0, sizeof(*rkeyllc));
+       rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
+       rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
+       rkeyllc->rtoken[0].rmb_key =
+               htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+       rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
+               (u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* prepare an add link message */
+static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
+                                 struct smc_link *link, u8 mac[],
+                                 union ib_gid *gid,
+                                 enum smc_llc_reqresp reqresp)
+{
        memset(addllc, 0, sizeof(*addllc));
        addllc->hd.common.type = SMC_LLC_ADD_LINK;
        addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
@@ -239,16 +256,14 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
        }
        memcpy(addllc->sender_mac, mac, ETH_ALEN);
        memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
-       /* send llc message */
-       rc = smc_wr_tx_send(link, pend);
-       return rc;
 }
 
-/* send DELETE LINK request or response */
-int smc_llc_send_delete_link(struct smc_link *link,
-                            enum smc_llc_reqresp reqresp)
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+                         union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp)
 {
-       struct smc_llc_msg_del_link *delllc;
+       struct smc_llc_msg_add_link *addllc;
        struct smc_wr_tx_pend_priv *pend;
        struct smc_wr_buf *wr_buf;
        int rc;
@@ -256,7 +271,18 @@ int smc_llc_send_delete_link(struct smc_link *link,
        rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
        if (rc)
                return rc;
-       delllc = (struct smc_llc_msg_del_link *)wr_buf;
+       addllc = (struct smc_llc_msg_add_link *)wr_buf;
+       smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* prepare a delete link message */
+static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
+                                    struct smc_link *link,
+                                    enum smc_llc_reqresp reqresp)
+{
        memset(delllc, 0, sizeof(*delllc));
        delllc->hd.common.type = SMC_LLC_DELETE_LINK;
        delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
@@ -266,14 +292,29 @@ int smc_llc_send_delete_link(struct smc_link *link,
        delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
        delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
        delllc->link_num = link->link_id;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_del_link *delllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       delllc = (struct smc_llc_msg_del_link *)wr_buf;
+       smc_llc_prep_delete_link(delllc, link, reqresp);
        /* send llc message */
        rc = smc_wr_tx_send(link, pend);
        return rc;
 }
 
-/* send LLC test link request or response */
-int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
-                          enum smc_llc_reqresp reqresp)
+/* send LLC test link request */
+static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
 {
        struct smc_llc_msg_test_link *testllc;
        struct smc_wr_tx_pend_priv *pend;
@@ -287,28 +328,52 @@ int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
        memset(testllc, 0, sizeof(*testllc));
        testllc->hd.common.type = SMC_LLC_TEST_LINK;
        testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
-       if (reqresp == SMC_LLC_RESP)
-               testllc->hd.flags |= SMC_LLC_FLAG_RESP;
        memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
        /* send llc message */
        rc = smc_wr_tx_send(link, pend);
        return rc;
 }
 
-/* send a prepared message */
-static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+struct smc_llc_send_work {
+       struct work_struct work;
+       struct smc_link *link;
+       int llclen;
+       union smc_llc_msg llcbuf;
+};
+
+/* worker that sends a prepared message */
+static void smc_llc_send_message_work(struct work_struct *work)
 {
+       struct smc_llc_send_work *llcwrk = container_of(work,
+                                               struct smc_llc_send_work, work);
        struct smc_wr_tx_pend_priv *pend;
        struct smc_wr_buf *wr_buf;
        int rc;
 
-       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (llcwrk->link->state == SMC_LNK_INACTIVE)
+               goto out;
+       rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
        if (rc)
-               return rc;
-       memcpy(wr_buf, llcbuf, llclen);
-       /* send llc message */
-       rc = smc_wr_tx_send(link, pend);
-       return rc;
+               goto out;
+       memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
+       smc_wr_tx_send(llcwrk->link, pend);
+out:
+       kfree(llcwrk);
+}
+
+/* copy llcbuf and schedule an llc send on link */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+       struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
+
+       if (!wrk)
+               return -ENOMEM;
+       INIT_WORK(&wrk->work, smc_llc_send_message_work);
+       wrk->link = link;
+       wrk->llclen = llclen;
+       memcpy(&wrk->llcbuf, llcbuf, llclen);
+       queue_work(link->llc_wq, &wrk->work);
+       return 0;
 }
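smc_llc_send_message() is now fire-and-forget: callers may run in tasklet context, so the message is copied into a GFP_ATOMIC work item and the link's ordered workqueue (set up in smc_llc_link_init() below) performs the possibly-blocking sends in order. A minimal sketch of the copy-and-transfer-ownership shape (names and the direct worker call are illustrative; the kernel defers via queue_work()):

#include <stdlib.h>
#include <string.h>

struct send_work {
        int len;
        unsigned char buf[44];  /* big enough for one fixed-size LLC message */
};

static void send_worker(struct send_work *w)
{
        /* the blocking wire send of w->buf would happen here */
        free(w);                /* the worker owns and frees the item */
}

/* copy now, send later: safe even if llcbuf lives on the caller's stack
 * or is reused immediately after return */
static int deferred_send(const void *llcbuf, int llclen)
{
        struct send_work *w = malloc(sizeof(*w));

        if (!w)
                return -1;
        w->len = llclen;
        memcpy(w->buf, llcbuf, llclen);
        send_worker(w);         /* stand-in for queue_work(link->llc_wq, ...) */
        return 0;
}

int main(void)
{
        unsigned char msg[44] = { 0x07 };       /* pretend LLC message */

        return deferred_send(msg, sizeof(msg)) ? 1 : 0;
}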
 
 /********************************* receive ***********************************/
@@ -359,17 +424,18 @@ static void smc_llc_rx_add_link(struct smc_link *link,
                }
 
                if (lgr->role == SMC_SERV) {
-                       smc_llc_send_add_link(link,
+                       smc_llc_prep_add_link(llc, link,
                                        link->smcibdev->mac[link->ibport - 1],
                                        &link->smcibdev->gid[link->ibport - 1],
                                        SMC_LLC_REQ);
 
                } else {
-                       smc_llc_send_add_link(link,
+                       smc_llc_prep_add_link(llc, link,
                                        link->smcibdev->mac[link->ibport - 1],
                                        &link->smcibdev->gid[link->ibport - 1],
                                        SMC_LLC_RESP);
                }
+               smc_llc_send_message(link, llc, sizeof(*llc));
        }
 }
 
@@ -385,9 +451,11 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
        } else {
                if (lgr->role == SMC_SERV) {
                        smc_lgr_forget(lgr);
-                       smc_llc_send_delete_link(link, SMC_LLC_REQ);
+                       smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ);
+                       smc_llc_send_message(link, llc, sizeof(*llc));
                } else {
-                       smc_llc_send_delete_link(link, SMC_LLC_RESP);
+                       smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP);
+                       smc_llc_send_message(link, llc, sizeof(*llc));
                        smc_lgr_terminate(lgr);
                }
        }
@@ -397,9 +465,11 @@ static void smc_llc_rx_test_link(struct smc_link *link,
                                 struct smc_llc_msg_test_link *llc)
 {
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-               /* unused as long as we don't send this type of msg */
+               if (link->state == SMC_LNK_ACTIVE)
+                       complete(&link->llc_testlink_resp);
        } else {
-               smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, llc, sizeof(*llc));
        }
 }
 
@@ -412,7 +482,9 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
        lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
 
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-               /* unused as long as we don't send this type of msg */
+               link->llc_confirm_rkey_rc = llc->hd.flags &
+                                           SMC_LLC_FLAG_RKEY_NEG;
+               complete(&link->llc_confirm_rkey);
        } else {
                rc = smc_rtoken_add(lgr,
                                    llc->rtoken[0].rmb_vaddr,
@@ -423,7 +495,7 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
                llc->hd.flags |= SMC_LLC_FLAG_RESP;
                if (rc < 0)
                        llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
-               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+               smc_llc_send_message(link, llc, sizeof(*llc));
        }
 }
 
@@ -435,7 +507,7 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
        } else {
                /* ignore rtokens for other links, we have only one link */
                llc->hd.flags |= SMC_LLC_FLAG_RESP;
-               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+               smc_llc_send_message(link, llc, sizeof(*llc));
        }
 }
 
@@ -463,7 +535,7 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
                }
 
                llc->hd.flags |= SMC_LLC_FLAG_RESP;
-               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+               smc_llc_send_message(link, llc, sizeof(*llc));
        }
 }
 
@@ -476,6 +548,8 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
                return; /* short message */
        if (llc->raw.hdr.length != sizeof(*llc))
                return; /* invalid message */
+       if (link->state == SMC_LNK_INACTIVE)
+               return; /* link not active, drop msg */
 
        switch (llc->raw.hdr.common.type) {
        case SMC_LLC_TEST_LINK:
@@ -502,6 +576,100 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
        }
 }
 
+/***************************** worker, utils *********************************/
+
+static void smc_llc_testlink_work(struct work_struct *work)
+{
+       struct smc_link *link = container_of(to_delayed_work(work),
+                                            struct smc_link, llc_testlink_wrk);
+       unsigned long next_interval;
+       struct smc_link_group *lgr;
+       unsigned long expire_time;
+       u8 user_data[16] = { 0 };
+       int rc;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+       if (link->state != SMC_LNK_ACTIVE)
+               return;         /* don't reschedule worker */
+       expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
+       if (time_is_after_jiffies(expire_time)) {
+               next_interval = expire_time - jiffies;
+               goto out;
+       }
+       reinit_completion(&link->llc_testlink_resp);
+       smc_llc_send_test_link(link, user_data);
+       /* receive TEST LINK response over RoCE fabric */
+       rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
+                                                      SMC_LLC_WAIT_TIME);
+       if (rc <= 0) {
+               smc_lgr_terminate(lgr);
+               return;
+       }
+       next_interval = link->llc_testlink_time;
+out:
+       queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
+                          next_interval);
+}
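The worker implements an idle-based keepalive: a TEST LINK probe is only sent after llc_testlink_time of rx silence; otherwise it re-arms itself for the moment the current interval would expire. The same scheduling arithmetic in a standalone sketch (seconds instead of jiffies; names are mine):

#include <time.h>

/* returns seconds until the keepalive worker should run again */
static time_t next_wakeup(time_t last_rx, time_t interval, time_t now)
{
        time_t expire = last_rx + interval;

        if (expire > now)               /* traffic seen recently: no probe */
                return expire - now;    /* re-check exactly when it expires */
        /* idle for a full interval: send the probe here, await response */
        return interval;
}

int main(void)
{
        /* last rx at t=100s, 10s interval, now t=105s -> wake again in 5s */
        return next_wakeup(100, 10, 105) == 5 ? 0 : 1;
}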
+
+int smc_llc_link_init(struct smc_link *link)
+{
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+       link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x", WQ_MEM_RECLAIM,
+                                              *((u32 *)lgr->id),
+                                              link->link_id);
+       if (!link->llc_wq)
+               return -ENOMEM;
+       init_completion(&link->llc_confirm);
+       init_completion(&link->llc_confirm_resp);
+       init_completion(&link->llc_add);
+       init_completion(&link->llc_add_resp);
+       init_completion(&link->llc_confirm_rkey);
+       init_completion(&link->llc_testlink_resp);
+       INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
+       return 0;
+}
+
+void smc_llc_link_active(struct smc_link *link, int testlink_time)
+{
+       link->state = SMC_LNK_ACTIVE;
+       if (testlink_time) {
+               link->llc_testlink_time = testlink_time * HZ;
+               queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
+                                  link->llc_testlink_time);
+       }
+}
+
+/* called in tasklet context */
+void smc_llc_link_inactive(struct smc_link *link)
+{
+       link->state = SMC_LNK_INACTIVE;
+       cancel_delayed_work(&link->llc_testlink_wrk);
+}
+
+/* called in worker context */
+void smc_llc_link_clear(struct smc_link *link)
+{
+       flush_workqueue(link->llc_wq);
+       destroy_workqueue(link->llc_wq);
+}
+
+/* register a new rtoken at the remote peer */
+int smc_llc_do_confirm_rkey(struct smc_link *link,
+                           struct smc_buf_desc *rmb_desc)
+{
+       int rc;
+
+       reinit_completion(&link->llc_confirm_rkey);
+       smc_llc_send_confirm_rkey(link, rmb_desc);
+       /* receive CONFIRM RKEY response from server over RoCE fabric */
+       rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
+                                                      SMC_LLC_WAIT_TIME);
+       if (rc <= 0 || link->llc_confirm_rkey_rc)
+               return -EFAULT;
+       return 0;
+}
+
 /***************************** init, exit, misc ******************************/
 
 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
index e4a7d5e234d5d61cee281af9ac7e396df34d87f5..65c8645e96a1438febacdcbe3470003d5b75b504 100644 (file)
@@ -42,8 +42,12 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
                          enum smc_llc_reqresp reqresp);
 int smc_llc_send_delete_link(struct smc_link *link,
                             enum smc_llc_reqresp reqresp);
-int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
-                          enum smc_llc_reqresp reqresp);
+int smc_llc_link_init(struct smc_link *link);
+void smc_llc_link_active(struct smc_link *link, int testlink_time);
+void smc_llc_link_inactive(struct smc_link *link);
+void smc_llc_link_clear(struct smc_link *link);
+int smc_llc_do_confirm_rkey(struct smc_link *link,
+                           struct smc_buf_desc *rmb_desc);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
index 74568cdbca7087532c20b891496edeb467b13829..d7b88b2d1b224195b2d82523c047052c67f2e1eb 100644 (file)
@@ -245,40 +245,45 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
                               struct nlattr *tb[])
 {
-       char *string, *ibname = NULL;
-       int rc = 0;
+       char *string, *ibname;
+       int rc;
 
        memset(pnetelem, 0, sizeof(*pnetelem));
        INIT_LIST_HEAD(&pnetelem->list);
-       if (tb[SMC_PNETID_NAME]) {
-               string = (char *)nla_data(tb[SMC_PNETID_NAME]);
-               if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
-                       rc = -EINVAL;
-                       goto error;
-               }
-       }
-       if (tb[SMC_PNETID_ETHNAME]) {
-               string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
-               pnetelem->ndev = dev_get_by_name(net, string);
-               if (!pnetelem->ndev)
-                       return -ENOENT;
-       }
-       if (tb[SMC_PNETID_IBNAME]) {
-               ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
-               ibname = strim(ibname);
-               pnetelem->smcibdev = smc_pnet_find_ib(ibname);
-               if (!pnetelem->smcibdev) {
-                       rc = -ENOENT;
-                       goto error;
-               }
-       }
-       if (tb[SMC_PNETID_IBPORT]) {
-               pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
-               if (pnetelem->ib_port > SMC_MAX_PORTS) {
-                       rc = -EINVAL;
-                       goto error;
-               }
-       }
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_NAME])
+               goto error;
+       string = (char *)nla_data(tb[SMC_PNETID_NAME]);
+       if (!smc_pnetid_valid(string, pnetelem->pnet_name))
+               goto error;
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_ETHNAME])
+               goto error;
+       rc = -ENOENT;
+       string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+       pnetelem->ndev = dev_get_by_name(net, string);
+       if (!pnetelem->ndev)
+               goto error;
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_IBNAME])
+               goto error;
+       rc = -ENOENT;
+       ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+       ibname = strim(ibname);
+       pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+       if (!pnetelem->smcibdev)
+               goto error;
+
+       rc = -EINVAL;
+       if (!tb[SMC_PNETID_IBPORT])
+               goto error;
+       pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+       if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
+               goto error;
+
        return 0;
 
 error:
@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
        void *hdr;
        int rc;
 
+       if (!info->attrs[SMC_PNETID_NAME])
+               return -EINVAL;
        pnetelem = smc_pnet_find_pnetid(
                                (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
        if (!pnetelem)
@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 
 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
 {
+       if (!info->attrs[SMC_PNETID_NAME])
+               return -EINVAL;
        return smc_pnet_remove_by_pnetid(
                                (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
 }
index eff4e0d0bb31098149d523d5744d4a1ff22e7ddd..290a434471d16dfda284ec8eb849d122879ecf01 100644 (file)
 #include "smc_tx.h" /* smc_tx_consumer_update() */
 #include "smc_rx.h"
 
-/* callback implementation for sk.sk_data_ready()
- * to wakeup rcvbuf consumers that blocked with smc_rx_wait_data().
+/* callback implementation to wakeup consumers blocked with smc_rx_wait().
  * indirectly called by smc_cdc_msg_recv_action().
  */
-static void smc_rx_data_ready(struct sock *sk)
+static void smc_rx_wake_up(struct sock *sk)
 {
        struct socket_wq *wq;
 
@@ -44,28 +43,140 @@ static void smc_rx_data_ready(struct sock *sk)
        rcu_read_unlock();
 }
 
+/* Update consumer cursor
+ *   @conn   connection to update
+ *   @cons   consumer cursor
+ *   @len    number of bytes consumed
+ */
+static void smc_rx_update_consumer(struct smc_connection *conn,
+                                  union smc_host_cursor cons, size_t len)
+{
+       smc_curs_add(conn->rmb_desc->len, &cons, len);
+       smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn),
+                      conn);
+       /* send consumer cursor update if required */
+       /* similar to advertising new TCP rcv_wnd if required */
+       smc_tx_consumer_update(conn);
+}
+
+struct smc_spd_priv {
+       struct smc_sock *smc;
+       size_t           len;
+};
+
+static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe,
+                                   struct pipe_buffer *buf)
+{
+       struct smc_spd_priv *priv = (struct smc_spd_priv *)buf->private;
+       struct smc_sock *smc = priv->smc;
+       struct smc_connection *conn;
+       union smc_host_cursor cons;
+       struct sock *sk = &smc->sk;
+
+       if (sk->sk_state == SMC_CLOSED ||
+           sk->sk_state == SMC_PEERFINCLOSEWAIT ||
+           sk->sk_state == SMC_APPFINCLOSEWAIT)
+               goto out;
+       conn = &smc->conn;
+       lock_sock(sk);
+       smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn),
+                      conn);
+       smc_rx_update_consumer(conn, cons, priv->len);
+       release_sock(sk);
+       if (atomic_sub_and_test(priv->len, &conn->splice_pending))
+               smc_rx_wake_up(sk);
+out:
+       kfree(priv);
+       put_page(buf->page);
+       sock_put(sk);
+}
+
+static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+                                  struct pipe_buffer *buf)
+{
+       return 1;
+}
+
+static const struct pipe_buf_operations smc_pipe_ops = {
+       .can_merge = 0,
+       .confirm = generic_pipe_buf_confirm,
+       .release = smc_rx_pipe_buf_release,
+       .steal = smc_rx_pipe_buf_nosteal,
+       .get = generic_pipe_buf_get
+};
+
+static void smc_rx_spd_release(struct splice_pipe_desc *spd,
+                              unsigned int i)
+{
+       put_page(spd->pages[i]);
+}
+
+static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
+                        struct smc_sock *smc)
+{
+       struct splice_pipe_desc spd;
+       struct partial_page partial;
+       struct smc_spd_priv *priv;
+       struct page *page;
+       int bytes;
+
+       page = virt_to_page(smc->conn.rmb_desc->cpu_addr);
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       priv->len = len;
+       priv->smc = smc;
+       partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
+       partial.len = len;
+       partial.private = (unsigned long)priv;
+
+       spd.nr_pages_max = 1;
+       spd.nr_pages = 1;
+       spd.pages = &page;
+       spd.partial = &partial;
+       spd.ops = &smc_pipe_ops;
+       spd.spd_release = smc_rx_spd_release;
+
+       bytes = splice_to_pipe(pipe, &spd);
+       if (bytes > 0) {
+               sock_hold(&smc->sk);
+               get_page(smc->conn.rmb_desc->pages);
+               atomic_add(bytes, &smc->conn.splice_pending);
+       }
+
+       return bytes;
+}
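Note the lifetime handling: once splice_to_pipe() has queued bytes, smc_rx_splice() pins both the socket (sock_hold()) and the RMB's backing page (get_page()) and raises splice_pending. smc_rx_pipe_buf_release() later advances the consumer cursor, decrements splice_pending, and drops both references, so the peer cannot overwrite data that pipe readers have not consumed yet.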
+
+static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
+{
+       return atomic_read(&conn->bytes_to_rcv) &&
+              !atomic_read(&conn->splice_pending);
+}
+
 /* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
  *   @smc    smc socket
  *   @timeo  pointer to max seconds to wait, pointer to value 0 for no timeout
+ *   @fcrit  add'l criterion to evaluate as function pointer
  * Returns:
  * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
  * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
  */
-static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
+int smc_rx_wait(struct smc_sock *smc, long *timeo,
+               int (*fcrit)(struct smc_connection *conn))
 {
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct smc_connection *conn = &smc->conn;
        struct sock *sk = &smc->sk;
        int rc;
 
-       if (atomic_read(&conn->bytes_to_rcv))
+       if (fcrit(conn))
                return 1;
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        add_wait_queue(sk_sleep(sk), &wait);
        rc = sk_wait_event(sk, timeo,
                           sk->sk_err ||
                           sk->sk_shutdown & RCV_SHUTDOWN ||
-                          atomic_read(&conn->bytes_to_rcv) ||
+                          fcrit(conn) ||
                           smc_cdc_rxed_any_close_or_senddone(conn),
                           &wait);
        remove_wait_queue(sk_sleep(sk), &wait);
@@ -73,19 +184,25 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
        return rc;
 }
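smc_rx_wait() generalizes the old smc_rx_wait_data() by taking the wake-up criterion as a function pointer: recvmsg waits for data, while splice additionally waits until earlier splice I/O has drained. A minimal sketch of the predicate-pointer pattern (userspace stand-ins; a spin loop replaces the kernel wait-queue machinery):

#include <sched.h>
#include <stdatomic.h>

struct conn {
        atomic_int bytes_to_rcv;
        atomic_int splice_pending;
};

static int data_available(struct conn *c)
{
        return atomic_load(&c->bytes_to_rcv);
}

static int data_available_no_splice(struct conn *c)
{
        return atomic_load(&c->bytes_to_rcv) &&
               !atomic_load(&c->splice_pending);
}

/* spin-wait placeholder standing in for sk_wait_event() */
static void wait_for(struct conn *c, int (*fcrit)(struct conn *))
{
        while (!fcrit(c))
                sched_yield();
}

int main(void)
{
        struct conn c = { 0 };

        atomic_store(&c.bytes_to_rcv, 1);
        wait_for(&c, data_available);           /* returns immediately */
        wait_for(&c, data_available_no_splice); /* splice_pending is 0 */
        return 0;
}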
 
-/* rcvbuf consumer: main API called by socket layer.
- * called under sk lock.
+/* smc_rx_recvmsg - receive data from RMBE
+ * @msg:       copy data to receive buffer
+ * @pipe:      copy data to pipe if set - indicates splice() call
+ *
+ * rcvbuf consumer: main API called by socket layer.
+ * Called under sk lock.
  */
-int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
-                  int flags)
+int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
+                  struct pipe_inode_info *pipe, size_t len, int flags)
 {
        size_t copylen, read_done = 0, read_remaining = len;
        size_t chunk_len, chunk_off, chunk_len_sum;
        struct smc_connection *conn = &smc->conn;
+       int (*func)(struct smc_connection *conn);
        union smc_host_cursor cons;
        int readable, chunk;
        char *rcvbuf_base;
        struct sock *sk;
+       int splbytes;
        long timeo;
        int target;             /* Read at least this many bytes */
        int rc;
@@ -101,37 +218,32 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
-       msg->msg_namelen = 0;
        /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
        rcvbuf_base = conn->rmb_desc->cpu_addr;
 
        do { /* while (read_remaining) */
-               if (read_done >= target)
+               if (read_done >= target || (pipe && read_done))
                        break;
 
                if (atomic_read(&conn->bytes_to_rcv))
                        goto copy;
 
+               if (sk->sk_shutdown & RCV_SHUTDOWN ||
+                   smc_cdc_rxed_any_close_or_senddone(conn) ||
+                   conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+                       break;
+
                if (read_done) {
                        if (sk->sk_err ||
                            sk->sk_state == SMC_CLOSED ||
-                           sk->sk_shutdown & RCV_SHUTDOWN ||
                            !timeo ||
-                           signal_pending(current) ||
-                           smc_cdc_rxed_any_close_or_senddone(conn) ||
-                           conn->local_tx_ctrl.conn_state_flags.
-                           peer_conn_abort)
+                           signal_pending(current))
                                break;
                } else {
                        if (sk->sk_err) {
                                read_done = sock_error(sk);
                                break;
                        }
-                       if (sk->sk_shutdown & RCV_SHUTDOWN ||
-                           smc_cdc_rxed_any_close_or_senddone(conn) ||
-                           conn->local_tx_ctrl.conn_state_flags.
-                           peer_conn_abort)
-                               break;
                        if (sk->sk_state == SMC_CLOSED) {
                                if (!sock_flag(sk, SOCK_DONE)) {
                                        /* This occurs when user tries to read
@@ -150,32 +262,52 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
                                return -EAGAIN;
                }
 
-               if (!atomic_read(&conn->bytes_to_rcv)) {
-                       smc_rx_wait_data(smc, &timeo);
+               if (!smc_rx_data_available(conn)) {
+                       smc_rx_wait(smc, &timeo, smc_rx_data_available);
                        continue;
                }
 
 copy:
                /* initialize variables for 1st iteration of subsequent loop */
-               /* could be just 1 byte, even after smc_rx_wait_data above */
+               /* could be just 1 byte, even after waiting on data above */
                readable = atomic_read(&conn->bytes_to_rcv);
+               splbytes = atomic_read(&conn->splice_pending);
+               if (!readable || (msg && splbytes)) {
+                       if (splbytes)
+                               func = smc_rx_data_available_and_no_splice_pend;
+                       else
+                               func = smc_rx_data_available;
+                       smc_rx_wait(smc, &timeo, func);
+                       continue;
+               }
+
                /* not more than what user space asked for */
                copylen = min_t(size_t, read_remaining, readable);
                smc_curs_write(&cons,
                               smc_curs_read(&conn->local_tx_ctrl.cons, conn),
                               conn);
+               /* subsequent splice() calls pick up where previous left */
+               if (splbytes)
+                       smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
                /* determine chunks where to read from rcvbuf */
                /* either unwrapped case, or 1st chunk of wrapped case */
-               chunk_len = min_t(size_t,
-                                 copylen, conn->rmbe_size - cons.count);
+               chunk_len = min_t(size_t, copylen, conn->rmb_desc->len -
+                                 cons.count);
                chunk_len_sum = chunk_len;
                chunk_off = cons.count;
                smc_rmb_sync_sg_for_cpu(conn);
                for (chunk = 0; chunk < 2; chunk++) {
                        if (!(flags & MSG_TRUNC)) {
-                               rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
-                                                  chunk_len);
-                               if (rc) {
+                               if (msg) {
+                                       rc = memcpy_to_msg(msg, rcvbuf_base +
+                                                          chunk_off,
+                                                          chunk_len);
+                               } else {
+                                       rc = smc_rx_splice(pipe, rcvbuf_base +
+                                                       chunk_off, chunk_len,
+                                                       smc);
+                               }
+                               if (rc < 0) {
                                        if (!read_done)
                                                read_done = -EFAULT;
                                        smc_rmb_sync_sg_for_device(conn);
@@ -196,18 +328,13 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
 
                /* update cursors */
                if (!(flags & MSG_PEEK)) {
-                       smc_curs_add(conn->rmbe_size, &cons, copylen);
                        /* increased in recv tasklet smc_cdc_msg_rcv() */
                        smp_mb__before_atomic();
                        atomic_sub(copylen, &conn->bytes_to_rcv);
-                       /* guarantee 0 <= bytes_to_rcv <= rmbe_size */
+                       /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
                        smp_mb__after_atomic();
-                       smc_curs_write(&conn->local_tx_ctrl.cons,
-                                      smc_curs_read(&cons, conn),
-                                      conn);
-                       /* send consumer cursor update if required */
-                       /* similar to advertising new TCP rcv_wnd if required */
-                       smc_tx_consumer_update(conn);
+                       if (msg)
+                               smc_rx_update_consumer(conn, cons, copylen);
                }
        } while (read_remaining);
 out:
@@ -217,5 +344,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
 /* Initialize receive properties on connection establishment. NB: not __init! */
 void smc_rx_init(struct smc_sock *smc)
 {
-       smc->sk.sk_data_ready = smc_rx_data_ready;
+       smc->sk.sk_data_ready = smc_rx_wake_up;
+       atomic_set(&smc->conn.splice_pending, 0);
 }
index 3a32b59bf06c56b52871a3ea51f758d7677dc8b3..db823c97d824ea6ebd66ccd2f0e38e82528b8e96 100644 (file)
 #include "smc.h"
 
 void smc_rx_init(struct smc_sock *smc);
-int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
-                  int flags);
+
+int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
+                  struct pipe_inode_info *pipe, size_t len, int flags);
+int smc_rx_wait(struct smc_sock *smc, long *timeo,
+               int (*fcrit)(struct smc_connection *conn));
+static inline int smc_rx_data_available(struct smc_connection *conn)
+{
+       return atomic_read(&conn->bytes_to_rcv);
+}
 
 #endif /* SMC_RX_H */
index 72f004c9c9b13c2f9b4637af8e77aba3772d7bec..1f4a38b857f0a725014fa75bc842d509453a7c87 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/sched/signal.h>
 
 #include <net/sock.h>
+#include <net/tcp.h>
 
 #include "smc.h"
 #include "smc_wr.h"
@@ -26,6 +27,7 @@
 #include "smc_tx.h"
 
 #define SMC_TX_WORK_DELAY      HZ
+#define SMC_TX_CORK_DELAY      (HZ >> 2)       /* 250 ms */
 
 /***************************** sndbuf producer *******************************/
 
@@ -115,6 +117,13 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
        return rc;
 }
 
+static bool smc_tx_is_corked(struct smc_sock *smc)
+{
+       struct tcp_sock *tp = tcp_sk(smc->clcsock->sk);
+
+       return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
+}
+
 /* sndbuf producer: main API called by socket layer.
  * called under sock lock.
  */
@@ -171,8 +180,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                tx_cnt_prep = prep.count;
                /* determine chunks where to write into sndbuf */
                /* either unwrapped case, or 1st chunk of wrapped case */
-               chunk_len = min_t(size_t,
-                                 copylen, conn->sndbuf_size - tx_cnt_prep);
+               chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len -
+                                 tx_cnt_prep);
                chunk_len_sum = chunk_len;
                chunk_off = tx_cnt_prep;
                smc_sndbuf_sync_sg_for_cpu(conn);
@@ -197,19 +206,28 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                }
                smc_sndbuf_sync_sg_for_device(conn);
                /* update cursors */
-               smc_curs_add(conn->sndbuf_size, &prep, copylen);
+               smc_curs_add(conn->sndbuf_desc->len, &prep, copylen);
                smc_curs_write(&conn->tx_curs_prep,
                               smc_curs_read(&prep, conn),
                               conn);
                /* increased in send tasklet smc_cdc_tx_handler() */
                smp_mb__before_atomic();
                atomic_sub(copylen, &conn->sndbuf_space);
-               /* guarantee 0 <= sndbuf_space <= sndbuf_size */
+               /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
                smp_mb__after_atomic();
                /* since we just produced more new data into sndbuf,
                 * trigger sndbuf consumer: RDMA write into peer RMBE and CDC
                 */
-               smc_tx_sndbuf_nonempty(conn);
+               if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
+                   (atomic_read(&conn->sndbuf_space) >
+                                               (conn->sndbuf_desc->len >> 1)))
+                       /* for a corked socket, defer the RDMA writes if there
+                        * is still sufficient sndbuf_space available
+                        */
+                       schedule_delayed_work(&conn->tx_work,
+                                             SMC_TX_CORK_DELAY);
+               else
+                       smc_tx_sndbuf_nonempty(conn);
        } /* while (msg_data_left(msg)) */
 
        return send_done;
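
The deferral added above only triggers while more than half of sndbuf_desc->len is still free; otherwise the RDMA write is issued immediately, and tx_work flushes any deferred data after SMC_TX_CORK_DELAY (250 ms). A userspace sketch of the two ways to hit smc_tx_is_corked()/MSG_MORE, assuming af_smc passes SOL_TCP options through to the internal CLC TCP socket that smc_tx_is_corked() inspects:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void corked_send(int fd, const void *hdr, size_t hlen,
			const void *body, size_t blen)
{
	int on = 1, off = 0;

	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
	send(fd, hdr, hlen, 0);		/* RDMA write deferred */
	send(fd, body, blen, 0);	/* still corked */
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));

	/* equivalent per-call form: send(fd, hdr, hlen, MSG_MORE); */
}
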
@@ -243,7 +261,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
        rdma_wr.remote_addr =
                lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
                /* RMBE within RMB */
-               ((conn->peer_conn_idx - 1) * conn->peer_rmbe_size) +
+               conn->tx_off +
                /* offset within RMBE */
                peer_rmbe_offset;
        rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
@@ -268,7 +286,7 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
        atomic_sub(len, &conn->peer_rmbe_space);
        /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
        smp_mb__after_atomic();
-       smc_curs_add(conn->sndbuf_size, sent, len);
+       smc_curs_add(conn->sndbuf_desc->len, sent, len);
 }
 
 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
@@ -291,7 +309,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
        smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
        smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
        /* cf. wmem_alloc - (snd_max - snd_una) */
-       to_send = smc_curs_diff(conn->sndbuf_size, &sent, &prep);
+       to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
        if (to_send <= 0)
                return 0;
 
@@ -333,12 +351,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
        dst_len_sum = dst_len;
        src_off = sent.count;
        /* dst_len determines the maximum src_len */
-       if (sent.count + dst_len <= conn->sndbuf_size) {
+       if (sent.count + dst_len <= conn->sndbuf_desc->len) {
                /* unwrapped src case: single chunk of entire dst_len */
                src_len = dst_len;
        } else {
                /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
-               src_len = conn->sndbuf_size - sent.count;
+               src_len = conn->sndbuf_desc->len - sent.count;
        }
        src_len_sum = src_len;
        dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
@@ -350,8 +368,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
                        sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
                        num_sges++;
                        src_off += src_len;
-                       if (src_off >= conn->sndbuf_size)
-                               src_off -= conn->sndbuf_size;
+                       if (src_off >= conn->sndbuf_desc->len)
+                               src_off -= conn->sndbuf_desc->len;
                                                /* modulo in send ring */
                        if (src_len_sum == dst_len)
                                break; /* either on 1st or 2nd iteration */
@@ -369,7 +387,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
                dst_len = len - dst_len; /* remainder */
                dst_len_sum += dst_len;
                src_len = min_t(int,
-                               dst_len, conn->sndbuf_size - sent.count);
+                               dst_len, conn->sndbuf_desc->len - sent.count);
                src_len_sum = src_len;
        }
 
@@ -409,8 +427,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
                        }
                        rc = 0;
                        if (conn->alert_token_local) /* connection healthy */
-                               schedule_delayed_work(&conn->tx_work,
-                                                     SMC_TX_WORK_DELAY);
+                               mod_delayed_work(system_wq, &conn->tx_work,
+                                                SMC_TX_WORK_DELAY);
                }
                goto out_unlock;
        }
@@ -432,7 +450,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 /* Wakeup sndbuf consumers from process context
  * since there is more data to transmit
  */
-static void smc_tx_work(struct work_struct *work)
+void smc_tx_work(struct work_struct *work)
 {
        struct smc_connection *conn = container_of(to_delayed_work(work),
                                                   struct smc_connection,
@@ -466,11 +484,11 @@ void smc_tx_consumer_update(struct smc_connection *conn)
        smc_curs_write(&cfed,
                       smc_curs_read(&conn->rx_curs_confirmed, conn),
                       conn);
-       to_confirm = smc_curs_diff(conn->rmbe_size, &cfed, &cons);
+       to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
 
        if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
            ((to_confirm > conn->rmbe_update_limit) &&
-            ((to_confirm > (conn->rmbe_size / 2)) ||
+            ((to_confirm > (conn->rmb_desc->len / 2)) ||
              conn->local_rx_ctrl.prod_flags.write_blocked))) {
                if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
                    conn->alert_token_local) { /* connection healthy */
@@ -494,6 +512,4 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 void smc_tx_init(struct smc_sock *smc)
 {
        smc->sk.sk_write_space = smc_tx_write_space;
-       INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
-       spin_lock_init(&smc->conn.send_lock);
 }
index 78255964fa4dc1c69f96548e035e74a167999a62..44d0779429769b7161ae7c9e667ad833e4db8e61 100644 (file)
@@ -24,9 +24,10 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
 
        smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
        smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
-       return smc_curs_diff(conn->sndbuf_size, &sent, &prep);
+       return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
 }
 
+void smc_tx_work(struct work_struct *work);
 void smc_tx_init(struct smc_sock *smc);
 int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
 int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
index 1b8af23e6e2b96fc12fd1985eab5c8f16d407003..cc7c1bb60fe87115e942f96f7c6a87602ccd6094 100644 (file)
@@ -376,6 +376,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
        for (i = 0; i < num; i++) {
                link = wc[i].qp->qp_context;
                if (wc[i].status == IB_WC_SUCCESS) {
+                       link->wr_rx_tstamp = jiffies;
                        smc_wr_rx_demultiplex(&wc[i]);
                        smc_wr_rx_post(link); /* refill WR RX */
                } else {
index 805b139756db4699f0148b9e26bdd8b6a37d2e10..092bebc7004835fc4ad56a5474e16c6876c910ac 100644 (file)
@@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
 
 static void strp_start_timer(struct strparser *strp, long timeo)
 {
-       if (timeo)
+       if (timeo && timeo != LONG_MAX)
                mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
 }
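
The new condition matters because a socket with no receive timeout reports MAX_SCHEDULE_TIMEOUT, which the kernel defines as LONG_MAX; without the check the parser would arm msg_timer_work even in the "block forever" case. A sketch of the relationship, assuming (as the callers in strparser.c do) that timeo originates from sk->sk_rcvtimeo:

static void demo_arm_msg_timer(struct strparser *strp, struct sock *sk)
{
	/* sk_rcvtimeo defaults to MAX_SCHEDULE_TIMEOUT (== LONG_MAX)
	 * when SO_RCVTIMEO is unset
	 */
	long timeo = sock_rcvtimeo(sk, 0);

	strp_start_timer(strp, timeo);	/* now a no-op for the infinite case */
}
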
 
index 5cc68a824f451b16d66a88c7cb43f2d16628ea38..f2f63959fddd3f5b6610fb9e566f06f42e5377b1 100644 (file)
@@ -72,6 +72,7 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
        if (IS_ERR(mr->fmr.fm_mr))
                goto out_fmr_err;
 
+       INIT_LIST_HEAD(&mr->mr_list);
        return 0;
 
 out_fmr_err:
@@ -102,10 +103,6 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
        LIST_HEAD(unmap_list);
        int rc;
 
-       /* Ensure MW is not on any rl_registered list */
-       if (!list_empty(&mr->mr_list))
-               list_del(&mr->mr_list);
-
        kfree(mr->fmr.fm_physaddrs);
        kfree(mr->mr_sg);
 
index c5743a0960be46a10a67b6021ab5332d9a0b37a8..c59c5c788db0e8478ef06a7d7615dd2e813ef395 100644 (file)
@@ -110,6 +110,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
        if (!mr->mr_sg)
                goto out_list_err;
 
+       INIT_LIST_HEAD(&mr->mr_list);
        sg_init_table(mr->mr_sg, depth);
        init_completion(&frwr->fr_linv_done);
        return 0;
@@ -133,10 +134,6 @@ frwr_op_release_mr(struct rpcrdma_mr *mr)
 {
        int rc;
 
-       /* Ensure MR is not on any rl_registered list */
-       if (!list_empty(&mr->mr_list))
-               list_del(&mr->mr_list);
-
        rc = ib_dereg_mr(mr->frwr.fr_mr);
        if (rc)
                pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
@@ -195,7 +192,7 @@ frwr_op_recover_mr(struct rpcrdma_mr *mr)
        return;
 
 out_release:
-       pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
+       pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
        r_xprt->rx_stats.mrs_orphaned++;
 
        spin_lock(&r_xprt->rx_buf.rb_mrlock);
@@ -476,7 +473,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 
        list_for_each_entry(mr, mrs, mr_list)
                if (mr->mr_handle == rep->rr_inv_rkey) {
-                       list_del(&mr->mr_list);
+                       list_del_init(&mr->mr_list);
                        trace_xprtrdma_remoteinv(mr);
                        mr->frwr.fr_state = FRWR_IS_INVALID;
                        rpcrdma_mr_unmap_and_put(mr);
index fe5eaca2d19744df58f0e240766048d4a8be25e9..c345d365af886514d61c35c79291718a77f155e7 100644 (file)
@@ -1254,6 +1254,11 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
                list_del(&mr->mr_all);
 
                spin_unlock(&buf->rb_mrlock);
+
+               /* Ensure MW is not on any rl_registered list */
+               if (!list_empty(&mr->mr_list))
+                       list_del(&mr->mr_list);
+
                ia->ri_ops->ro_release_mr(mr);
                count++;
                spin_lock(&buf->rb_mrlock);
index 3d3b423fa9c1d3a9cf8497d109c25599042bea68..cb41b12a3bf8d985ae155d7977cb69f043af4f1c 100644 (file)
@@ -380,7 +380,7 @@ rpcrdma_mr_pop(struct list_head *list)
        struct rpcrdma_mr *mr;
 
        mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
-       list_del(&mr->mr_list);
+       list_del_init(&mr->mr_list);
        return mr;
 }
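
The switch from list_del() to list_del_init() here and in frwr_op_reminv() is what allows the "still on a list" test to move into rpcrdma_mrs_destroy(): list_empty() is only meaningful on a node that was re-initialized. A minimal sketch (not xprtrdma code) of the invariant:

#include <linux/list.h>

static void demo(struct list_head *head, struct list_head *node)
{
	list_add(node, head);
	list_del(node);
	/* node->next/prev now hold LIST_POISON1/2: list_empty(node) is
	 * false and a second list_del() would oops
	 */

	list_add(node, head);
	list_del_init(node);
	/* node points back at itself: list_empty(node) == true, safe to
	 * test before a final removal, as rpcrdma_mrs_destroy() does
	 */
}
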
 
index dd1c4fa2eb78aef7357f7e76a0be55832944e4b8..bebe88cae07bef565d191aea1bb08acd1f3c1cbd 100644 (file)
@@ -136,12 +136,12 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
 }
 
 /**
- * tipc_service_find_range - find service range matching a service instance
+ * tipc_service_first_range - find first service range in tree matching instance
  *
  * Very time-critical, so binary search through range rb tree
  */
-static struct service_range *tipc_service_find_range(struct tipc_service *sc,
-                                                    u32 instance)
+static struct service_range *tipc_service_first_range(struct tipc_service *sc,
+                                                     u32 instance)
 {
        struct rb_node *n = sc->ranges.rb_node;
        struct service_range *sr;
@@ -158,6 +158,30 @@ static struct service_range *tipc_service_find_range(struct tipc_service *sc,
        return NULL;
 }
 
+/*  tipc_service_find_range - find service range matching publication parameters
+ */
+static struct service_range *tipc_service_find_range(struct tipc_service *sc,
+                                                    u32 lower, u32 upper)
+{
+       struct rb_node *n = sc->ranges.rb_node;
+       struct service_range *sr;
+
+       sr = tipc_service_first_range(sc, lower);
+       if (!sr)
+               return NULL;
+
+       /* Look for exact match */
+       for (n = &sr->tree_node; n; n = rb_next(n)) {
+               sr = container_of(n, struct service_range, tree_node);
+               if (sr->upper == upper)
+                       break;
+       }
+       if (!n || sr->lower != lower || sr->upper != upper)
+               return NULL;
+
+       return sr;
+}
+
 static struct service_range *tipc_service_create_range(struct tipc_service *sc,
                                                       u32 lower, u32 upper)
 {
@@ -238,54 +262,19 @@ static struct publication *tipc_service_insert_publ(struct net *net,
 /**
  * tipc_service_remove_publ - remove a publication from a service
  */
-static struct publication *tipc_service_remove_publ(struct net *net,
-                                                   struct tipc_service *sc,
-                                                   u32 lower, u32 upper,
-                                                   u32 node, u32 key,
-                                                   struct service_range **rng)
+static struct publication *tipc_service_remove_publ(struct service_range *sr,
+                                                   u32 node, u32 key)
 {
-       struct tipc_subscription *sub, *tmp;
-       struct service_range *sr;
        struct publication *p;
-       bool found = false;
-       bool last = false;
-       struct rb_node *n;
-
-       sr = tipc_service_find_range(sc, lower);
-       if (!sr)
-               return NULL;
 
-       /* Find exact matching service range */
-       for (n = &sr->tree_node; n; n = rb_next(n)) {
-               sr = container_of(n, struct service_range, tree_node);
-               if (sr->upper == upper)
-                       break;
-       }
-       if (!n || sr->lower != lower || sr->upper != upper)
-               return NULL;
-
-       /* Find publication, if it exists */
        list_for_each_entry(p, &sr->all_publ, all_publ) {
                if (p->key != key || (node && node != p->node))
                        continue;
-               found = true;
-               break;
+               list_del(&p->all_publ);
+               list_del(&p->local_publ);
+               return p;
        }
-       if (!found)
-               return NULL;
-
-       list_del(&p->all_publ);
-       list_del(&p->local_publ);
-       if (list_empty(&sr->all_publ))
-               last = true;
-
-       /* Notify any waiting subscriptions */
-       list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-               tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_WITHDRAWN,
-                                       p->port, p->node, p->scope, last);
-       }
-       *rng = sr;
-       return p;
+       return NULL;
 }
 
 /**
@@ -376,17 +365,31 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
                                             u32 node, u32 key)
 {
        struct tipc_service *sc = tipc_service_find(net, type);
+       struct tipc_subscription *sub, *tmp;
        struct service_range *sr = NULL;
        struct publication *p = NULL;
+       bool last;
 
        if (!sc)
                return NULL;
 
        spin_lock_bh(&sc->lock);
-       p = tipc_service_remove_publ(net, sc, lower, upper, node, key, &sr);
+       sr = tipc_service_find_range(sc, lower, upper);
+       if (!sr)
+               goto exit;
+       p = tipc_service_remove_publ(sr, node, key);
+       if (!p)
+               goto exit;
+
+       /* Notify any waiting subscriptions */
+       last = list_empty(&sr->all_publ);
+       list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+               tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN,
+                                       p->port, node, p->scope, last);
+       }
 
        /* Remove service range item if this was its last publication */
-       if (sr && list_empty(&sr->all_publ)) {
+       if (list_empty(&sr->all_publ)) {
                rb_erase(&sr->tree_node, &sc->ranges);
                kfree(sr);
        }
@@ -396,6 +399,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
                hlist_del_init_rcu(&sc->service_list);
                kfree_rcu(sc, rcu);
        }
+exit:
        spin_unlock_bh(&sc->lock);
        return p;
 }
@@ -437,7 +441,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
                goto not_found;
 
        spin_lock_bh(&sc->lock);
-       sr = tipc_service_find_range(sc, instance);
+       sr = tipc_service_first_range(sc, instance);
        if (unlikely(!sr))
                goto no_match;
 
@@ -484,7 +488,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 
        spin_lock_bh(&sc->lock);
 
-       sr = tipc_service_find_range(sc, instance);
+       sr = tipc_service_first_range(sc, instance);
        if (!sr)
                goto no_match;
 
@@ -756,8 +760,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
        spin_lock_bh(&sc->lock);
        rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
                list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
-                       tipc_service_remove_publ(net, sc, p->lower, p->upper,
-                                                p->node, p->key, &sr);
+                       tipc_service_remove_publ(sr, p->node, p->key);
                        kfree_rcu(p, rcu);
                }
                rb_erase(&sr->tree_node, &sc->ranges);
index e9c52e1416c55ee4d9e644f36cd7a8f0f0b002a0..6a44eb812baf4a2fe31eeb55b04023f9f402666b 100644 (file)
@@ -195,6 +195,27 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
        return mtu;
 }
 
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id)
+{
+       u8 *own_id = tipc_own_id(net);
+       struct tipc_node *n;
+
+       if (!own_id)
+               return true;
+
+       if (addr == tipc_own_addr(net)) {
+               memcpy(id, own_id, TIPC_NODEID_LEN);
+               return true;
+       }
+       n = tipc_node_find(net, addr);
+       if (!n)
+               return false;
+
+       memcpy(id, &n->peer_id, TIPC_NODEID_LEN);
+       tipc_node_put(n);
+       return true;
+}
+
 u16 tipc_node_get_capabilities(struct net *net, u32 addr)
 {
        struct tipc_node *n;
@@ -1956,6 +1977,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
 int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = genl_info_net(info);
+       struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
        struct tipc_nl_msg msg;
        char *name;
        int err;
@@ -1963,9 +1985,19 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
        msg.portid = info->snd_portid;
        msg.seq = info->snd_seq;
 
-       if (!info->attrs[TIPC_NLA_LINK_NAME])
+       if (!info->attrs[TIPC_NLA_LINK])
+               return -EINVAL;
+
+       err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
+                              info->attrs[TIPC_NLA_LINK],
+                              tipc_nl_link_policy, info->extack);
+       if (err)
+               return err;
+
+       if (!attrs[TIPC_NLA_LINK_NAME])
                return -EINVAL;
-       name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
+
+       name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
 
        msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!msg.skb)
@@ -2250,7 +2282,7 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb)
 
        rtnl_lock();
        for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) {
-               err = __tipc_nl_add_monitor(net, &msg, prev_bearer);
+               err = __tipc_nl_add_monitor(net, &msg, bearer_id);
                if (err)
                        break;
        }
index bb271a37c93f841443cd33d1a510118638be39f5..846c8f240872f25c93af27edcb32692916223b4a 100644 (file)
@@ -60,6 +60,7 @@ enum {
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
 u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
 void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
                          struct tipc_bearer *bearer,
index 252a52ae0893261fc6f146ad81111c59f375fdce..930852c54d7a6e97207c61a7c942e487781457e7 100644 (file)
@@ -1516,10 +1516,10 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
 
        srcaddr->sock.family = AF_TIPC;
        srcaddr->sock.addrtype = TIPC_ADDR_ID;
+       srcaddr->sock.scope = 0;
        srcaddr->sock.addr.id.ref = msg_origport(hdr);
        srcaddr->sock.addr.id.node = msg_orignode(hdr);
        srcaddr->sock.addr.name.domain = 0;
-       srcaddr->sock.scope = 0;
        m->msg_namelen = sizeof(struct sockaddr_tipc);
 
        if (!msg_in_group(hdr))
@@ -1528,6 +1528,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
        /* Group message users may also want to know sending member's id */
        srcaddr->member.family = AF_TIPC;
        srcaddr->member.addrtype = TIPC_ADDR_NAME;
+       srcaddr->member.scope = 0;
        srcaddr->member.addr.name.name.type = msg_nametype(hdr);
        srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
        srcaddr->member.addr.name.domain = 0;
@@ -2973,7 +2974,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
 
 static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
-       struct sock *sk = sock->sk;
+       struct net *net = sock_net(sock->sk);
+       struct tipc_sioc_nodeid_req nr = {0};
        struct tipc_sioc_ln_req lnr;
        void __user *argp = (void __user *)arg;
 
@@ -2981,7 +2983,7 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCGETLINKNAME:
                if (copy_from_user(&lnr, argp, sizeof(lnr)))
                        return -EFAULT;
-               if (!tipc_node_get_linkname(sock_net(sk),
+               if (!tipc_node_get_linkname(net,
                                            lnr.bearer_id & 0xffff, lnr.peer,
                                            lnr.linkname, TIPC_MAX_LINK_NAME)) {
                        if (copy_to_user(argp, &lnr, sizeof(lnr)))
@@ -2989,6 +2991,14 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                        return 0;
                }
                return -EADDRNOTAVAIL;
+       case SIOCGETNODEID:
+               if (copy_from_user(&nr, argp, sizeof(nr)))
+                       return -EFAULT;
+               if (!tipc_node_get_id(net, nr.peer, nr.node_id))
+                       return -EADDRNOTAVAIL;
+               if (copy_to_user(argp, &nr, sizeof(nr)))
+                       return -EFAULT;
+               return 0;
        default:
                return -ENOIOCTLCMD;
        }
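
For reference, a userspace sketch of the new SIOCGETNODEID call, assuming the UAPI struct tipc_sioc_nodeid_req carries the peer address in .peer and returns TIPC_NODEID_LEN bytes in .node_id, matching what the kernel side above reads and writes:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/tipc.h>

static int get_node_id(int tipc_fd, __u32 peer,
		       char id[TIPC_NODEID_LEN])
{
	struct tipc_sioc_nodeid_req nr;

	memset(&nr, 0, sizeof(nr));
	nr.peer = peer;
	if (ioctl(tipc_fd, SIOCGETNODEID, &nr) < 0)
		return -1;	/* e.g. EADDRNOTAVAIL for unknown peers */
	memcpy(id, nr.node_id, TIPC_NODEID_LEN);
	return 0;
}
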
index 89b8745a986f06c00bd35f7a4fc6fff47c25120a..73f05ece53d0c955df2bac6d043e1e354d8fb7dc 100644 (file)
@@ -14,3 +14,13 @@ config TLS
        encryption handling of the TLS protocol to be done in-kernel.
 
        If unsure, say N.
+
+config TLS_DEVICE
+       bool "Transport Layer Security HW offload"
+       depends on TLS
+       select SOCK_VALIDATE_XMIT
+       default n
+       help
+       Enable kernel support for HW offload of the TLS protocol.
+
+       If unsure, say N.
index a930fd1c4f7b88651d5686355cbea8ba55942941..4d6b728a67d0c122a41c1c0d5d5840598ba56472 100644 (file)
@@ -5,3 +5,5 @@
 obj-$(CONFIG_TLS) += tls.o
 
 tls-y := tls_main.o tls_sw.o
+
+tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
new file mode 100644 (file)
index 0000000..a7a8f8e
--- /dev/null
@@ -0,0 +1,766 @@
+/* Copyright (c) 2018, Mellanox Technologies All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <crypto/aead.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/dst.h>
+#include <net/inet_connection_sock.h>
+#include <net/tcp.h>
+#include <net/tls.h>
+
+/* device_offload_lock is used to synchronize tls_dev_add
+ * against NETDEV_DOWN notifications.
+ */
+static DECLARE_RWSEM(device_offload_lock);
+
+static void tls_device_gc_task(struct work_struct *work);
+
+static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
+static LIST_HEAD(tls_device_gc_list);
+static LIST_HEAD(tls_device_list);
+static DEFINE_SPINLOCK(tls_device_lock);
+
+static void tls_device_free_ctx(struct tls_context *ctx)
+{
+       struct tls_offload_context *offload_ctx = tls_offload_ctx(ctx);
+
+       kfree(offload_ctx);
+       kfree(ctx);
+}
+
+static void tls_device_gc_task(struct work_struct *work)
+{
+       struct tls_context *ctx, *tmp;
+       unsigned long flags;
+       LIST_HEAD(gc_list);
+
+       spin_lock_irqsave(&tls_device_lock, flags);
+       list_splice_init(&tls_device_gc_list, &gc_list);
+       spin_unlock_irqrestore(&tls_device_lock, flags);
+
+       list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
+               struct net_device *netdev = ctx->netdev;
+
+               if (netdev) {
+                       netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+                                                       TLS_OFFLOAD_CTX_DIR_TX);
+                       dev_put(netdev);
+               }
+
+               list_del(&ctx->list);
+               tls_device_free_ctx(ctx);
+       }
+}
+
+static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&tls_device_lock, flags);
+       list_move_tail(&ctx->list, &tls_device_gc_list);
+
+       /* schedule_work inside the spinlock
+        * to make sure tls_device_down waits for that work.
+        */
+       schedule_work(&tls_device_gc_work);
+
+       spin_unlock_irqrestore(&tls_device_lock, flags);
+}
+
+/* We assume that the socket is already connected */
+static struct net_device *get_netdev_for_sock(struct sock *sk)
+{
+       struct dst_entry *dst = sk_dst_get(sk);
+       struct net_device *netdev = NULL;
+
+       if (likely(dst)) {
+               netdev = dst->dev;
+               dev_hold(netdev);
+       }
+
+       dst_release(dst);
+
+       return netdev;
+}
+
+static void destroy_record(struct tls_record_info *record)
+{
+       int nr_frags = record->num_frags;
+       skb_frag_t *frag;
+
+       while (nr_frags-- > 0) {
+               frag = &record->frags[nr_frags];
+               __skb_frag_unref(frag);
+       }
+       kfree(record);
+}
+
+static void delete_all_records(struct tls_offload_context *offload_ctx)
+{
+       struct tls_record_info *info, *temp;
+
+       list_for_each_entry_safe(info, temp, &offload_ctx->records_list, list) {
+               list_del(&info->list);
+               destroy_record(info);
+       }
+
+       offload_ctx->retransmit_hint = NULL;
+}
+
+static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_record_info *info, *temp;
+       struct tls_offload_context *ctx;
+       u64 deleted_records = 0;
+       unsigned long flags;
+
+       if (!tls_ctx)
+               return;
+
+       ctx = tls_offload_ctx(tls_ctx);
+
+       spin_lock_irqsave(&ctx->lock, flags);
+       info = ctx->retransmit_hint;
+       if (info && !before(acked_seq, info->end_seq)) {
+               ctx->retransmit_hint = NULL;
+               list_del(&info->list);
+               destroy_record(info);
+               deleted_records++;
+       }
+
+       list_for_each_entry_safe(info, temp, &ctx->records_list, list) {
+               if (before(acked_seq, info->end_seq))
+                       break;
+               list_del(&info->list);
+
+               destroy_record(info);
+               deleted_records++;
+       }
+
+       ctx->unacked_record_sn += deleted_records;
+       spin_unlock_irqrestore(&ctx->lock, flags);
+}
+
+/* At this point, there should be no references on this
+ * socket and no in-flight SKBs associated with this
+ * socket, so it is safe to free all the resources.
+ */
+void tls_device_sk_destruct(struct sock *sk)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+
+       if (ctx->open_record)
+               destroy_record(ctx->open_record);
+
+       delete_all_records(ctx);
+       crypto_free_aead(ctx->aead_send);
+       ctx->sk_destruct(sk);
+       clean_acked_data_disable(inet_csk(sk));
+
+       if (refcount_dec_and_test(&tls_ctx->refcount))
+               tls_device_queue_ctx_destruction(tls_ctx);
+}
+EXPORT_SYMBOL(tls_device_sk_destruct);
+
+static void tls_append_frag(struct tls_record_info *record,
+                           struct page_frag *pfrag,
+                           int size)
+{
+       skb_frag_t *frag;
+
+       frag = &record->frags[record->num_frags - 1];
+       if (frag->page.p == pfrag->page &&
+           frag->page_offset + frag->size == pfrag->offset) {
+               frag->size += size;
+       } else {
+               ++frag;
+               frag->page.p = pfrag->page;
+               frag->page_offset = pfrag->offset;
+               frag->size = size;
+               ++record->num_frags;
+               get_page(pfrag->page);
+       }
+
+       pfrag->offset += size;
+       record->len += size;
+}
+
+static int tls_push_record(struct sock *sk,
+                          struct tls_context *ctx,
+                          struct tls_offload_context *offload_ctx,
+                          struct tls_record_info *record,
+                          struct page_frag *pfrag,
+                          int flags,
+                          unsigned char record_type)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct page_frag dummy_tag_frag;
+       skb_frag_t *frag;
+       int i;
+
+       /* fill prepend */
+       frag = &record->frags[0];
+       tls_fill_prepend(ctx,
+                        skb_frag_address(frag),
+                        record->len - ctx->tx.prepend_size,
+                        record_type);
+
+       /* HW doesn't care about the data in the tag, because it fills it. */
+       dummy_tag_frag.page = skb_frag_page(frag);
+       dummy_tag_frag.offset = 0;
+
+       tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size);
+       record->end_seq = tp->write_seq + record->len;
+       spin_lock_irq(&offload_ctx->lock);
+       list_add_tail(&record->list, &offload_ctx->records_list);
+       spin_unlock_irq(&offload_ctx->lock);
+       offload_ctx->open_record = NULL;
+       set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+       tls_advance_record_sn(sk, &ctx->tx);
+
+       for (i = 0; i < record->num_frags; i++) {
+               frag = &record->frags[i];
+               sg_unmark_end(&offload_ctx->sg_tx_data[i]);
+               sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
+                           frag->size, frag->page_offset);
+               sk_mem_charge(sk, frag->size);
+               get_page(skb_frag_page(frag));
+       }
+       sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
+
+       /* all ready, send */
+       return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
+}
+
+static int tls_create_new_record(struct tls_offload_context *offload_ctx,
+                                struct page_frag *pfrag,
+                                size_t prepend_size)
+{
+       struct tls_record_info *record;
+       skb_frag_t *frag;
+
+       record = kmalloc(sizeof(*record), GFP_KERNEL);
+       if (!record)
+               return -ENOMEM;
+
+       frag = &record->frags[0];
+       __skb_frag_set_page(frag, pfrag->page);
+       frag->page_offset = pfrag->offset;
+       skb_frag_size_set(frag, prepend_size);
+
+       get_page(pfrag->page);
+       pfrag->offset += prepend_size;
+
+       record->num_frags = 1;
+       record->len = prepend_size;
+       offload_ctx->open_record = record;
+       return 0;
+}
+
+static int tls_do_allocation(struct sock *sk,
+                            struct tls_offload_context *offload_ctx,
+                            struct page_frag *pfrag,
+                            size_t prepend_size)
+{
+       int ret;
+
+       if (!offload_ctx->open_record) {
+               if (unlikely(!skb_page_frag_refill(prepend_size, pfrag,
+                                                  sk->sk_allocation))) {
+                       sk->sk_prot->enter_memory_pressure(sk);
+                       sk_stream_moderate_sndbuf(sk);
+                       return -ENOMEM;
+               }
+
+               ret = tls_create_new_record(offload_ctx, pfrag, prepend_size);
+               if (ret)
+                       return ret;
+
+               if (pfrag->size > pfrag->offset)
+                       return 0;
+       }
+
+       if (!sk_page_frag_refill(sk, pfrag))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int tls_push_data(struct sock *sk,
+                        struct iov_iter *msg_iter,
+                        size_t size, int flags,
+                        unsigned char record_type)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+       int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
+       struct tls_record_info *record = ctx->open_record;
+       struct page_frag *pfrag;
+       size_t orig_size = size;
+       u32 max_open_record_len;
+       int copy, rc = 0;
+       bool done = false;
+       long timeo;
+
+       if (flags &
+           ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
+               return -ENOTSUPP;
+
+       if (sk->sk_err)
+               return -sk->sk_err;
+
+       timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+       rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo);
+       if (rc < 0)
+               return rc;
+
+       pfrag = sk_page_frag(sk);
+
+       /* TLS_HEADER_SIZE is not counted as part of the TLS record, and
+        * we need to leave room for an authentication tag.
+        */
+       max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
+                             tls_ctx->tx.prepend_size;
+       do {
+               rc = tls_do_allocation(sk, ctx, pfrag,
+                                      tls_ctx->tx.prepend_size);
+               if (rc) {
+                       rc = sk_stream_wait_memory(sk, &timeo);
+                       if (!rc)
+                               continue;
+
+                       record = ctx->open_record;
+                       if (!record)
+                               break;
+handle_error:
+                       if (record_type != TLS_RECORD_TYPE_DATA) {
+                               /* avoid sending a partial
+                                * record with type !=
+                                * application_data
+                                */
+                               size = orig_size;
+                               destroy_record(record);
+                               ctx->open_record = NULL;
+                       } else if (record->len > tls_ctx->tx.prepend_size) {
+                               goto last_record;
+                       }
+
+                       break;
+               }
+
+               record = ctx->open_record;
+               copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
+               copy = min_t(size_t, copy, (max_open_record_len - record->len));
+
+               if (copy_from_iter_nocache(page_address(pfrag->page) +
+                                              pfrag->offset,
+                                          copy, msg_iter) != copy) {
+                       rc = -EFAULT;
+                       goto handle_error;
+               }
+               tls_append_frag(record, pfrag, copy);
+
+               size -= copy;
+               if (!size) {
+last_record:
+                       tls_push_record_flags = flags;
+                       if (more) {
+                               tls_ctx->pending_open_record_frags =
+                                               record->num_frags;
+                               break;
+                       }
+
+                       done = true;
+               }
+
+               if (done || record->len >= max_open_record_len ||
+                   (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+                       rc = tls_push_record(sk,
+                                            tls_ctx,
+                                            ctx,
+                                            record,
+                                            pfrag,
+                                            tls_push_record_flags,
+                                            record_type);
+                       if (rc < 0)
+                               break;
+               }
+       } while (!done);
+
+       if (orig_size - size > 0)
+               rc = orig_size - size;
+
+       return rc;
+}
+
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+       unsigned char record_type = TLS_RECORD_TYPE_DATA;
+       int rc;
+
+       lock_sock(sk);
+
+       if (unlikely(msg->msg_controllen)) {
+               rc = tls_proccess_cmsg(sk, msg, &record_type);
+               if (rc)
+                       goto out;
+       }
+
+       rc = tls_push_data(sk, &msg->msg_iter, size,
+                          msg->msg_flags, record_type);
+
+out:
+       release_sock(sk);
+       return rc;
+}
+
+int tls_device_sendpage(struct sock *sk, struct page *page,
+                       int offset, size_t size, int flags)
+{
+       struct iov_iter msg_iter;
+       char *kaddr = kmap(page);
+       struct kvec iov;
+       int rc;
+
+       if (flags & MSG_SENDPAGE_NOTLAST)
+               flags |= MSG_MORE;
+
+       lock_sock(sk);
+
+       if (flags & MSG_OOB) {
+               rc = -ENOTSUPP;
+               goto out;
+       }
+
+       iov.iov_base = kaddr + offset;
+       iov.iov_len = size;
+       iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+       rc = tls_push_data(sk, &msg_iter, size,
+                          flags, TLS_RECORD_TYPE_DATA);
+       kunmap(page);
+
+out:
+       release_sock(sk);
+       return rc;
+}
+
+struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+                                      u32 seq, u64 *p_record_sn)
+{
+       u64 record_sn = context->hint_record_sn;
+       struct tls_record_info *info;
+
+       info = context->retransmit_hint;
+       if (!info ||
+           before(seq, info->end_seq - info->len)) {
+           /* if retransmit_hint is irrelevant, start
+            * from the beginning of the list
+                */
+               info = list_first_entry(&context->records_list,
+                                       struct tls_record_info, list);
+               record_sn = context->unacked_record_sn;
+       }
+
+       list_for_each_entry_from(info, &context->records_list, list) {
+               if (before(seq, info->end_seq)) {
+                       if (!context->retransmit_hint ||
+                           after(info->end_seq,
+                                 context->retransmit_hint->end_seq)) {
+                               context->hint_record_sn = record_sn;
+                               context->retransmit_hint = info;
+                       }
+                       *p_record_sn = record_sn;
+                       return info;
+               }
+               record_sn++;
+       }
+
+       return NULL;
+}
+EXPORT_SYMBOL(tls_get_record);
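
tls_get_record() is exported for drivers: given the TCP sequence number of an egress (possibly retransmitted) skb, it returns the record covering it and its record sequence number, caching the position in retransmit_hint since retransmissions tend to move forward. A hypothetical driver-side sketch; as the fallback code later in this series does, the caller must hold ctx->lock while it touches the returned record:

static void demo_lookup(struct tls_offload_context *ctx, struct sk_buff *skb)
{
	u32 seq = ntohl(tcp_hdr(skb)->seq);
	struct tls_record_info *record;
	unsigned long flags;
	u64 record_sn;

	spin_lock_irqsave(&ctx->lock, flags);
	record = tls_get_record(ctx, seq, &record_sn);
	if (record)		/* NULL: seq is past the last closed record */
		pr_debug("seq %u -> record sn %llu (len %d)\n",
			 seq, record_sn, record->len);
	spin_unlock_irqrestore(&ctx->lock, flags);
}
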
+
+static int tls_device_push_pending_record(struct sock *sk, int flags)
+{
+       struct iov_iter msg_iter;
+
+       iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+       return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+}
+
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+{
+       u16 nonce_size, tag_size, iv_size, rec_seq_size;
+       struct tls_record_info *start_marker_record;
+       struct tls_offload_context *offload_ctx;
+       struct tls_crypto_info *crypto_info;
+       struct net_device *netdev;
+       char *iv, *rec_seq;
+       struct sk_buff *skb;
+       int rc = -EINVAL;
+       __be64 rcd_sn;
+
+       if (!ctx)
+               goto out;
+
+       if (ctx->priv_ctx_tx) {
+               rc = -EEXIST;
+               goto out;
+       }
+
+       start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
+       if (!start_marker_record) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE, GFP_KERNEL);
+       if (!offload_ctx) {
+               rc = -ENOMEM;
+               goto free_marker_record;
+       }
+
+       crypto_info = &ctx->crypto_send;
+       switch (crypto_info->cipher_type) {
+       case TLS_CIPHER_AES_GCM_128:
+               nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
+               tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+               iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
+               iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
+               rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
+               rec_seq =
+                ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
+               break;
+       default:
+               rc = -EINVAL;
+               goto free_offload_ctx;
+       }
+
+       ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
+       ctx->tx.tag_size = tag_size;
+       ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
+       ctx->tx.iv_size = iv_size;
+       ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+                            GFP_KERNEL);
+       if (!ctx->tx.iv) {
+               rc = -ENOMEM;
+               goto free_offload_ctx;
+       }
+
+       memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+
+       ctx->tx.rec_seq_size = rec_seq_size;
+       ctx->tx.rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+       if (!ctx->tx.rec_seq) {
+               rc = -ENOMEM;
+               goto free_iv;
+       }
+       memcpy(ctx->tx.rec_seq, rec_seq, rec_seq_size);
+
+       rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info);
+       if (rc)
+               goto free_rec_seq;
+
+       /* start at rec_seq - 1 to account for the start marker record */
+       memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn));
+       offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1;
+
+       start_marker_record->end_seq = tcp_sk(sk)->write_seq;
+       start_marker_record->len = 0;
+       start_marker_record->num_frags = 0;
+
+       INIT_LIST_HEAD(&offload_ctx->records_list);
+       list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
+       spin_lock_init(&offload_ctx->lock);
+       sg_init_table(offload_ctx->sg_tx_data,
+                     ARRAY_SIZE(offload_ctx->sg_tx_data));
+
+       clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked);
+       ctx->push_pending_record = tls_device_push_pending_record;
+       offload_ctx->sk_destruct = sk->sk_destruct;
+
+       /* TLS offload is greatly simplified if we don't send
+        * SKBs where only part of the payload needs to be encrypted.
+        * So mark the last skb in the write queue as end of record.
+        */
+       skb = tcp_write_queue_tail(sk);
+       if (skb)
+               TCP_SKB_CB(skb)->eor = 1;
+
+       refcount_set(&ctx->refcount, 1);
+
+       /* We support starting offload on multiple sockets
+        * concurrently, so we only need a read lock here.
+        * This lock must precede get_netdev_for_sock to prevent races between
+        * NETDEV_DOWN and setsockopt.
+        */
+       down_read(&device_offload_lock);
+       netdev = get_netdev_for_sock(sk);
+       if (!netdev) {
+               pr_err_ratelimited("%s: netdev not found\n", __func__);
+               rc = -EINVAL;
+               goto release_lock;
+       }
+
+       if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
+               rc = -ENOTSUPP;
+               goto release_netdev;
+       }
+
+       /* Avoid offloading if the device is down.
+        * We don't want to offload new flows after
+        * the NETDEV_DOWN event.
+        */
+       if (!(netdev->flags & IFF_UP)) {
+               rc = -EINVAL;
+               goto release_netdev;
+       }
+
+       ctx->priv_ctx_tx = offload_ctx;
+       rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX,
+                                            &ctx->crypto_send,
+                                            tcp_sk(sk)->write_seq);
+       if (rc)
+               goto release_netdev;
+
+       ctx->netdev = netdev;
+
+       spin_lock_irq(&tls_device_lock);
+       list_add_tail(&ctx->list, &tls_device_list);
+       spin_unlock_irq(&tls_device_lock);
+
+       sk->sk_validate_xmit_skb = tls_validate_xmit_skb;
+       /* following this assignment tls_is_sk_tx_device_offloaded
+        * will return true and the context might be accessed
+        * by the netdev's xmit function.
+        */
+       smp_store_release(&sk->sk_destruct,
+                         &tls_device_sk_destruct);
+       up_read(&device_offload_lock);
+       goto out;
+
+release_netdev:
+       dev_put(netdev);
+release_lock:
+       up_read(&device_offload_lock);
+       clean_acked_data_disable(inet_csk(sk));
+       crypto_free_aead(offload_ctx->aead_send);
+free_rec_seq:
+       kfree(ctx->tx.rec_seq);
+free_iv:
+       kfree(ctx->tx.iv);
+free_offload_ctx:
+       kfree(offload_ctx);
+       ctx->priv_ctx_tx = NULL;
+free_marker_record:
+       kfree(start_marker_record);
+out:
+       return rc;
+}
+
+static int tls_device_down(struct net_device *netdev)
+{
+       struct tls_context *ctx, *tmp;
+       unsigned long flags;
+       LIST_HEAD(list);
+
+       /* Request a write lock to block new offload attempts */
+       down_write(&device_offload_lock);
+
+       spin_lock_irqsave(&tls_device_lock, flags);
+       list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) {
+               if (ctx->netdev != netdev ||
+                   !refcount_inc_not_zero(&ctx->refcount))
+                       continue;
+
+               list_move(&ctx->list, &list);
+       }
+       spin_unlock_irqrestore(&tls_device_lock, flags);
+
+       list_for_each_entry_safe(ctx, tmp, &list, list) {
+               netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+                                               TLS_OFFLOAD_CTX_DIR_TX);
+               ctx->netdev = NULL;
+               dev_put(netdev);
+               list_del_init(&ctx->list);
+
+               if (refcount_dec_and_test(&ctx->refcount))
+                       tls_device_free_ctx(ctx);
+       }
+
+       up_write(&device_offload_lock);
+
+       flush_work(&tls_device_gc_work);
+
+       return NOTIFY_DONE;
+}
+
+static int tls_dev_event(struct notifier_block *this, unsigned long event,
+                        void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (!(dev->features & NETIF_F_HW_TLS_TX))
+               return NOTIFY_DONE;
+
+       switch (event) {
+       case NETDEV_REGISTER:
+       case NETDEV_FEAT_CHANGE:
+               if (dev->tlsdev_ops &&
+                   dev->tlsdev_ops->tls_dev_add &&
+                   dev->tlsdev_ops->tls_dev_del)
+                       return NOTIFY_DONE;
+               else
+                       return NOTIFY_BAD;
+       case NETDEV_DOWN:
+               return tls_device_down(dev);
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block tls_dev_notifier = {
+       .notifier_call  = tls_dev_event,
+};
+
+void __init tls_device_init(void)
+{
+       register_netdevice_notifier(&tls_dev_notifier);
+}
+
+void __exit tls_device_cleanup(void)
+{
+       unregister_netdevice_notifier(&tls_dev_notifier);
+       flush_work(&tls_device_gc_work);
+}
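
tls_dev_event() above enforces the driver contract: a netdev advertising NETIF_F_HW_TLS_TX must provide both tls_dev_add and tls_dev_del. A hypothetical skeleton of that contract, with signatures inferred from the call sites in tls_set_device_offload() and tls_device_down():

static int demo_tls_dev_add(struct net_device *netdev, struct sock *sk,
			    enum tls_offload_ctx_dir direction,
			    struct tls_crypto_info *crypto_info,
			    u32 start_offload_tcp_sn)
{
	if (direction != TLS_OFFLOAD_CTX_DIR_TX)
		return -EOPNOTSUPP;
	/* program the key material and expected TCP sequence number
	 * for this flow into the NIC here
	 */
	return 0;
}

static void demo_tls_dev_del(struct net_device *netdev,
			     struct tls_context *tls_ctx,
			     enum tls_offload_ctx_dir direction)
{
	/* release the per-flow hardware state here */
}

static const struct tlsdev_ops demo_tlsdev_ops = {
	.tls_dev_add	= demo_tls_dev_add,
	.tls_dev_del	= demo_tls_dev_del,
};

/* in the driver's probe path:
 *	netdev->tlsdev_ops = &demo_tlsdev_ops;
 *	netdev->features  |= NETIF_F_HW_TLS_TX;
 */
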
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
new file mode 100644 (file)
index 0000000..748914a
--- /dev/null
@@ -0,0 +1,450 @@
+/* Copyright (c) 2018, Mellanox Technologies All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tls.h>
+#include <crypto/aead.h>
+#include <crypto/scatterwalk.h>
+#include <net/ip6_checksum.h>
+
+static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk)
+{
+       struct scatterlist *src = walk->sg;
+       int diff = walk->offset - src->offset;
+
+       sg_set_page(sg, sg_page(src),
+                   src->length - diff, walk->offset);
+
+       scatterwalk_crypto_chain(sg, sg_next(src), 0, 2);
+}
+
+static int tls_enc_record(struct aead_request *aead_req,
+                         struct crypto_aead *aead, char *aad,
+                         char *iv, __be64 rcd_sn,
+                         struct scatter_walk *in,
+                         struct scatter_walk *out, int *in_len)
+{
+       unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE];
+       struct scatterlist sg_in[3];
+       struct scatterlist sg_out[3];
+       u16 len;
+       int rc;
+
+       len = min_t(int, *in_len, ARRAY_SIZE(buf));
+
+       scatterwalk_copychunks(buf, in, len, 0);
+       scatterwalk_copychunks(buf, out, len, 1);
+
+       *in_len -= len;
+       if (!*in_len)
+               return 0;
+
+       scatterwalk_pagedone(in, 0, 1);
+       scatterwalk_pagedone(out, 1, 1);
+
+       len = buf[4] | (buf[3] << 8);
+       len -= TLS_CIPHER_AES_GCM_128_IV_SIZE;
+
+       tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE,
+                    (char *)&rcd_sn, sizeof(rcd_sn), buf[0]);
+
+       memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE,
+              TLS_CIPHER_AES_GCM_128_IV_SIZE);
+
+       sg_init_table(sg_in, ARRAY_SIZE(sg_in));
+       sg_init_table(sg_out, ARRAY_SIZE(sg_out));
+       sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE);
+       sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE);
+       chain_to_walk(sg_in + 1, in);
+       chain_to_walk(sg_out + 1, out);
+
+       *in_len -= len;
+       if (*in_len < 0) {
+               *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+               /* The input buffer doesn't contain the entire record;
+                * trim len accordingly. The resulting authentication tag
+                * will contain garbage, but we don't care, since we won't
+                * include any of it in the output skb.
+                * Note that we assume the output buffer length
+                * is larger than the input buffer length + tag size.
+                */
+               if (*in_len < 0)
+                       len += *in_len;
+
+               *in_len = 0;
+       }
+
+       if (*in_len) {
+               scatterwalk_copychunks(NULL, in, len, 2);
+               scatterwalk_pagedone(in, 0, 1);
+               scatterwalk_copychunks(NULL, out, len, 2);
+               scatterwalk_pagedone(out, 1, 1);
+       }
+
+       len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+       aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
+
+       rc = crypto_aead_encrypt(aead_req);
+
+       return rc;
+}
+
+static void tls_init_aead_request(struct aead_request *aead_req,
+                                 struct crypto_aead *aead)
+{
+       aead_request_set_tfm(aead_req, aead);
+       aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+}
+
+static struct aead_request *tls_alloc_aead_request(struct crypto_aead *aead,
+                                                  gfp_t flags)
+{
+       unsigned int req_size = sizeof(struct aead_request) +
+               crypto_aead_reqsize(aead);
+       struct aead_request *aead_req;
+
+       aead_req = kzalloc(req_size, flags);
+       if (aead_req)
+               tls_init_aead_request(aead_req, aead);
+       return aead_req;
+}
+
+static int tls_enc_records(struct aead_request *aead_req,
+                          struct crypto_aead *aead, struct scatterlist *sg_in,
+                          struct scatterlist *sg_out, char *aad, char *iv,
+                          u64 rcd_sn, int len)
+{
+       struct scatter_walk out, in;
+       int rc;
+
+       scatterwalk_start(&in, sg_in);
+       scatterwalk_start(&out, sg_out);
+
+       do {
+               rc = tls_enc_record(aead_req, aead, aad, iv,
+                                   cpu_to_be64(rcd_sn), &in, &out, &len);
+               rcd_sn++;
+
+       } while (rc == 0 && len);
+
+       scatterwalk_done(&in, 0, 0);
+       scatterwalk_done(&out, 1, 0);
+
+       return rc;
+}
+
+/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses
+ * might have been changed by NAT.
+ */
+static void update_chksum(struct sk_buff *skb, int headln)
+{
+       struct tcphdr *th = tcp_hdr(skb);
+       int datalen = skb->len - headln;
+       const struct ipv6hdr *ipv6h;
+       const struct iphdr *iph;
+
+       /* We only changed the payload so if we are using partial we don't
+        * need to update anything.
+        */
+       if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
+               return;
+
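+       /* Recompute instead: switch to CHECKSUM_PARTIAL and seed th->check
+        * with the pseudo-header sum; the device (or skb_checksum_help() on
+        * devices without checksum offload) folds in the payload later.
+        */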
+       skb->ip_summed = CHECKSUM_PARTIAL;
+       skb->csum_start = skb_transport_header(skb) - skb->head;
+       skb->csum_offset = offsetof(struct tcphdr, check);
+
+       if (skb->sk->sk_family == AF_INET6) {
+               ipv6h = ipv6_hdr(skb);
+               th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+                                            datalen, IPPROTO_TCP, 0);
+       } else {
+               iph = ip_hdr(skb);
+               th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
+                                              IPPROTO_TCP, 0);
+       }
+}
+
+static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
+{
+       skb_copy_header(nskb, skb);
+
+       skb_put(nskb, skb->len);
+       memcpy(nskb->data, skb->data, headln);
+       update_chksum(nskb, headln);
+
+       nskb->destructor = skb->destructor;
+       nskb->sk = skb->sk;
+       skb->destructor = NULL;
+       skb->sk = NULL;
+       refcount_add(nskb->truesize - skb->truesize,
+                    &nskb->sk->sk_wmem_alloc);
+}
+
+/* This function may be called after the user socket is already
+ * closed, so make sure we don't use anything freed during
+ * tls_sk_proto_close here.
+ */
+static int fill_sg_in(struct scatterlist *sg_in,
+                     struct sk_buff *skb,
+                     struct tls_offload_context *ctx,
+                     u64 *rcd_sn,
+                     s32 *sync_size,
+                     int *resync_sgs)
+{
+       int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       int payload_len = skb->len - tcp_payload_offset;
+       u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+       struct tls_record_info *record;
+       unsigned long flags;
+       int remaining;
+       int i;
+
+       spin_lock_irqsave(&ctx->lock, flags);
+       record = tls_get_record(ctx, tcp_seq, rcd_sn);
+       if (!record) {
+               spin_unlock_irqrestore(&ctx->lock, flags);
+               WARN(1, "Record not found for seq %u\n", tcp_seq);
+               return -EINVAL;
+       }
+
+       *sync_size = tcp_seq - tls_record_start_seq(record);
+       if (*sync_size < 0) {
+               int is_start_marker = tls_record_is_start_marker(record);
+
+               spin_unlock_irqrestore(&ctx->lock, flags);
+               /* This should only occur if the relevant record was
+                * already acked. In that case it should be ok
+                * to drop the packet and avoid retransmission.
+                *
+                * There is a corner case where the packet contains
+                * both an acked and a non-acked record.
+                * We currently don't handle that case and rely
+                * on TCP to retransmit a packet that doesn't contain
+                * already acked payload.
+                */
+               if (!is_start_marker)
+                       *sync_size = 0;
+               return -EINVAL;
+       }
+
+       remaining = *sync_size;
+       for (i = 0; remaining > 0; i++) {
+               skb_frag_t *frag = &record->frags[i];
+
+               __skb_frag_ref(frag);
+               sg_set_page(sg_in + i, skb_frag_page(frag),
+                           skb_frag_size(frag), frag->page_offset);
+
+               remaining -= skb_frag_size(frag);
+
+               if (remaining < 0)
+                       sg_in[i].length += remaining;
+       }
+       *resync_sgs = i;
+
+       spin_unlock_irqrestore(&ctx->lock, flags);
+       if (skb_to_sgvec(skb, &sg_in[i], tcp_payload_offset, payload_len) < 0)
+               return -EINVAL;
+
+       return 0;
+}
+
+static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
+                       struct tls_context *tls_ctx,
+                       struct sk_buff *nskb,
+                       int tcp_payload_offset,
+                       int payload_len,
+                       int sync_size,
+                       void *dummy_buf)
+{
+       sg_set_buf(&sg_out[0], dummy_buf, sync_size);
+       sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
+       /* Add room for authentication tag produced by crypto */
+       dummy_buf += sync_size;
+       sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+}
+
+static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
+                                  struct scatterlist sg_out[3],
+                                  struct scatterlist *sg_in,
+                                  struct sk_buff *skb,
+                                  s32 sync_size, u64 rcd_sn)
+{
+       int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       int payload_len = skb->len - tcp_payload_offset;
+       void *buf, *iv, *aad, *dummy_buf;
+       struct aead_request *aead_req;
+       struct sk_buff *nskb = NULL;
+       int buf_len;
+
+       aead_req = tls_alloc_aead_request(ctx->aead_send, GFP_ATOMIC);
+       if (!aead_req)
+               return NULL;
+
+       buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE +
+                 TLS_CIPHER_AES_GCM_128_IV_SIZE +
+                 TLS_AAD_SPACE_SIZE +
+                 sync_size +
+                 TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+       buf = kmalloc(buf_len, GFP_ATOMIC);
+       if (!buf)
+               goto free_req;
+
+       iv = buf;
+       memcpy(iv, tls_ctx->crypto_send_aes_gcm_128.salt,
+              TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+       aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE +
+             TLS_CIPHER_AES_GCM_128_IV_SIZE;
+       dummy_buf = aad + TLS_AAD_SPACE_SIZE;
+
+       nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
+       if (!nskb)
+               goto free_buf;
+
+       skb_reserve(nskb, skb_headroom(skb));
+
+       fill_sg_out(sg_out, buf, tls_ctx, nskb, tcp_payload_offset,
+                   payload_len, sync_size, dummy_buf);
+
+       if (tls_enc_records(aead_req, ctx->aead_send, sg_in, sg_out, aad, iv,
+                           rcd_sn, sync_size + payload_len) < 0)
+               goto free_nskb;
+
+       complete_skb(nskb, skb, tcp_payload_offset);
+
+       /* validate_xmit_skb_list assumes that if the skb wasn't segmented
+        * nskb->prev will point to the skb itself
+        */
+       nskb->prev = nskb;
+
+free_buf:
+       kfree(buf);
+free_req:
+       kfree(aead_req);
+       return nskb;
+free_nskb:
+       kfree_skb(nskb);
+       nskb = NULL;
+       goto free_buf;
+}
+
+static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
+{
+       int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       int payload_len = skb->len - tcp_payload_offset;
+       struct scatterlist *sg_in, sg_out[3];
+       struct sk_buff *nskb = NULL;
+       int sg_in_max_elements;
+       int resync_sgs = 0;
+       s32 sync_size = 0;
+       u64 rcd_sn;
+
+       /* worst case is:
+        * MAX_SKB_FRAGS in tls_record_info
+        * MAX_SKB_FRAGS + 1 in SKB head and frags.
+        */
+       sg_in_max_elements = 2 * MAX_SKB_FRAGS + 1;
+
+       if (!payload_len)
+               return skb;
+
+       sg_in = kmalloc_array(sg_in_max_elements, sizeof(*sg_in), GFP_ATOMIC);
+       if (!sg_in)
+               goto free_orig;
+
+       sg_init_table(sg_in, sg_in_max_elements);
+       sg_init_table(sg_out, ARRAY_SIZE(sg_out));
+
+       if (fill_sg_in(sg_in, skb, ctx, &rcd_sn, &sync_size, &resync_sgs)) {
+               /* bypass packets before kernel TLS socket option was set */
+               if (sync_size < 0 && payload_len <= -sync_size)
+                       nskb = skb_get(skb);
+               goto put_sg;
+       }
+
+       nskb = tls_enc_skb(tls_ctx, sg_out, sg_in, skb, sync_size, rcd_sn);
+
+put_sg:
+       while (resync_sgs)
+               put_page(sg_page(&sg_in[--resync_sgs]));
+       kfree(sg_in);
+free_orig:
+       kfree_skb(skb);
+       return nskb;
+}
+
+struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
+                                     struct net_device *dev,
+                                     struct sk_buff *skb)
+{
+       if (dev == tls_get_ctx(sk)->netdev)
+               return skb;
+
+       return tls_sw_fallback(sk, skb);
+}
+
+int tls_sw_fallback_init(struct sock *sk,
+                        struct tls_offload_context *offload_ctx,
+                        struct tls_crypto_info *crypto_info)
+{
+       const u8 *key;
+       int rc;
+
+       offload_ctx->aead_send =
+           crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(offload_ctx->aead_send)) {
+               rc = PTR_ERR(offload_ctx->aead_send);
+               pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
+               offload_ctx->aead_send = NULL;
+               goto err_out;
+       }
+
+       key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
+
+       rc = crypto_aead_setkey(offload_ctx->aead_send, key,
+                               TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+       if (rc)
+               goto free_aead;
+
+       rc = crypto_aead_setauthsize(offload_ctx->aead_send,
+                                    TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+       if (rc)
+               goto free_aead;
+
+       return 0;
+free_aead:
+       crypto_free_aead(offload_ctx->aead_send);
+err_out:
+       return rc;
+}
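
For context: this fallback is reached through tls_validate_xmit_skb() above, which the TLS device-offload code installs as the socket's validate-xmit hook, roughly as follows (a sketch of the call site, not part of this file):

	/* in tls_set_device_offload(), once the netdev accepted the state */
	sk->sk_validate_xmit_skb = tls_validate_xmit_skb;

When an skb later egresses a device other than ctx->netdev (route change, bonding failover and the like), tls_sw_fallback() re-encrypts the covered records in software so the wire still carries valid TLS.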
index 0d379970960e6c2a101fbb489237f97f0edf68bc..301f224304698950544088c16518ea2e14ff41a6 100644 (file)
@@ -51,12 +51,12 @@ enum {
        TLSV6,
        TLS_NUM_PROTS,
 };
-
 enum {
        TLS_BASE,
-       TLS_SW_TX,
-       TLS_SW_RX,
-       TLS_SW_RXTX,
+       TLS_SW,
+#ifdef CONFIG_TLS_DEVICE
+       TLS_HW,
+#endif
        TLS_HW_RECORD,
        TLS_NUM_CONFIG,
 };
@@ -65,14 +65,14 @@ static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
 static LIST_HEAD(device_list);
 static DEFINE_MUTEX(device_mutex);
-static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
+static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static struct proto_ops tls_sw_proto_ops;
 
-static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
        int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
-       sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
+       sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf];
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -114,6 +114,7 @@ int tls_push_sg(struct sock *sk,
        size = sg->length - offset;
        offset += sg->offset;
 
+       ctx->in_tcp_sendpages = true;
        while (1) {
                if (sg_is_last(sg))
                        sendpage_flags = flags;
@@ -134,6 +135,7 @@ int tls_push_sg(struct sock *sk,
                        offset -= sg->offset;
                        ctx->partially_sent_offset = offset;
                        ctx->partially_sent_record = (void *)sg;
+                       ctx->in_tcp_sendpages = false;
                        return ret;
                }
 
@@ -148,6 +150,8 @@ int tls_push_sg(struct sock *sk,
        }
 
        clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+       ctx->in_tcp_sendpages = false;
+       ctx->sk_write_space(sk);
 
        return 0;
 }
@@ -217,6 +221,10 @@ static void tls_write_space(struct sock *sk)
 {
        struct tls_context *ctx = tls_get_ctx(sk);
 
+       /* We are already sending pages, ignore notification */
+       if (ctx->in_tcp_sendpages)
+               return;
+
        if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
                gfp_t sk_allocation = sk->sk_allocation;
                int rc;
@@ -241,16 +249,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
        struct tls_context *ctx = tls_get_ctx(sk);
        long timeo = sock_sndtimeo(sk, 0);
        void (*sk_proto_close)(struct sock *sk, long timeout);
+       bool free_ctx = false;
 
        lock_sock(sk);
        sk_proto_close = ctx->sk_proto_close;
 
-       if (ctx->conf == TLS_HW_RECORD)
-               goto skip_tx_cleanup;
-
-       if (ctx->conf == TLS_BASE) {
-               kfree(ctx);
-               ctx = NULL;
+       if ((ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) ||
+           (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE)) {
+               free_ctx = true;
                goto skip_tx_cleanup;
        }
 
@@ -270,15 +276,26 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
                }
        }
 
-       kfree(ctx->tx.rec_seq);
-       kfree(ctx->tx.iv);
-       kfree(ctx->rx.rec_seq);
-       kfree(ctx->rx.iv);
+       /* We need these for tls_sw_fallback handling of other packets */
+       if (ctx->tx_conf == TLS_SW) {
+               kfree(ctx->tx.rec_seq);
+               kfree(ctx->tx.iv);
+               tls_sw_free_resources_tx(sk);
+       }
+
+       if (ctx->rx_conf == TLS_SW) {
+               kfree(ctx->rx.rec_seq);
+               kfree(ctx->rx.iv);
+               tls_sw_free_resources_rx(sk);
+       }
 
-       if (ctx->conf == TLS_SW_TX ||
-           ctx->conf == TLS_SW_RX ||
-           ctx->conf == TLS_SW_RXTX) {
-               tls_sw_free_resources(sk);
+#ifdef CONFIG_TLS_DEVICE
+       if (ctx->tx_conf != TLS_HW) {
+#else
+       {
+#endif
+               kfree(ctx);
+               ctx = NULL;
        }
 
 skip_tx_cleanup:
@@ -287,7 +304,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
        /* free ctx for TLS_HW_RECORD, used by tcp_set_state
         * for sk->sk_prot->unhash [tls_hw_unhash]
         */
-       if (ctx && ctx->conf == TLS_HW_RECORD)
+       if (free_ctx)
                kfree(ctx);
 }
 
@@ -441,25 +458,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
                goto err_crypto_info;
        }
 
-       /* currently SW is default, we will have ethtool in future */
        if (tx) {
-               rc = tls_set_sw_offload(sk, ctx, 1);
-               if (ctx->conf == TLS_SW_RX)
-                       conf = TLS_SW_RXTX;
-               else
-                       conf = TLS_SW_TX;
+#ifdef CONFIG_TLS_DEVICE
+               rc = tls_set_device_offload(sk, ctx);
+               conf = TLS_HW;
+               if (rc) {
+#else
+               {
+#endif
+                       rc = tls_set_sw_offload(sk, ctx, 1);
+                       conf = TLS_SW;
+               }
        } else {
                rc = tls_set_sw_offload(sk, ctx, 0);
-               if (ctx->conf == TLS_SW_TX)
-                       conf = TLS_SW_RXTX;
-               else
-                       conf = TLS_SW_RX;
+               conf = TLS_SW;
        }
 
        if (rc)
                goto err_crypto_info;
 
-       ctx->conf = conf;
+       if (tx)
+               ctx->tx_conf = conf;
+       else
+               ctx->rx_conf = conf;
        update_sk_prot(sk, ctx);
        if (tx) {
                ctx->sk_write_space = sk->sk_write_space;
@@ -535,7 +556,8 @@ static int tls_hw_prot(struct sock *sk)
                        ctx->hash = sk->sk_prot->hash;
                        ctx->unhash = sk->sk_prot->unhash;
                        ctx->sk_proto_close = sk->sk_prot->close;
-                       ctx->conf = TLS_HW_RECORD;
+                       ctx->rx_conf = TLS_HW_RECORD;
+                       ctx->tx_conf = TLS_HW_RECORD;
                        update_sk_prot(sk, ctx);
                        rc = 1;
                        break;
@@ -579,29 +601,40 @@ static int tls_hw_hash(struct sock *sk)
        return err;
 }
 
-static void build_protos(struct proto *prot, struct proto *base)
+static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+                        struct proto *base)
 {
-       prot[TLS_BASE] = *base;
-       prot[TLS_BASE].setsockopt       = tls_setsockopt;
-       prot[TLS_BASE].getsockopt       = tls_getsockopt;
-       prot[TLS_BASE].close            = tls_sk_proto_close;
-
-       prot[TLS_SW_TX] = prot[TLS_BASE];
-       prot[TLS_SW_TX].sendmsg         = tls_sw_sendmsg;
-       prot[TLS_SW_TX].sendpage        = tls_sw_sendpage;
-
-       prot[TLS_SW_RX] = prot[TLS_BASE];
-       prot[TLS_SW_RX].recvmsg         = tls_sw_recvmsg;
-       prot[TLS_SW_RX].close           = tls_sk_proto_close;
-
-       prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
-       prot[TLS_SW_RXTX].recvmsg       = tls_sw_recvmsg;
-       prot[TLS_SW_RXTX].close         = tls_sk_proto_close;
-
-       prot[TLS_HW_RECORD] = *base;
-       prot[TLS_HW_RECORD].hash        = tls_hw_hash;
-       prot[TLS_HW_RECORD].unhash      = tls_hw_unhash;
-       prot[TLS_HW_RECORD].close       = tls_sk_proto_close;
+       prot[TLS_BASE][TLS_BASE] = *base;
+       prot[TLS_BASE][TLS_BASE].setsockopt     = tls_setsockopt;
+       prot[TLS_BASE][TLS_BASE].getsockopt     = tls_getsockopt;
+       prot[TLS_BASE][TLS_BASE].close          = tls_sk_proto_close;
+
+       prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+       prot[TLS_SW][TLS_BASE].sendmsg          = tls_sw_sendmsg;
+       prot[TLS_SW][TLS_BASE].sendpage         = tls_sw_sendpage;
+
+       prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
+       prot[TLS_BASE][TLS_SW].recvmsg          = tls_sw_recvmsg;
+       prot[TLS_BASE][TLS_SW].close            = tls_sk_proto_close;
+
+       prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
+       prot[TLS_SW][TLS_SW].recvmsg    = tls_sw_recvmsg;
+       prot[TLS_SW][TLS_SW].close      = tls_sk_proto_close;
+
+#ifdef CONFIG_TLS_DEVICE
+       prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+       prot[TLS_HW][TLS_BASE].sendmsg          = tls_device_sendmsg;
+       prot[TLS_HW][TLS_BASE].sendpage         = tls_device_sendpage;
+
+       prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
+       prot[TLS_HW][TLS_SW].sendmsg            = tls_device_sendmsg;
+       prot[TLS_HW][TLS_SW].sendpage           = tls_device_sendpage;
+#endif
+
+       prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
+       prot[TLS_HW_RECORD][TLS_HW_RECORD].hash         = tls_hw_hash;
+       prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash       = tls_hw_unhash;
+       prot[TLS_HW_RECORD][TLS_HW_RECORD].close        = tls_sk_proto_close;
 }
 
 static int tls_init(struct sock *sk)
@@ -632,7 +665,7 @@ static int tls_init(struct sock *sk)
        ctx->getsockopt = sk->sk_prot->getsockopt;
        ctx->sk_proto_close = sk->sk_prot->close;
 
        /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
        if (ip_ver == TLSV6 &&
            unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
                mutex_lock(&tcpv6_prot_mutex);
@@ -643,7 +676,8 @@ static int tls_init(struct sock *sk)
                mutex_unlock(&tcpv6_prot_mutex);
        }
 
-       ctx->conf = TLS_BASE;
+       ctx->tx_conf = TLS_BASE;
+       ctx->rx_conf = TLS_BASE;
        update_sk_prot(sk, ctx);
 out:
        return rc;
@@ -681,6 +715,9 @@ static int __init tls_register(void)
        tls_sw_proto_ops.poll = tls_sw_poll;
        tls_sw_proto_ops.splice_read = tls_sw_splice_read;
 
+#ifdef CONFIG_TLS_DEVICE
+       tls_device_init();
+#endif
        tcp_register_ulp(&tcp_tls_ulp_ops);
 
        return 0;
@@ -689,6 +726,9 @@ static int __init tls_register(void)
 static void __exit tls_unregister(void)
 {
        tcp_unregister_ulp(&tcp_tls_ulp_ops);
+#ifdef CONFIG_TLS_DEVICE
+       tls_device_cleanup();
+#endif
 }
 
 module_init(tls_register);
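
For context: the conf split above turns the per-socket proto lookup into a two-dimensional matrix indexed by independent TX and RX states. A sketch of one combination (TX offloaded to the NIC, RX in software):

	ctx->tx_conf = TLS_HW;
	ctx->rx_conf = TLS_SW;
	update_sk_prot(sk, ctx);
	/* sk->sk_prot now points at tls_prots[ip_ver][TLS_HW][TLS_SW]:
	 * sendmsg/sendpage resolve to tls_device_*, recvmsg stays
	 * tls_sw_recvmsg, per build_protos() above.
	 */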
index 71e79597f940a20b7eb49d35e5bcccf5d2c60963..839e1e165a0c619fecafe2afacc62728774d271b 100644 (file)
@@ -52,7 +52,7 @@ static int tls_do_decryption(struct sock *sk,
                             gfp_t flags)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        struct strp_msg *rxm = strp_msg(skb);
        struct aead_request *aead_req;
 
@@ -122,7 +122,7 @@ static void trim_sg(struct sock *sk, struct scatterlist *sg,
 static void trim_both_sgl(struct sock *sk, int target_size)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 
        trim_sg(sk, ctx->sg_plaintext_data,
                &ctx->sg_plaintext_num_elem,
@@ -141,7 +141,7 @@ static void trim_both_sgl(struct sock *sk, int target_size)
 static int alloc_encrypted_sg(struct sock *sk, int len)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
        int rc = 0;
 
        rc = sk_alloc_sg(sk, len,
@@ -155,7 +155,7 @@ static int alloc_encrypted_sg(struct sock *sk, int len)
 static int alloc_plaintext_sg(struct sock *sk, int len)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
        int rc = 0;
 
        rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
@@ -181,7 +181,7 @@ static void free_sg(struct sock *sk, struct scatterlist *sg,
 static void tls_free_both_sg(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 
        free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem,
                &ctx->sg_encrypted_size);
@@ -191,7 +191,7 @@ static void tls_free_both_sg(struct sock *sk)
 }
 
 static int tls_do_encryption(struct tls_context *tls_ctx,
-                            struct tls_sw_context *ctx, size_t data_len,
+                            struct tls_sw_context_tx *ctx, size_t data_len,
                             gfp_t flags)
 {
        unsigned int req_size = sizeof(struct aead_request) +
@@ -227,7 +227,7 @@ static int tls_push_record(struct sock *sk, int flags,
                           unsigned char record_type)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
        int rc;
 
        sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
@@ -339,7 +339,7 @@ static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
                             int bytes)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
        struct scatterlist *sg = ctx->sg_plaintext_data;
        int copy, i, rc = 0;
 
@@ -367,7 +367,7 @@ static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
        int ret = 0;
        int required_size;
        long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -522,7 +522,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
                    int offset, size_t size, int flags)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
        int ret = 0;
        long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
        bool eor;
@@ -636,7 +636,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
                                     long timeo, int *err)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        struct sk_buff *skb;
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
@@ -674,13 +674,12 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
                       struct scatterlist *sgout)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE];
        struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
        struct scatterlist *sgin = &sgin_arr[0];
        struct strp_msg *rxm = strp_msg(skb);
        int ret, nsg = ARRAY_SIZE(sgin_arr);
-       char aad_recv[TLS_AAD_SPACE_SIZE];
        struct sk_buff *unused;
 
        ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
@@ -693,18 +692,17 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
        if (!sgout) {
                nsg = skb_cow_data(skb, 0, &unused) + 1;
                sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
-               if (!sgout)
-                       sgout = sgin;
+               sgout = sgin;
        }
 
        sg_init_table(sgin, nsg);
-       sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+       sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE);
 
        nsg = skb_to_sgvec(skb, &sgin[1],
                           rxm->offset + tls_ctx->rx.prepend_size,
                           rxm->full_len - tls_ctx->rx.prepend_size);
 
-       tls_make_aad(aad_recv,
+       tls_make_aad(ctx->rx_aad_ciphertext,
                     rxm->full_len - tls_ctx->rx.overhead_size,
                     tls_ctx->rx.rec_seq,
                     tls_ctx->rx.rec_seq_size,
@@ -724,7 +722,7 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
                               unsigned int len)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        struct strp_msg *rxm = strp_msg(skb);
 
        if (len < rxm->full_len) {
@@ -750,7 +748,7 @@ int tls_sw_recvmsg(struct sock *sk,
                   int *addr_len)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        unsigned char control;
        struct strp_msg *rxm;
        struct sk_buff *skb;
@@ -803,12 +801,12 @@ int tls_sw_recvmsg(struct sock *sk,
                        if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
                            likely(!(flags & MSG_PEEK)))  {
                                struct scatterlist sgin[MAX_SKB_FRAGS + 1];
-                               char unused[21];
                                int pages = 0;
 
                                zc = true;
                                sg_init_table(sgin, MAX_SKB_FRAGS + 1);
-                               sg_set_buf(&sgin[0], unused, 13);
+                               sg_set_buf(&sgin[0], ctx->rx_aad_plaintext,
+                                          TLS_AAD_SPACE_SIZE);
 
                                err = zerocopy_from_iter(sk, &msg->msg_iter,
                                                         to_copy, &pages,
@@ -870,7 +868,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
                           size_t len, unsigned int flags)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        struct strp_msg *rxm = NULL;
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
@@ -923,7 +921,7 @@ unsigned int tls_sw_poll(struct file *file, struct socket *sock,
        unsigned int ret;
        struct sock *sk = sock->sk;
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
        /* Grab POLLOUT and POLLHUP from the underlying socket */
        ret = ctx->sk_poll(file, sock, wait);
@@ -939,7 +937,7 @@ unsigned int tls_sw_poll(struct file *file, struct socket *sock,
 static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 {
        struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        char header[tls_ctx->rx.prepend_size];
        struct strp_msg *rxm = strp_msg(skb);
        size_t cipher_overhead;
@@ -988,7 +986,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 static void tls_queue(struct strparser *strp, struct sk_buff *skb)
 {
        struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
        struct strp_msg *rxm;
 
        rxm = strp_msg(skb);
@@ -1004,18 +1002,28 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
 static void tls_data_ready(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
        strp_data_ready(&ctx->strp);
 }
 
-void tls_sw_free_resources(struct sock *sk)
+void tls_sw_free_resources_tx(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 
        if (ctx->aead_send)
                crypto_free_aead(ctx->aead_send);
+       tls_free_both_sg(sk);
+
+       kfree(ctx);
+}
+
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
        if (ctx->aead_recv) {
                if (ctx->recv_pkt) {
                        kfree_skb(ctx->recv_pkt);
@@ -1031,10 +1039,7 @@ void tls_sw_free_resources(struct sock *sk)
                lock_sock(sk);
        }
 
-       tls_free_both_sg(sk);
-
        kfree(ctx);
-       kfree(tls_ctx);
 }
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
@@ -1042,7 +1047,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
        struct tls_crypto_info *crypto_info;
        struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
-       struct tls_sw_context *sw_ctx;
+       struct tls_sw_context_tx *sw_ctx_tx = NULL;
+       struct tls_sw_context_rx *sw_ctx_rx = NULL;
        struct cipher_context *cctx;
        struct crypto_aead **aead;
        struct strp_callbacks cb;
@@ -1055,27 +1061,32 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
                goto out;
        }
 
-       if (!ctx->priv_ctx) {
-               sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
-               if (!sw_ctx) {
+       if (tx) {
+               sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+               if (!sw_ctx_tx) {
                        rc = -ENOMEM;
                        goto out;
                }
-               crypto_init_wait(&sw_ctx->async_wait);
+               crypto_init_wait(&sw_ctx_tx->async_wait);
+               ctx->priv_ctx_tx = sw_ctx_tx;
        } else {
-               sw_ctx = ctx->priv_ctx;
+               sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+               if (!sw_ctx_rx) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               crypto_init_wait(&sw_ctx_rx->async_wait);
+               ctx->priv_ctx_rx = sw_ctx_rx;
        }
 
-       ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
-
        if (tx) {
                crypto_info = &ctx->crypto_send;
                cctx = &ctx->tx;
-               aead = &sw_ctx->aead_send;
+               aead = &sw_ctx_tx->aead_send;
        } else {
                crypto_info = &ctx->crypto_recv;
                cctx = &ctx->rx;
-               aead = &sw_ctx->aead_recv;
+               aead = &sw_ctx_rx->aead_recv;
        }
 
        switch (crypto_info->cipher_type) {
@@ -1122,22 +1133,24 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        }
        memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
 
-       if (tx) {
-               sg_init_table(sw_ctx->sg_encrypted_data,
-                             ARRAY_SIZE(sw_ctx->sg_encrypted_data));
-               sg_init_table(sw_ctx->sg_plaintext_data,
-                             ARRAY_SIZE(sw_ctx->sg_plaintext_data));
-
-               sg_init_table(sw_ctx->sg_aead_in, 2);
-               sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
-                          sizeof(sw_ctx->aad_space));
-               sg_unmark_end(&sw_ctx->sg_aead_in[1]);
-               sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
-               sg_init_table(sw_ctx->sg_aead_out, 2);
-               sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
-                          sizeof(sw_ctx->aad_space));
-               sg_unmark_end(&sw_ctx->sg_aead_out[1]);
-               sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+       if (sw_ctx_tx) {
+               sg_init_table(sw_ctx_tx->sg_encrypted_data,
+                             ARRAY_SIZE(sw_ctx_tx->sg_encrypted_data));
+               sg_init_table(sw_ctx_tx->sg_plaintext_data,
+                             ARRAY_SIZE(sw_ctx_tx->sg_plaintext_data));
+
+               sg_init_table(sw_ctx_tx->sg_aead_in, 2);
+               sg_set_buf(&sw_ctx_tx->sg_aead_in[0], sw_ctx_tx->aad_space,
+                          sizeof(sw_ctx_tx->aad_space));
+               sg_unmark_end(&sw_ctx_tx->sg_aead_in[1]);
+               sg_chain(sw_ctx_tx->sg_aead_in, 2,
+                        sw_ctx_tx->sg_plaintext_data);
+               sg_init_table(sw_ctx_tx->sg_aead_out, 2);
+               sg_set_buf(&sw_ctx_tx->sg_aead_out[0], sw_ctx_tx->aad_space,
+                          sizeof(sw_ctx_tx->aad_space));
+               sg_unmark_end(&sw_ctx_tx->sg_aead_out[1]);
+               sg_chain(sw_ctx_tx->sg_aead_out, 2,
+                        sw_ctx_tx->sg_encrypted_data);
        }
 
        if (!*aead) {
@@ -1162,22 +1175,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        if (rc)
                goto free_aead;
 
-       if (!tx) {
+       if (sw_ctx_rx) {
                /* Set up strparser */
                memset(&cb, 0, sizeof(cb));
                cb.rcv_msg = tls_queue;
                cb.parse_msg = tls_read_size;
 
-               strp_init(&sw_ctx->strp, sk, &cb);
+               strp_init(&sw_ctx_rx->strp, sk, &cb);
 
                write_lock_bh(&sk->sk_callback_lock);
-               sw_ctx->saved_data_ready = sk->sk_data_ready;
+               sw_ctx_rx->saved_data_ready = sk->sk_data_ready;
                sk->sk_data_ready = tls_data_ready;
                write_unlock_bh(&sk->sk_callback_lock);
 
-               sw_ctx->sk_poll = sk->sk_socket->ops->poll;
+               sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll;
 
-               strp_check_rcv(&sw_ctx->strp);
+               strp_check_rcv(&sw_ctx_rx->strp);
        }
 
        goto out;
@@ -1189,11 +1202,16 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        kfree(cctx->rec_seq);
        cctx->rec_seq = NULL;
 free_iv:
-       kfree(ctx->tx.iv);
-       ctx->tx.iv = NULL;
+       kfree(cctx->iv);
+       cctx->iv = NULL;
 free_priv:
-       kfree(ctx->priv_ctx);
-       ctx->priv_ctx = NULL;
+       if (tx) {
+               kfree(ctx->priv_ctx_tx);
+               ctx->priv_ctx_tx = NULL;
+       } else {
+               kfree(ctx->priv_ctx_rx);
+               ctx->priv_ctx_rx = NULL;
+       }
 out:
        return rc;
 }
index a6f3cac8c640e4cdb0eb4fb9d3c77bf3fd352576..c0fd8a85e7f72e59fa8e93be41a4dee1ee0a5104 100644 (file)
@@ -95,6 +95,9 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev,
 
        ASSERT_RTNL();
 
+       if (strlen(newname) > NL80211_WIPHY_NAME_MAXLEN)
+               return -EINVAL;
+
        /* prohibit calling the thing phy%d when %d is not its number */
        sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken);
        if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) {
index ff28f8feeb09270296bab5b086e928cc3a76bea0..a052693c2e852b856f96d304cb97b6ddd0ff5319 100644 (file)
@@ -9214,6 +9214,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 
        if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
                if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+                       kzfree(connkeys);
                        GENL_SET_ERR_MSG(info,
                                         "external auth requires connection ownership");
                        return -EINVAL;
index 16c7e4ef58207cc781b80ef9bb3242ead81001a0..ac3e12c32aa30053a110d6a46f5bb813d660c8b6 100644 (file)
@@ -1026,6 +1026,7 @@ static int regdb_query_country(const struct fwdb_header *db,
 
                        if (!tmp_rd) {
                                kfree(regdom);
+                               kfree(wmm_ptrs);
                                return -ENOMEM;
                        }
                        regdom = tmp_rd;
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
new file mode 100644 (file)
index 0000000..90e4a71
--- /dev/null
@@ -0,0 +1,7 @@
+config XDP_SOCKETS
+       bool "XDP sockets"
+       depends on BPF_SYSCALL
+       default n
+       help
+         XDP sockets allow a channel between XDP programs and
+         userspace applications.
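
The option defaults to n; a minimal config fragment to build the AF_XDP code (assuming BPF_SYSCALL is already enabled):

	CONFIG_BPF_SYSCALL=y
	CONFIG_XDP_SOCKETS=y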
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
new file mode 100644 (file)
index 0000000..074fb2b
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
new file mode 100644 (file)
index 0000000..2b47a1d
--- /dev/null
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP user-space packet buffer
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/mm.h>
+
+#include "xdp_umem.h"
+
+#define XDP_UMEM_MIN_FRAME_SIZE 2048
+
+int xdp_umem_create(struct xdp_umem **umem)
+{
+       *umem = kzalloc(sizeof(**umem), GFP_KERNEL);
+
+       if (!(*umem))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void xdp_umem_unpin_pages(struct xdp_umem *umem)
+{
+       unsigned int i;
+
+       if (umem->pgs) {
+               for (i = 0; i < umem->npgs; i++) {
+                       struct page *page = umem->pgs[i];
+
+                       set_page_dirty_lock(page);
+                       put_page(page);
+               }
+
+               kfree(umem->pgs);
+               umem->pgs = NULL;
+       }
+}
+
+static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
+{
+       if (umem->user) {
+               atomic_long_sub(umem->npgs, &umem->user->locked_vm);
+               free_uid(umem->user);
+       }
+}
+
+static void xdp_umem_release(struct xdp_umem *umem)
+{
+       struct task_struct *task;
+       struct mm_struct *mm;
+
+       if (umem->fq) {
+               xskq_destroy(umem->fq);
+               umem->fq = NULL;
+       }
+
+       if (umem->cq) {
+               xskq_destroy(umem->cq);
+               umem->cq = NULL;
+       }
+
+       if (umem->pgs) {
+               xdp_umem_unpin_pages(umem);
+
+               task = get_pid_task(umem->pid, PIDTYPE_PID);
+               put_pid(umem->pid);
+               if (!task)
+                       goto out;
+               mm = get_task_mm(task);
+               put_task_struct(task);
+               if (!mm)
+                       goto out;
+
+               mmput(mm);
+               umem->pgs = NULL;
+       }
+
+       xdp_umem_unaccount_pages(umem);
+out:
+       kfree(umem);
+}
+
+static void xdp_umem_release_deferred(struct work_struct *work)
+{
+       struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
+
+       xdp_umem_release(umem);
+}
+
+void xdp_get_umem(struct xdp_umem *umem)
+{
+       atomic_inc(&umem->users);
+}
+
+void xdp_put_umem(struct xdp_umem *umem)
+{
+       if (!umem)
+               return;
+
+       if (atomic_dec_and_test(&umem->users)) {
+               INIT_WORK(&umem->work, xdp_umem_release_deferred);
+               schedule_work(&umem->work);
+       }
+}
+
+static int xdp_umem_pin_pages(struct xdp_umem *umem)
+{
+       unsigned int gup_flags = FOLL_WRITE;
+       long npgs;
+       int err;
+
+       umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
+       if (!umem->pgs)
+               return -ENOMEM;
+
+       down_write(&current->mm->mmap_sem);
+       npgs = get_user_pages(umem->address, umem->npgs,
+                             gup_flags, &umem->pgs[0], NULL);
+       up_write(&current->mm->mmap_sem);
+
+       if (npgs != umem->npgs) {
+               if (npgs >= 0) {
+                       umem->npgs = npgs;
+                       err = -ENOMEM;
+                       goto out_pin;
+               }
+               err = npgs;
+               goto out_pgs;
+       }
+       return 0;
+
+out_pin:
+       xdp_umem_unpin_pages(umem);
+out_pgs:
+       kfree(umem->pgs);
+       umem->pgs = NULL;
+       return err;
+}
+
+static int xdp_umem_account_pages(struct xdp_umem *umem)
+{
+       unsigned long lock_limit, new_npgs, old_npgs;
+
+       if (capable(CAP_IPC_LOCK))
+               return 0;
+
+       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       umem->user = get_uid(current_user());
+
+       do {
+               old_npgs = atomic_long_read(&umem->user->locked_vm);
+               new_npgs = old_npgs + umem->npgs;
+               if (new_npgs > lock_limit) {
+                       free_uid(umem->user);
+                       umem->user = NULL;
+                       return -ENOBUFS;
+               }
+       } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
+                                    new_npgs) != old_npgs);
+       return 0;
+}
+
+int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+{
+       u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
+       u64 addr = mr->addr, size = mr->len;
+       unsigned int nframes, nfpp;
+       int size_chk, err;
+
+       if (!umem)
+               return -EINVAL;
+
+       if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
+               /* Strictly speaking we could support this, if:
+                * - huge pages, or
+                * - using an IOMMU, or
+                * - making sure the memory area is consecutive
+                * but for now, we simply say "computer says no".
+                */
+               return -EINVAL;
+       }
+
+       if (!is_power_of_2(frame_size))
+               return -EINVAL;
+
+       if (!PAGE_ALIGNED(addr)) {
+               /* Memory area has to be page size aligned, for
+                * simplicity; this requirement might change.
+                */
+               return -EINVAL;
+       }
+
+       if ((addr + size) < addr)
+               return -EINVAL;
+
+       nframes = (unsigned int)div_u64(size, frame_size);
+       if (nframes == 0 || nframes > UINT_MAX)
+               return -EINVAL;
+
+       nfpp = PAGE_SIZE / frame_size;
+       if (nframes < nfpp || nframes % nfpp)
+               return -EINVAL;
+
+       frame_headroom = ALIGN(frame_headroom, 64);
+
+       size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
+       if (size_chk < 0)
+               return -EINVAL;
+
+       umem->pid = get_task_pid(current, PIDTYPE_PID);
+       umem->size = (size_t)size;
+       umem->address = (unsigned long)addr;
+       umem->props.frame_size = frame_size;
+       umem->props.nframes = nframes;
+       umem->frame_headroom = frame_headroom;
+       umem->npgs = size / PAGE_SIZE;
+       umem->pgs = NULL;
+       umem->user = NULL;
+
+       umem->frame_size_log2 = ilog2(frame_size);
+       umem->nfpp_mask = nfpp - 1;
+       umem->nfpplog2 = ilog2(nfpp);
+       atomic_set(&umem->users, 1);
+
+       err = xdp_umem_account_pages(umem);
+       if (err)
+               goto out;
+
+       err = xdp_umem_pin_pages(umem);
+       if (err)
+               goto out_account;
+       return 0;
+
+out_account:
+       xdp_umem_unaccount_pages(umem);
+out:
+       put_pid(umem->pid);
+       return err;
+}
+
+bool xdp_umem_validate_queues(struct xdp_umem *umem)
+{
+       return (umem->fq && umem->cq);
+}
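
For context: xdp_umem_reg() above is driven from user space via setsockopt() on an AF_XDP socket. A minimal caller-side sketch using the uapi names this series adds in linux/if_xdp.h (error handling omitted):

	#include <linux/if_xdp.h>
	#include <sys/socket.h>
	#include <stdlib.h>
	#include <unistd.h>

	int fd = socket(AF_XDP, SOCK_RAW, 0);
	struct xdp_umem_reg mr = { .frame_size = 2048, .frame_headroom = 0 };
	void *area;

	/* page-aligned area, power-of-2 frame size >= 2048, as enforced above */
	posix_memalign(&area, getpagesize(), 16 * 2048);
	mr.addr = (unsigned long long)area;
	mr.len = 16 * 2048;
	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));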
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
new file mode 100644 (file)
index 0000000..7e0b2fa
--- /dev/null
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * XDP user-space packet buffer
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef XDP_UMEM_H_
+#define XDP_UMEM_H_
+
+#include <linux/mm.h>
+#include <linux/if_xdp.h>
+#include <linux/workqueue.h>
+
+#include "xsk_queue.h"
+#include "xdp_umem_props.h"
+
+struct xdp_umem {
+       struct xsk_queue *fq;
+       struct xsk_queue *cq;
+       struct page **pgs;
+       struct xdp_umem_props props;
+       u32 npgs;
+       u32 frame_headroom;
+       u32 nfpp_mask;
+       u32 nfpplog2;
+       u32 frame_size_log2;
+       struct user_struct *user;
+       struct pid *pid;
+       unsigned long address;
+       size_t size;
+       atomic_t users;
+       struct work_struct work;
+};
+
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx)
+{
+       u64 pg, off;
+       char *data;
+
+       pg = idx >> umem->nfpplog2;
+       off = (idx & umem->nfpp_mask) << umem->frame_size_log2;
+
+       data = page_address(umem->pgs[pg]);
+       return data + off;
+}
+
+static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
+                                                   u32 idx)
+{
+       return xdp_umem_get_data(umem, idx) + umem->frame_headroom;
+}
+
+bool xdp_umem_validate_queues(struct xdp_umem *umem);
+int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
+void xdp_get_umem(struct xdp_umem *umem);
+void xdp_put_umem(struct xdp_umem *umem);
+int xdp_umem_create(struct xdp_umem **umem);
+
+#endif /* XDP_UMEM_H_ */
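
Worked example for the helpers above: with frame_size = 2048 and PAGE_SIZE = 4096, nfpp = 2, so frame_size_log2 = 11, nfpplog2 = 1 and nfpp_mask = 1; idx = 5 then yields pg = 5 >> 1 = 2 and off = (5 & 1) << 11 = 2048, i.e. the second half of the third pinned page.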
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
new file mode 100644 (file)
index 0000000..77fb5da
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * XDP user-space packet buffer
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef XDP_UMEM_PROPS_H_
+#define XDP_UMEM_PROPS_H_
+
+struct xdp_umem_props {
+       u32 frame_size;
+       u32 nframes;
+};
+
+#endif /* XDP_UMEM_PROPS_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
new file mode 100644 (file)
index 0000000..009c5af
--- /dev/null
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets
+ *
+ * AF_XDP sockets allow a channel between XDP programs and userspace
+ * applications.
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ *           Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
+
+#include <linux/if_xdp.h>
+#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+
+#include "xsk_queue.h"
+#include "xdp_umem.h"
+
+#define TX_BATCH_SIZE 16
+
+static struct xdp_sock *xdp_sk(struct sock *sk)
+{
+       return (struct xdp_sock *)sk;
+}
+
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+{
+       return !!xs->rx;
+}
+
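+/* Copy-based RX: a frame id is peeked from the umem fill queue (fq),
+ * the packet is copied into that frame, and a descriptor pointing at
+ * it is produced on the socket's rx ring; the id is only consumed from
+ * the fill queue once the descriptor was queued successfully.
+ */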
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+       u32 *id, len = xdp->data_end - xdp->data;
+       void *buffer;
+       int err = 0;
+
+       if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+               return -EINVAL;
+
+       id = xskq_peek_id(xs->umem->fq);
+       if (!id)
+               return -ENOSPC;
+
+       buffer = xdp_umem_get_data_with_headroom(xs->umem, *id);
+       memcpy(buffer, xdp->data, len);
+       err = xskq_produce_batch_desc(xs->rx, *id, len,
+                                     xs->umem->frame_headroom);
+       if (!err)
+               xskq_discard_id(xs->umem->fq);
+
+       return err;
+}
+
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+       int err;
+
+       err = __xsk_rcv(xs, xdp);
+       if (likely(!err))
+               xdp_return_buff(xdp);
+       else
+               xs->rx_dropped++;
+
+       return err;
+}
+
+void xsk_flush(struct xdp_sock *xs)
+{
+       xskq_produce_flush_desc(xs->rx);
+       xs->sk.sk_data_ready(&xs->sk);
+}
+
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+       int err;
+
+       err = __xsk_rcv(xs, xdp);
+       if (!err)
+               xsk_flush(xs);
+       else
+               xs->rx_dropped++;
+
+       return err;
+}
+
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+       u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
+       struct xdp_sock *xs = xdp_sk(skb->sk);
+
+       WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
+
+       sock_wfree(skb);
+}
+
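+/* Copy-based generic TX: descriptors are consumed from the tx ring,
+ * copied into freshly allocated skbs and handed to dev_direct_xmit();
+ * the frame id is reported back on the umem completion queue (cq)
+ * from xsk_destruct_skb() above once the skb is freed.
+ */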
+static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
+                           size_t total_len)
+{
+       bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
+       u32 max_batch = TX_BATCH_SIZE;
+       struct xdp_sock *xs = xdp_sk(sk);
+       bool sent_frame = false;
+       struct xdp_desc desc;
+       struct sk_buff *skb;
+       int err = 0;
+
+       if (unlikely(!xs->tx))
+               return -ENOBUFS;
+       if (need_wait)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&xs->mutex);
+
+       while (xskq_peek_desc(xs->tx, &desc)) {
+               char *buffer;
+               u32 id, len;
+
+               if (max_batch-- == 0) {
+                       err = -EAGAIN;
+                       goto out;
+               }
+
+               if (xskq_reserve_id(xs->umem->cq)) {
+                       err = -EAGAIN;
+                       goto out;
+               }
+
+               len = desc.len;
+               if (unlikely(len > xs->dev->mtu)) {
+                       err = -EMSGSIZE;
+                       goto out;
+               }
+
+               skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+               if (unlikely(!skb)) {
+                       err = -EAGAIN;
+                       goto out;
+               }
+
+               skb_put(skb, len);
+               id = desc.idx;
+               buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
+               err = skb_store_bits(skb, 0, buffer, len);
+               if (unlikely(err)) {
+                       kfree_skb(skb);
+                       goto out;
+               }
+
+               skb->dev = xs->dev;
+               skb->priority = sk->sk_priority;
+               skb->mark = sk->sk_mark;
+               skb_shinfo(skb)->destructor_arg = (void *)(long)id;
+               skb->destructor = xsk_destruct_skb;
+
+               err = dev_direct_xmit(skb, xs->queue_id);
+               /* Ignore NET_XMIT_CN as packet might have been sent */
+               if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
+                       err = -EAGAIN;
+                       /* SKB consumed by dev_direct_xmit() */
+                       goto out;
+               }
+
+               sent_frame = true;
+               xskq_discard_desc(xs->tx);
+       }
+
+out:
+       if (sent_frame)
+               sk->sk_write_space(sk);
+
+       mutex_unlock(&xs->mutex);
+       return err;
+}
+
+static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+{
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
+
+       if (unlikely(!xs->dev))
+               return -ENXIO;
+       if (unlikely(!(xs->dev->flags & IFF_UP)))
+               return -ENETDOWN;
+
+       return xsk_generic_xmit(sk, m, total_len);
+}
+
+static unsigned int xsk_poll(struct file *file, struct socket *sock,
+                            struct poll_table_struct *wait)
+{
+       unsigned int mask = datagram_poll(file, sock, wait);
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
+
+       if (xs->rx && !xskq_empty_desc(xs->rx))
+               mask |= POLLIN | POLLRDNORM;
+       if (xs->tx && !xskq_full_desc(xs->tx))
+               mask |= POLLOUT | POLLWRNORM;
+
+       return mask;
+}
+
+static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
+                         bool umem_queue)
+{
+       struct xsk_queue *q;
+
+       if (entries == 0 || *queue || !is_power_of_2(entries))
+               return -EINVAL;
+
+       q = xskq_create(entries, umem_queue);
+       if (!q)
+               return -ENOMEM;
+
+       *queue = q;
+       return 0;
+}
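
xsk_init_queue() pins down the ring contract: each ring can be configured only once, and its size must be a nonzero power of two (the masking arithmetic in xsk_queue.h depends on it). A caller-side round-up, sketched purely for illustration:

    /* Sketch: round a requested size up to the next power of two, to
     * satisfy the is_power_of_2() check in xsk_init_queue().
     */
    static unsigned int xsk_ring_size(unsigned int entries)
    {
            unsigned int size = 1;

            while (size < entries)
                    size <<= 1;
            return size;
    }
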
+
+static void __xsk_release(struct xdp_sock *xs)
+{
+       /* Wait for driver to stop using the xdp socket. */
+       synchronize_net();
+
+       dev_put(xs->dev);
+}
+
+static int xsk_release(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
+       struct net *net;
+
+       if (!sk)
+               return 0;
+
+       net = sock_net(sk);
+
+       local_bh_disable();
+       sock_prot_inuse_add(net, sk->sk_prot, -1);
+       local_bh_enable();
+
+       if (xs->dev) {
+               __xsk_release(xs);
+               xs->dev = NULL;
+       }
+
+       sock_orphan(sk);
+       sock->sk = NULL;
+
+       sk_refcnt_debug_release(sk);
+       sock_put(sk);
+
+       return 0;
+}
+
+static struct socket *xsk_lookup_xsk_from_fd(int fd)
+{
+       struct socket *sock;
+       int err;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               return ERR_PTR(-ENOTSOCK);
+
+       if (sock->sk->sk_family != PF_XDP) {
+               sockfd_put(sock);
+               return ERR_PTR(-ENOPROTOOPT);
+       }
+
+       return sock;
+}
+
+static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+       struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
+       struct sock *sk = sock->sk;
+       struct net_device *dev, *dev_curr;
+       struct xdp_sock *xs = xdp_sk(sk);
+       struct xdp_umem *old_umem = NULL;
+       int err = 0;
+
+       if (addr_len < sizeof(struct sockaddr_xdp))
+               return -EINVAL;
+       if (sxdp->sxdp_family != AF_XDP)
+               return -EINVAL;
+
+       mutex_lock(&xs->mutex);
+       dev_curr = xs->dev;
+       dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
+       if (!dev) {
+               err = -ENODEV;
+               goto out_release;
+       }
+
+       if (!xs->rx && !xs->tx) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       if (sxdp->sxdp_flags & XDP_SHARED_UMEM) {
+               struct xdp_sock *umem_xs;
+               struct socket *sock;
+
+               if (xs->umem) {
+                       /* We already have our own. */
+                       err = -EINVAL;
+                       goto out_unlock;
+               }
+
+               sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
+               if (IS_ERR(sock)) {
+                       err = PTR_ERR(sock);
+                       goto out_unlock;
+               }
+
+               umem_xs = xdp_sk(sock->sk);
+               if (!umem_xs->umem) {
+                       /* No umem to inherit. */
+                       err = -EBADF;
+                       sockfd_put(sock);
+                       goto out_unlock;
+               } else if (umem_xs->dev != dev ||
+                          umem_xs->queue_id != sxdp->sxdp_queue_id) {
+                       err = -EINVAL;
+                       sockfd_put(sock);
+                       goto out_unlock;
+               }
+
+               xdp_get_umem(umem_xs->umem);
+               old_umem = xs->umem;
+               xs->umem = umem_xs->umem;
+               sockfd_put(sock);
+       } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
+               err = -EINVAL;
+               goto out_unlock;
+       } else {
+               /* This xsk has its own umem. */
+               xskq_set_umem(xs->umem->fq, &xs->umem->props);
+               xskq_set_umem(xs->umem->cq, &xs->umem->props);
+       }
+
+       /* Rebind? */
+       if (dev_curr && (dev_curr != dev ||
+                        xs->queue_id != sxdp->sxdp_queue_id)) {
+               __xsk_release(xs);
+               if (old_umem)
+                       xdp_put_umem(old_umem);
+       }
+
+       xs->dev = dev;
+       xs->queue_id = sxdp->sxdp_queue_id;
+
+       xskq_set_umem(xs->rx, &xs->umem->props);
+       xskq_set_umem(xs->tx, &xs->umem->props);
+
+out_unlock:
+       if (err)
+               dev_put(dev);
+out_release:
+       mutex_unlock(&xs->mutex);
+       return err;
+}
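
xsk_bind() attaches the socket to one netdev queue; with XDP_SHARED_UMEM it can instead inherit the umem of another AF_XDP socket, but only one bound to the same device and queue id. A hedged user-space sketch (AF_XDP value and the sockaddr_xdp fields as added by this series):

    #include <linux/if_xdp.h>
    #include <net/if.h>
    #include <sys/socket.h>

    #ifndef AF_XDP
    #define AF_XDP 44       /* from this series' linux/socket.h addition */
    #endif

    /* Sketch: bind to one RX queue. Fails with -EINVAL if the socket
     * has neither an RX nor a TX ring, or if queue_id is out of range.
     */
    static int xsk_bind_queue(int fd, const char *ifname, __u32 queue_id)
    {
            struct sockaddr_xdp sxdp = {};

            sxdp.sxdp_family = AF_XDP;
            sxdp.sxdp_ifindex = if_nametoindex(ifname);
            sxdp.sxdp_queue_id = queue_id;

            return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
    }
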
+
+static int xsk_setsockopt(struct socket *sock, int level, int optname,
+                         char __user *optval, unsigned int optlen)
+{
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
+       int err;
+
+       if (level != SOL_XDP)
+               return -ENOPROTOOPT;
+
+       switch (optname) {
+       case XDP_RX_RING:
+       case XDP_TX_RING:
+       {
+               struct xsk_queue **q;
+               int entries;
+
+               if (optlen < sizeof(entries))
+                       return -EINVAL;
+               if (copy_from_user(&entries, optval, sizeof(entries)))
+                       return -EFAULT;
+
+               mutex_lock(&xs->mutex);
+               q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
+               err = xsk_init_queue(entries, q, false);
+               mutex_unlock(&xs->mutex);
+               return err;
+       }
+       case XDP_UMEM_REG:
+       {
+               struct xdp_umem_reg mr;
+               struct xdp_umem *umem;
+
+               if (xs->umem)
+                       return -EBUSY;
+
+               if (copy_from_user(&mr, optval, sizeof(mr)))
+                       return -EFAULT;
+
+               mutex_lock(&xs->mutex);
+               err = xdp_umem_create(&umem);
+               if (err) {
+                       mutex_unlock(&xs->mutex);
+                       return err;
+               }
+
+               err = xdp_umem_reg(umem, &mr);
+               if (err) {
+                       kfree(umem);
+                       mutex_unlock(&xs->mutex);
+                       return err;
+               }
+
+               /* Make sure umem is ready before it can be seen by others */
+               smp_wmb();
+
+               xs->umem = umem;
+               mutex_unlock(&xs->mutex);
+               return 0;
+       }
+       case XDP_UMEM_FILL_RING:
+       case XDP_UMEM_COMPLETION_RING:
+       {
+               struct xsk_queue **q;
+               int entries;
+
+               if (!xs->umem)
+                       return -EINVAL;
+
+               if (optlen < sizeof(entries))
+                       return -EINVAL;
+               if (copy_from_user(&entries, optval, sizeof(entries)))
+                       return -EFAULT;
+
+               mutex_lock(&xs->mutex);
+               q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
+                       &xs->umem->cq;
+               err = xsk_init_queue(entries, q, true);
+               mutex_unlock(&xs->mutex);
+               return err;
+       }
+       default:
+               break;
+       }
+
+       return -ENOPROTOOPT;
+}
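
Taken together, the setsockopt() cases imply a configuration order: the umem must be registered before its fill and completion rings can be sized, while the RX and TX rings are independent. A sketch of one valid sequence (SOL_XDP and the xdp_umem_reg fields as defined by this series' uapi additions; the frame size is illustrative):

    #include <linux/if_xdp.h>
    #include <sys/socket.h>

    #ifndef SOL_XDP
    #define SOL_XDP 283     /* from this series' linux/socket.h addition */
    #endif

    static int xsk_configure(int fd, void *umem_area, __u64 len)
    {
            struct xdp_umem_reg mr = {};
            int entries = 1024;     /* must be a power of two */

            mr.addr = (__u64)(unsigned long)umem_area;
            mr.len = len;
            mr.frame_size = 2048;   /* illustrative */

            if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)))
                    return -1;
            if (setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &entries, sizeof(entries)))
                    return -1;
            if (setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &entries, sizeof(entries)))
                    return -1;
            if (setsockopt(fd, SOL_XDP, XDP_RX_RING, &entries, sizeof(entries)))
                    return -1;
            return setsockopt(fd, SOL_XDP, XDP_TX_RING, &entries, sizeof(entries));
    }
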
+
+static int xsk_getsockopt(struct socket *sock, int level, int optname,
+                         char __user *optval, int __user *optlen)
+{
+       struct sock *sk = sock->sk;
+       struct xdp_sock *xs = xdp_sk(sk);
+       int len;
+
+       if (level != SOL_XDP)
+               return -ENOPROTOOPT;
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+       if (len < 0)
+               return -EINVAL;
+
+       switch (optname) {
+       case XDP_STATISTICS:
+       {
+               struct xdp_statistics stats;
+
+               if (len < sizeof(stats))
+                       return -EINVAL;
+
+               mutex_lock(&xs->mutex);
+               stats.rx_dropped = xs->rx_dropped;
+               stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
+               stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
+               mutex_unlock(&xs->mutex);
+
+               if (copy_to_user(optval, &stats, sizeof(stats)))
+                       return -EFAULT;
+               if (put_user(sizeof(stats), optlen))
+                       return -EFAULT;
+
+               return 0;
+       }
+       default:
+               break;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int xsk_mmap(struct file *file, struct socket *sock,
+                   struct vm_area_struct *vma)
+{
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+       unsigned long size = vma->vm_end - vma->vm_start;
+       struct xdp_sock *xs = xdp_sk(sock->sk);
+       struct xsk_queue *q = NULL;
+       unsigned long pfn;
+       struct page *qpg;
+
+       if (offset == XDP_PGOFF_RX_RING) {
+               q = xs->rx;
+       } else if (offset == XDP_PGOFF_TX_RING) {
+               q = xs->tx;
+       } else {
+               if (!xs->umem)
+                       return -EINVAL;
+
+               if (offset == XDP_UMEM_PGOFF_FILL_RING)
+                       q = xs->umem->fq;
+               else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
+                       q = xs->umem->cq;
+       }
+
+       if (!q)
+               return -EINVAL;
+
+       qpg = virt_to_head_page(q->ring);
+       if (size > (PAGE_SIZE << compound_order(qpg)))
+               return -EINVAL;
+
+       pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
+       return remap_pfn_range(vma, vma->vm_start, pfn,
+                              size, vma->vm_page_prot);
+}
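
Each ring is exported at a distinct page offset, so user space selects the ring purely through the mmap() offset argument. A sketch for the RX ring (the offset constant is from the uapi header; the mapping length would normally be derived from the ring layout there):

    #include <linux/if_xdp.h>
    #include <sys/mman.h>

    /* Sketch: map the RX ring. xsk_mmap() rejects sizes larger than
     * the compound pages backing the ring.
     */
    static void *xsk_map_rx_ring(int fd, size_t size)
    {
            return mmap(NULL, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_POPULATE, fd, XDP_PGOFF_RX_RING);
    }
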
+
+static struct proto xsk_proto = {
+       .name =         "XDP",
+       .owner =        THIS_MODULE,
+       .obj_size =     sizeof(struct xdp_sock),
+};
+
+static const struct proto_ops xsk_proto_ops = {
+       .family =       PF_XDP,
+       .owner =        THIS_MODULE,
+       .release =      xsk_release,
+       .bind =         xsk_bind,
+       .connect =      sock_no_connect,
+       .socketpair =   sock_no_socketpair,
+       .accept =       sock_no_accept,
+       .getname =      sock_no_getname,
+       .poll =         xsk_poll,
+       .ioctl =        sock_no_ioctl,
+       .listen =       sock_no_listen,
+       .shutdown =     sock_no_shutdown,
+       .setsockopt =   xsk_setsockopt,
+       .getsockopt =   xsk_getsockopt,
+       .sendmsg =      xsk_sendmsg,
+       .recvmsg =      sock_no_recvmsg,
+       .mmap =         xsk_mmap,
+       .sendpage =     sock_no_sendpage,
+};
+
+static void xsk_destruct(struct sock *sk)
+{
+       struct xdp_sock *xs = xdp_sk(sk);
+
+       if (!sock_flag(sk, SOCK_DEAD))
+               return;
+
+       xskq_destroy(xs->rx);
+       xskq_destroy(xs->tx);
+       xdp_put_umem(xs->umem);
+
+       sk_refcnt_debug_dec(sk);
+}
+
+static int xsk_create(struct net *net, struct socket *sock, int protocol,
+                     int kern)
+{
+       struct sock *sk;
+       struct xdp_sock *xs;
+
+       if (!ns_capable(net->user_ns, CAP_NET_RAW))
+               return -EPERM;
+       if (sock->type != SOCK_RAW)
+               return -ESOCKTNOSUPPORT;
+
+       if (protocol)
+               return -EPROTONOSUPPORT;
+
+       sock->state = SS_UNCONNECTED;
+
+       sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
+       if (!sk)
+               return -ENOBUFS;
+
+       sock->ops = &xsk_proto_ops;
+
+       sock_init_data(sock, sk);
+
+       sk->sk_family = PF_XDP;
+
+       sk->sk_destruct = xsk_destruct;
+       sk_refcnt_debug_inc(sk);
+
+       xs = xdp_sk(sk);
+       mutex_init(&xs->mutex);
+
+       local_bh_disable();
+       sock_prot_inuse_add(net, &xsk_proto, 1);
+       local_bh_enable();
+
+       return 0;
+}
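
Socket creation is deliberately narrow: CAP_NET_RAW, type SOCK_RAW, protocol 0. From user space that is just the following (a sketch; AF_XDP is 44 in this series' linux/socket.h addition):

    /* EPERM without CAP_NET_RAW, ESOCKTNOSUPPORT for other socket
     * types, EPROTONOSUPPORT for a nonzero protocol.
     */
    int fd = socket(AF_XDP, SOCK_RAW, 0);
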
+
+static const struct net_proto_family xsk_family_ops = {
+       .family = PF_XDP,
+       .create = xsk_create,
+       .owner  = THIS_MODULE,
+};
+
+static int __init xsk_init(void)
+{
+       int err;
+
+       err = proto_register(&xsk_proto, 0 /* no slab */);
+       if (err)
+               goto out;
+
+       err = sock_register(&xsk_family_ops);
+       if (err)
+               goto out_proto;
+
+       return 0;
+
+out_proto:
+       proto_unregister(&xsk_proto);
+out:
+       return err;
+}
+
+fs_initcall(xsk_init);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
new file mode 100644 (file)
index 0000000..d012e5e
--- /dev/null
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP user-space ring structure
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/slab.h>
+
+#include "xsk_queue.h"
+
+void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
+{
+       if (!q)
+               return;
+
+       q->umem_props = *umem_props;
+}
+
+static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
+{
+       return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u32);
+}
+
+static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
+{
+       return (sizeof(struct xdp_ring) +
+               q->nentries * sizeof(struct xdp_desc));
+}
+
+struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
+{
+       struct xsk_queue *q;
+       gfp_t gfp_flags;
+       size_t size;
+
+       q = kzalloc(sizeof(*q), GFP_KERNEL);
+       if (!q)
+               return NULL;
+
+       q->nentries = nentries;
+       q->ring_mask = nentries - 1;
+
+       gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
+                   __GFP_COMP  | __GFP_NORETRY;
+       size = umem_queue ? xskq_umem_get_ring_size(q) :
+              xskq_rxtx_get_ring_size(q);
+
+       q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
+                                                     get_order(size));
+       if (!q->ring) {
+               kfree(q);
+               return NULL;
+       }
+
+       return q;
+}
+
+void xskq_destroy(struct xsk_queue *q)
+{
+       if (!q)
+               return;
+
+       page_frag_free(q->ring);
+       kfree(q);
+}
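
The two sizing helpers make the footprint easy to work out. For 1024 entries, assuming the uapi layouts from this series (16 bytes per struct xdp_desc), the arithmetic is roughly:

    /* Illustrative arithmetic, not part of the patch: */
    size_t umem_ring = sizeof(struct xdp_umem_ring)
                       + 1024 * sizeof(__u32);              /* header + 4 KiB  */
    size_t rxtx_ring = sizeof(struct xdp_ring)
                       + 1024 * sizeof(struct xdp_desc);    /* header + 16 KiB */

get_order() then rounds each allocation up to a power-of-two number of pages, which is what lets xsk_mmap() bound the user mapping by the compound page order.
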
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
new file mode 100644 (file)
index 0000000..7aa9a53
--- /dev/null
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * XDP user-space ring structure
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_XSK_QUEUE_H
+#define _LINUX_XSK_QUEUE_H
+
+#include <linux/types.h>
+#include <linux/if_xdp.h>
+
+#include "xdp_umem_props.h"
+
+#define RX_BATCH_SIZE 16
+
+struct xsk_queue {
+       struct xdp_umem_props umem_props;
+       u32 ring_mask;
+       u32 nentries;
+       u32 prod_head;
+       u32 prod_tail;
+       u32 cons_head;
+       u32 cons_tail;
+       struct xdp_ring *ring;
+       u64 invalid_descs;
+};
+
+/* Common functions operating on both RXTX and umem queues */
+
+static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
+{
+       return q ? q->invalid_descs : 0;
+}
+
+static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
+{
+       u32 entries = q->prod_tail - q->cons_tail;
+
+       if (entries == 0) {
+               /* Refresh the local pointer */
+               q->prod_tail = READ_ONCE(q->ring->producer);
+               entries = q->prod_tail - q->cons_tail;
+       }
+
+       return (entries > dcnt) ? dcnt : entries;
+}
+
+static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
+{
+       u32 free_entries = q->nentries - (producer - q->cons_tail);
+
+       if (free_entries >= dcnt)
+               return free_entries;
+
+       /* Refresh the local tail pointer */
+       q->cons_tail = READ_ONCE(q->ring->consumer);
+       return q->nentries - (producer - q->cons_tail);
+}
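
Both helpers depend on the indices being free-running u32 counters that are masked with ring_mask only at access time, so the subtractions stay correct across wraparound:

    /* Unsigned wraparound keeps the count right: */
    __u32 prod_tail = 2;                    /* wrapped past UINT32_MAX */
    __u32 cons_tail = 0xfffffffeU;
    __u32 entries   = prod_tail - cons_tail;        /* == 4 */
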
+
+/* UMEM queue */
+
+static inline bool xskq_is_valid_id(struct xsk_queue *q, u32 idx)
+{
+       if (unlikely(idx >= q->umem_props.nframes)) {
+               q->invalid_descs++;
+               return false;
+       }
+       return true;
+}
+
+static inline u32 *xskq_validate_id(struct xsk_queue *q)
+{
+       while (q->cons_tail != q->cons_head) {
+               struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+               unsigned int idx = q->cons_tail & q->ring_mask;
+
+               if (xskq_is_valid_id(q, ring->desc[idx]))
+                       return &ring->desc[idx];
+
+               q->cons_tail++;
+       }
+
+       return NULL;
+}
+
+static inline u32 *xskq_peek_id(struct xsk_queue *q)
+{
+       struct xdp_umem_ring *ring;
+
+       if (q->cons_tail == q->cons_head) {
+               WRITE_ONCE(q->ring->consumer, q->cons_tail);
+               q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
+
+               /* Order consumer and data */
+               smp_rmb();
+
+               return xskq_validate_id(q);
+       }
+
+       ring = (struct xdp_umem_ring *)q->ring;
+       return &ring->desc[q->cons_tail & q->ring_mask];
+}
+
+static inline void xskq_discard_id(struct xsk_queue *q)
+{
+       q->cons_tail++;
+       (void)xskq_validate_id(q);
+}
+
+static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
+{
+       struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+       ring->desc[q->prod_tail++ & q->ring_mask] = id;
+
+       /* Order producer and data */
+       smp_wmb();
+
+       WRITE_ONCE(q->ring->producer, q->prod_tail);
+       return 0;
+}
+
+static inline int xskq_reserve_id(struct xsk_queue *q)
+{
+       if (xskq_nb_free(q, q->prod_head, 1) == 0)
+               return -ENOSPC;
+
+       q->prod_head++;
+       return 0;
+}
+
+/* Rx/Tx queue */
+
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+{
+       u32 buff_len;
+
+       if (unlikely(d->idx >= q->umem_props.nframes)) {
+               q->invalid_descs++;
+               return false;
+       }
+
+       buff_len = q->umem_props.frame_size;
+       if (unlikely(d->len > buff_len || d->len == 0 ||
+                    d->offset > buff_len || d->offset + d->len > buff_len)) {
+               q->invalid_descs++;
+               return false;
+       }
+
+       return true;
+}
+
+static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
+                                                 struct xdp_desc *desc)
+{
+       while (q->cons_tail != q->cons_head) {
+               struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+               unsigned int idx = q->cons_tail & q->ring_mask;
+
+               if (xskq_is_valid_desc(q, &ring->desc[idx])) {
+                       if (desc)
+                               *desc = ring->desc[idx];
+                       return desc;
+               }
+
+               q->cons_tail++;
+       }
+
+       return NULL;
+}
+
+static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
+                                             struct xdp_desc *desc)
+{
+       struct xdp_rxtx_ring *ring;
+
+       if (q->cons_tail == q->cons_head) {
+               WRITE_ONCE(q->ring->consumer, q->cons_tail);
+               q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
+
+               /* Order consumer and data */
+               smp_rmb();
+
+               return xskq_validate_desc(q, desc);
+       }
+
+       ring = (struct xdp_rxtx_ring *)q->ring;
+       *desc = ring->desc[q->cons_tail & q->ring_mask];
+       return desc;
+}
+
+static inline void xskq_discard_desc(struct xsk_queue *q)
+{
+       q->cons_tail++;
+       (void)xskq_validate_desc(q, NULL);
+}
+
+static inline int xskq_produce_batch_desc(struct xsk_queue *q,
+                                         u32 id, u32 len, u16 offset)
+{
+       struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+       unsigned int idx;
+
+       if (xskq_nb_free(q, q->prod_head, 1) == 0)
+               return -ENOSPC;
+
+       idx = (q->prod_head++) & q->ring_mask;
+       ring->desc[idx].idx = id;
+       ring->desc[idx].len = len;
+       ring->desc[idx].offset = offset;
+
+       return 0;
+}
+
+static inline void xskq_produce_flush_desc(struct xsk_queue *q)
+{
+       /* Order producer and data */
+       smp_wmb();
+
+       q->prod_tail = q->prod_head;
+       WRITE_ONCE(q->ring->producer, q->prod_tail);
+}
+
+static inline bool xskq_full_desc(struct xsk_queue *q)
+{
+       return (xskq_nb_avail(q, q->nentries) == q->nentries);
+}
+
+static inline bool xskq_empty_desc(struct xsk_queue *q)
+{
+       return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries);
+}
+
+void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
+struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
+void xskq_destroy(struct xsk_queue *q_ops);
+
+#endif /* _LINUX_XSK_QUEUE_H */
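
The smp_rmb()/smp_wmb() calls in this header pair with barriers that user space must issue on its side of the shared rings: read the producer index before the descriptors it covers, and publish data before bumping an index. A consumer-side sketch using a full barrier for simplicity (struct names are from the uapi header; this is not part of the patch):

    /* Sketch: drain an RX ring mapped with xsk_mmap(). */
    static void xsk_drain_rx(struct xdp_rxtx_ring *ring, __u32 *consp, __u32 mask)
    {
            __u32 cons = *consp;
            __u32 prod = *(volatile __u32 *)&ring->ptrs.producer;

            __sync_synchronize();   /* pairs with the kernel's smp_wmb() */
            while (cons != prod) {
                    struct xdp_desc d = ring->desc[cons++ & mask];
                    (void)d;        /* ... process the descriptor ... */
            }
            __sync_synchronize();   /* order reads before the release */
            *(volatile __u32 *)&ring->ptrs.consumer = cons;
            *consp = cons;
    }
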
index f9d2f2233f09531697b35209fe86754d23971e3f..8308281f32530bd9103e7f7bd06472824600c526 100644 (file)
@@ -42,6 +42,7 @@ static void xfrm_state_gc_task(struct work_struct *work);
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+static struct kmem_cache *xfrm_state_cache __ro_after_init;
 
 static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
 static HLIST_HEAD(xfrm_state_gc_list);
@@ -451,7 +452,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
        }
        xfrm_dev_state_free(x);
        security_xfrm_state_free(x);
-       kfree(x);
+       kmem_cache_free(xfrm_state_cache, x);
 }
 
 static void xfrm_state_gc_task(struct work_struct *work)
@@ -563,7 +564,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 {
        struct xfrm_state *x;
 
-       x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
+       x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO);
 
        if (x) {
                write_pnet(&x->xs_net, net);
@@ -2175,6 +2176,12 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
        return afinfo;
 }
 
+void xfrm_flush_gc(void)
+{
+       flush_work(&xfrm_state_gc_work);
+}
+EXPORT_SYMBOL(xfrm_flush_gc);
+
 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
 void xfrm_state_delete_tunnel(struct xfrm_state *x)
 {
@@ -2307,6 +2314,10 @@ int __net_init xfrm_state_init(struct net *net)
 {
        unsigned int sz;
 
+       if (net_eq(net, &init_net))
+               xfrm_state_cache = KMEM_CACHE(xfrm_state,
+                                             SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+
        INIT_LIST_HEAD(&net->xfrm.state_all);
 
        sz = sizeof(struct hlist_head) * 8;
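
The xfrm change in this merge is the stock conversion from kzalloc() to a dedicated slab cache: KMEM_CACHE() derives the cache name and size from the struct, and __GFP_ZERO preserves the old zeroing behavior. The generic shape of the pattern, with a placeholder struct:

    #include <linux/slab.h>

    struct foo {
            int a;
    };

    static struct kmem_cache *foo_cache __ro_after_init;

    static int __init foo_init(void)
    {
            foo_cache = KMEM_CACHE(foo, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
            return 0;
    }

    static struct foo *foo_alloc(void)
    {
            /* __GFP_ZERO keeps the kzalloc() zeroing semantics */
            return kmem_cache_alloc(foo_cache, GFP_ATOMIC | __GFP_ZERO);
    }
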
index aa8c392e2e52271017ad9056aaf340932b613c6c..62a99ab680e3f87e9e92fde3c8ce119fcb8ac885 100644 (file)
@@ -1,4 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
+
+BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src))
+TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
+
 # List of programs to build
 hostprogs-y := test_lru_dist
 hostprogs-y += sock_example
@@ -45,58 +49,62 @@ hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
 hostprogs-y += cpustat
 hostprogs-y += xdp_adjust_tail
+hostprogs-y += xdpsock
+hostprogs-y += xdp_fwd
 
 # Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
-CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
+LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
 
-test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
-sock_example-objs := sock_example.o $(LIBBPF)
-fds_example-objs := bpf_load.o $(LIBBPF) fds_example.o
-sockex1-objs := bpf_load.o $(LIBBPF) sockex1_user.o
-sockex2-objs := bpf_load.o $(LIBBPF) sockex2_user.o
-sockex3-objs := bpf_load.o $(LIBBPF) sockex3_user.o
-tracex1-objs := bpf_load.o $(LIBBPF) tracex1_user.o
-tracex2-objs := bpf_load.o $(LIBBPF) tracex2_user.o
-tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
-tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
-tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
-tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
-tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
-load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
-test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
-trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
-lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
-offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
-spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
-map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
-test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
-test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
-test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
-test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS)
-test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
-test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
-xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
+CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
+TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+
+fds_example-objs := bpf_load.o fds_example.o
+sockex1-objs := bpf_load.o sockex1_user.o
+sockex2-objs := bpf_load.o sockex2_user.o
+sockex3-objs := bpf_load.o sockex3_user.o
+tracex1-objs := bpf_load.o tracex1_user.o
+tracex2-objs := bpf_load.o tracex2_user.o
+tracex3-objs := bpf_load.o tracex3_user.o
+tracex4-objs := bpf_load.o tracex4_user.o
+tracex5-objs := bpf_load.o tracex5_user.o
+tracex6-objs := bpf_load.o tracex6_user.o
+tracex7-objs := bpf_load.o tracex7_user.o
+load_sock_ops-objs := bpf_load.o load_sock_ops.o
+test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
+trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
+lathist-objs := bpf_load.o lathist_user.o
+offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
+map_perf_test-objs := bpf_load.o map_perf_test_user.o
+test_overhead-objs := bpf_load.o test_overhead_user.o
+test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
+test_cgrp2_attach-objs := test_cgrp2_attach.o
+test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
+test_cgrp2_sock-objs := test_cgrp2_sock.o
+test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
+xdp1-objs := xdp1_user.o
 # reuse xdp1 source intentionally
-xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
-xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
+xdp2-objs := xdp1_user.o
+xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o
+test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
                                       test_current_task_under_cgroup_user.o
-trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
-sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
-tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
-lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
-xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
-test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
-per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
-xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
-xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
-xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
-xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
-xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
-syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
-cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
-xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
+trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS)
+sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS)
+tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o
+lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o
+xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o
+test_map_in_map-objs := bpf_load.o test_map_in_map_user.o
+per_socket_stats_example-objs := cookie_uid_helper_example.o
+xdp_redirect-objs := bpf_load.o xdp_redirect_user.o
+xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o
+xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
+xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
+xdp_rxq_info-objs := xdp_rxq_info_user.o
+syscall_tp-objs := bpf_load.o syscall_tp_user.o
+cpustat-objs := bpf_load.o cpustat_user.o
+xdp_adjust_tail-objs := xdp_adjust_tail_user.o
+xdpsock-objs := bpf_load.o xdpsock_user.o
+xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -114,7 +122,6 @@ always += sock_flags_kern.o
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
-always += tcbpf2_kern.o
 always += tc_l2_redirect_kern.o
 always += lathist_kern.o
 always += offwaketime_kern.o
@@ -151,6 +158,8 @@ always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
 always += cpustat_kern.o
 always += xdp_adjust_tail_kern.o
+always += xdpsock_kern.o
+always += xdp_fwd_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -159,44 +168,20 @@ HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
 HOSTCFLAGS += -I$(srctree)/tools/perf
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
-HOSTLOADLIBES_fds_example += -lelf
-HOSTLOADLIBES_sockex1 += -lelf
-HOSTLOADLIBES_sockex2 += -lelf
-HOSTLOADLIBES_sockex3 += -lelf
-HOSTLOADLIBES_tracex1 += -lelf
-HOSTLOADLIBES_tracex2 += -lelf
-HOSTLOADLIBES_tracex3 += -lelf
-HOSTLOADLIBES_tracex4 += -lelf -lrt
-HOSTLOADLIBES_tracex5 += -lelf
-HOSTLOADLIBES_tracex6 += -lelf
-HOSTLOADLIBES_tracex7 += -lelf
-HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
-HOSTLOADLIBES_load_sock_ops += -lelf
-HOSTLOADLIBES_test_probe_write_user += -lelf
-HOSTLOADLIBES_trace_output += -lelf -lrt
-HOSTLOADLIBES_lathist += -lelf
-HOSTLOADLIBES_offwaketime += -lelf
-HOSTLOADLIBES_spintest += -lelf
-HOSTLOADLIBES_map_perf_test += -lelf -lrt
-HOSTLOADLIBES_test_overhead += -lelf -lrt
-HOSTLOADLIBES_xdp1 += -lelf
-HOSTLOADLIBES_xdp2 += -lelf
-HOSTLOADLIBES_xdp_router_ipv4 += -lelf
-HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
-HOSTLOADLIBES_trace_event += -lelf
-HOSTLOADLIBES_sampleip += -lelf
-HOSTLOADLIBES_tc_l2_redirect += -l elf
-HOSTLOADLIBES_lwt_len_hist += -l elf
-HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
-HOSTLOADLIBES_test_map_in_map += -lelf
-HOSTLOADLIBES_xdp_redirect += -lelf
-HOSTLOADLIBES_xdp_redirect_map += -lelf
-HOSTLOADLIBES_xdp_redirect_cpu += -lelf
-HOSTLOADLIBES_xdp_monitor += -lelf
-HOSTLOADLIBES_xdp_rxq_info += -lelf
-HOSTLOADLIBES_syscall_tp += -lelf
-HOSTLOADLIBES_cpustat += -lelf
-HOSTLOADLIBES_xdp_adjust_tail += -lelf
+HOSTCFLAGS_trace_helpers.o += -I$(srctree)/tools/lib/bpf/
+
+HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
+
+HOST_LOADLIBES         += $(LIBBPF) -lelf
+HOSTLOADLIBES_tracex4          += -lrt
+HOSTLOADLIBES_trace_output     += -lrt
+HOSTLOADLIBES_map_perf_test    += -lrt
+HOSTLOADLIBES_test_overhead    += -lrt
+HOSTLOADLIBES_xdpsock          += -pthread
 
 # Allows pointing LLC/CLANG to an LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
@@ -210,15 +195,16 @@ CLANG_ARCH_ARGS = -target $(ARCH)
 endif
 
 # Trick to allow make to be run from this directory
-all: $(LIBBPF)
-       $(MAKE) -C ../../ $(CURDIR)/
+all:
+       $(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
 
 clean:
        $(MAKE) -C ../../ M=$(CURDIR) clean
        @rm -f *~
 
 $(LIBBPF): FORCE
-       $(MAKE) -C $(dir $@) $(notdir $@)
+# Fix up variables inherited from Kbuild that the tools/ build system won't like
+       $(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O=
 
 $(obj)/syscall_nrs.s:  $(src)/syscall_nrs.c
        $(call if_changed_dep,cc_s_c)
@@ -249,7 +235,8 @@ verify_target_bpf: verify_cmds
                exit 2; \
        else true; fi
 
-$(src)/*.c: verify_target_bpf
+$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
+$(src)/*.c: verify_target_bpf $(LIBBPF)
 
 $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
 
@@ -257,9 +244,10 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
 # But, there is no easy way to fix it, so just exclude it since it is
 # useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
-       $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
+       @echo "  CLANG-bpf " $@
+       $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
                -I$(srctree)/tools/testing/selftests/bpf/ \
-               -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+               -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
                -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \
                -Wno-gnu-variable-sized-type-not-at-end \
                -Wno-address-of-packed-member -Wno-tautological-compare \
similarity index 98%
rename from samples/bpf/libbpf.h
rename to samples/bpf/bpf_insn.h
index 18bfee5aab6bdb7665d405d08e6fea3b84982f49..20dc5cefec8448e117b839bb8cef12559574e006 100644 (file)
@@ -1,9 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* eBPF mini library */
-#ifndef __LIBBPF_H
-#define __LIBBPF_H
-
-#include <bpf/bpf.h>
+/* eBPF instruction mini library */
+#ifndef __BPF_INSN_H
+#define __BPF_INSN_H
 
 struct bpf_insn;
 
index bebe4188b4b36cbf42fe39f1f5c61c43ea1bf8a9..89161c9ed466b63bda3b25b275559bf0892d2fee 100644 (file)
@@ -24,7 +24,7 @@
 #include <poll.h>
 #include <ctype.h>
 #include <assert.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "perf-sys.h"
 
@@ -145,6 +145,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
        }
 
        if (is_kprobe || is_kretprobe) {
+               bool need_normal_check = true;
+               const char *event_prefix = "";
+
                if (is_kprobe)
                        event += 7;
                else
@@ -158,18 +161,33 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                if (isdigit(*event))
                        return populate_prog_array(event, fd);
 
-               snprintf(buf, sizeof(buf),
-                        "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
-                        is_kprobe ? 'p' : 'r', event, event);
-               err = system(buf);
-               if (err < 0) {
-                       printf("failed to create kprobe '%s' error '%s'\n",
-                              event, strerror(errno));
-                       return -1;
+#ifdef __x86_64__
+               if (strncmp(event, "sys_", 4) == 0) {
+                       snprintf(buf, sizeof(buf),
+                                "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events",
+                                is_kprobe ? 'p' : 'r', event, event);
+                       err = system(buf);
+                       if (err >= 0) {
+                               need_normal_check = false;
+                               event_prefix = "__x64_";
+                       }
+               }
+#endif
+               if (need_normal_check) {
+                       snprintf(buf, sizeof(buf),
+                                "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+                                is_kprobe ? 'p' : 'r', event, event);
+                       err = system(buf);
+                       if (err < 0) {
+                               printf("failed to create kprobe '%s' error '%s'\n",
+                                      event, strerror(errno));
+                               return -1;
+                       }
                }
 
                strcpy(buf, DEBUGFS);
                strcat(buf, "events/kprobes/");
+               strcat(buf, event_prefix);
                strcat(buf, event);
                strcat(buf, "/id");
        } else if (is_tracepoint) {
@@ -402,7 +420,7 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
 
        /* Keeping compatible with ELF maps section changes
         * ------------------------------------------------
-        * The program size of struct bpf_map_def is known by loader
+        * The program size of struct bpf_load_map_def is known by loader
         * code, but struct stored in ELF file can be different.
         *
         * Unfortunately sym[i].st_size is zero.  To calculate the
@@ -411,7 +429,7 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
         * symbols.
         */
        map_sz_elf = data_maps->d_size / nr_maps;
-       map_sz_copy = sizeof(struct bpf_map_def);
+       map_sz_copy = sizeof(struct bpf_load_map_def);
        if (map_sz_elf < map_sz_copy) {
                /*
                 * Backward compat, loading older ELF file with
@@ -430,8 +448,8 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
 
        /* Memcpy relevant part of ELF maps data to loader maps */
        for (i = 0; i < nr_maps; i++) {
+               struct bpf_load_map_def *def;
                unsigned char *addr, *end;
-               struct bpf_map_def *def;
                const char *map_name;
                size_t offset;
 
@@ -446,9 +464,9 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
 
                /* Symbol value is offset into ELF maps section data area */
                offset = sym[i].st_value;
-               def = (struct bpf_map_def *)(data_maps->d_buf + offset);
+               def = (struct bpf_load_map_def *)(data_maps->d_buf + offset);
                maps[i].elf_offset = offset;
-               memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
+               memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def));
                memcpy(&maps[i].def, def, map_sz_copy);
 
                /* Verify no newer features were requested */
@@ -549,7 +567,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
                if (nr_maps < 0) {
                        printf("Error: Failed loading ELF maps (errno:%d):%s\n",
                               nr_maps, strerror(-nr_maps));
-                       ret = 1;
                        goto done;
                }
                if (load_maps(map_data, nr_maps, fixup_map))
@@ -615,7 +632,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
                }
        }
 
-       ret = 0;
 done:
        close(fd);
        return ret;
@@ -650,66 +666,3 @@ void read_trace_pipe(void)
                }
        }
 }
-
-#define MAX_SYMS 300000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
-
-static int ksym_cmp(const void *p1, const void *p2)
-{
-       return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-int load_kallsyms(void)
-{
-       FILE *f = fopen("/proc/kallsyms", "r");
-       char func[256], buf[256];
-       char symbol;
-       void *addr;
-       int i = 0;
-
-       if (!f)
-               return -ENOENT;
-
-       while (!feof(f)) {
-               if (!fgets(buf, sizeof(buf), f))
-                       break;
-               if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
-                       break;
-               if (!addr)
-                       continue;
-               syms[i].addr = (long) addr;
-               syms[i].name = strdup(func);
-               i++;
-       }
-       sym_cnt = i;
-       qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
-       return 0;
-}
-
-struct ksym *ksym_search(long key)
-{
-       int start = 0, end = sym_cnt;
-       int result;
-
-       while (start < end) {
-               size_t mid = start + (end - start) / 2;
-
-               result = key - syms[mid].addr;
-               if (result < 0)
-                       end = mid;
-               else if (result > 0)
-                       start = mid + 1;
-               else
-                       return &syms[mid];
-       }
-
-       if (start >= 1 && syms[start - 1].addr < key &&
-           key < syms[start].addr)
-               /* valid ksym */
-               return &syms[start - 1];
-
-       /* out of range. return _stext */
-       return &syms[0];
-}
-
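
The kallsyms helpers deleted here are not lost: the Makefile changes earlier in this merge link the samples against tools/testing/selftests/bpf/trace_helpers.o, which provides the same load_kallsyms()/ksym_search() interface. Usage stays essentially as before (a sketch, assuming the interface matches the removed code):

    #include <stdio.h>
    #include "trace_helpers.h"

    static void print_ksym(long addr)
    {
            struct ksym *sym;

            if (load_kallsyms())   /* parses /proc/kallsyms */
                    return;
            sym = ksym_search(addr);
            printf("0x%lx -> %s\n", addr, sym->name);
    }
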
index 453c200b389bf630c25428ef0a03647d75d8f35b..814894a129745a2c699665adf6b9c07513b9b0aa 100644 (file)
@@ -2,12 +2,12 @@
 #ifndef __BPF_LOAD_H
 #define __BPF_LOAD_H
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 
 #define MAX_MAPS 32
 #define MAX_PROGS 32
 
-struct bpf_map_def {
+struct bpf_load_map_def {
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
@@ -21,7 +21,7 @@ struct bpf_map_data {
        int fd;
        char *name;
        size_t elf_offset;
-       struct bpf_map_def def;
+       struct bpf_load_map_def def;
 };
 
 typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
@@ -54,12 +54,5 @@ int load_bpf_file(char *path);
 int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
 
 void read_trace_pipe(void);
-struct ksym {
-       long addr;
-       char *name;
-};
-
-int load_kallsyms(void);
-struct ksym *ksym_search(long key);
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 #endif
index 8eca27e595aee7a5b76bd1ee2895e23b3347cc8a..deb0e3e0324d4810f6ad0b2764d4ee4fe4b86896 100644 (file)
@@ -51,7 +51,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <bpf/bpf.h>
-#include "libbpf.h"
+#include "bpf_insn.h"
 
 #define PORT 8888
 
index 2b4cd1ae57c537e73f4b9b36a1be3398452f6b2a..869a99406dbfc98ea23ea36f83a9463746a8863f 100644 (file)
@@ -17,7 +17,7 @@
 #include <sys/resource.h>
 #include <sys/wait.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 #define MAX_CPU                        8
index e29bd52ff9e85c6dd6a2996bd213567e51e0328e..9854854f05d111aa018e2b4a2d1aac70080ac62c 100644 (file)
 #include <sys/types.h>
 #include <sys/socket.h>
 
+#include <bpf/bpf.h>
+
+#include "bpf_insn.h"
 #include "bpf_load.h"
-#include "libbpf.h"
 #include "sock_example.h"
 
 #define BPF_F_PIN      (1 << 0)
index 6477bad5b4e2b9e6ae8fe7558b18cd95f86197b5..c8e88cc84e6110267d45ad0a470dac90e566777d 100644 (file)
@@ -10,7 +10,7 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 #define MAX_ENTRIES    20
index e5da6cf71a3e1e4fd8743727fa296e01d8bcba1b..8ecb41ea0c034f560552deb3482c76783fec7db5 100644 (file)
@@ -8,7 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include <unistd.h>
 #include <errno.h>
index 7fcb94c09112e97eec0d276a7fb6fb0c5434f704..587b68b1f8dde6f4758dd16f8bb68a88f1816c84 100644 (file)
@@ -9,7 +9,7 @@
 #include <errno.h>
 #include <arpa/inet.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_util.h"
 
 #define MAX_INDEX 64
index 519d9af4b04a13c3fff93db6c7d927fb1cb95371..38b7b1a96cc2503e8ef6232e7f2cc6321efad950 100644 (file)
@@ -21,7 +21,7 @@
 #include <arpa/inet.h>
 #include <errno.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 #define TEST_BIT(t) (1U << (t))
index 512f87a5fd20e08d0b97893b17e20e971425830e..f06063af9fcb9a9fe4ca2eb90c68669b2ad6ca11 100644 (file)
@@ -17,6 +17,7 @@
 #include <sys/resource.h>
 #include "libbpf.h"
 #include "bpf_load.h"
+#include "trace_helpers.h"
 
 #define PRINT_RAW_ADDR 0
 
index 4ed690b907ff844961499d492350746065e423fb..60c2b73d1b4db34ba823b4f903ed1c4d9157018a 100644 (file)
@@ -22,6 +22,7 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
+#include "trace_helpers.h"
 
 #define DEFAULT_FREQ   99
 #define DEFAULT_SECS   5
index 33a637507c00ffb860177596b6e2b1d8f7a23bc0..60ec467c78abf78035f7049e373269d4a211cdc5 100644 (file)
@@ -26,7 +26,8 @@
 #include <linux/if_ether.h>
 #include <linux/ip.h>
 #include <stddef.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include "bpf_insn.h"
 #include "sock_example.h"
 
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
index 772d5dad8465af5195c50e63d3393eb20c9d59b6..a27d7579bc7309e1dadb9fc4da36604fb970cd2d 100644 (file)
@@ -9,7 +9,6 @@
 #include <net/if.h>
 #include <linux/if_packet.h>
 #include <arpa/inet.h>
-#include "libbpf.h"
 
 static inline int open_raw_sock(const char *name)
 {
index 2be935c2627dce639c817b5fc45e6e7421486f07..93ec01c56104f4e1771fefc984a7963f2871907a 100644 (file)
@@ -2,7 +2,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "sock_example.h"
 #include <unistd.h>
index 44fe0805b087db5a8c579e2db2f345cb3ae89846..1d5c6e9a6d2756343ac7261790c46c0769d03df0 100644 (file)
@@ -2,7 +2,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "sock_example.h"
 #include <unistd.h>
index 495ee02e2fb7c71753b3e851a6ce2136bc7a65ab..5ba3ae9d180bad63b7a04fcc1a03c7be154727ac 100644 (file)
@@ -2,7 +2,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include <linux/bpf.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "sock_example.h"
 #include <unistd.h>
index 3d736219a31c8e5a99f1c9173e9f62344977b286..8d3e9cfa190978e56d00bde53a22d3f72f9e9b7e 100644 (file)
@@ -7,6 +7,7 @@
 #include <sys/resource.h>
 #include "libbpf.h"
 #include "bpf_load.h"
+#include "trace_helpers.h"
 
 int main(int ac, char **argv)
 {
index 9169d3207f18130135b9f7bba8adbed49822cffa..1a1d0059a277a2d763997eac4f755fecab18a454 100644 (file)
@@ -16,7 +16,7 @@
 #include <assert.h>
 #include <stdbool.h>
 #include <sys/resource.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 /* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
index 28995a77656073721d5ea5c78eb33f9aceb66a2f..7ec45c3e8f5641849259269b3c6f4358552c4658 100644 (file)
@@ -13,7 +13,7 @@
 #include <string.h>
 #include <errno.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 
 static void usage(void)
 {
index 8a1b8b5d8def4717b64fbac2b58209bd3aeacab6..242184292f59922667c35e88ef921a2142148dec 100644 (file)
@@ -14,7 +14,7 @@
 #include <errno.h>
 #include <fcntl.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 
 static void usage(void)
 {
index 4bfcaf93fcf308e8b94c44cfc46c186e74d56cb7..20fbd1241db335ff929fd80044da39de71d5ea2f 100644 (file)
@@ -28,8 +28,9 @@
 #include <fcntl.h>
 
 #include <linux/bpf.h>
+#include <bpf/bpf.h>
 
-#include "libbpf.h"
+#include "bpf_insn.h"
 
 enum {
        MAP_KEY_PACKETS,
index 1af412ec60075e47a1b2ec8c12206c886f72c268..b453e6a161be20ed6ea094f81f36cf4263814f3a 100644 (file)
@@ -24,8 +24,9 @@
 #include <unistd.h>
 
 #include <linux/bpf.h>
+#include <bpf/bpf.h>
 
-#include "libbpf.h"
+#include "bpf_insn.h"
 #include "cgroup_helpers.h"
 
 #define FOO            "/foo"
index e79594dd629ba34e066212d80e1549962b3c7d46..b0811da5a00f3af5f950e8400c011045004504e3 100644 (file)
@@ -21,8 +21,9 @@
 #include <net/if.h>
 #include <inttypes.h>
 #include <linux/bpf.h>
+#include <bpf/bpf.h>
 
-#include "libbpf.h"
+#include "bpf_insn.h"
 
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
index e53f1f6f0867de31f513a0671e53d7442001d13b..3b5be2364975ae26b4dc8166c5c925bc1907aab1 100644 (file)
@@ -19,8 +19,9 @@
 #include <fcntl.h>
 #include <net/if.h>
 #include <linux/bpf.h>
+#include <bpf/bpf.h>
 
-#include "libbpf.h"
+#include "bpf_insn.h"
 #include "bpf_load.h"
 
 static int usage(const char *argv0)
index 65b5fb51c1dbc5b34dcf4dc01e6d6e16eb43b265..4be4874ca2bc651d23db26ef827dd6b22f56cc72 100644 (file)
@@ -9,7 +9,7 @@
 #include <stdio.h>
 #include <linux/bpf.h>
 #include <unistd.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include <linux/bpf.h>
 #include "cgroup_helpers.h"
index 73c35714226842b248027b214989e56570e4378f..eec3e2509ce8a42153f8c124e8a278890cd567e0 100644 (file)
@@ -21,7 +21,7 @@
 #include <stdlib.h>
 #include <time.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_util.h"
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
index 1aca18539d8dd01742909e9837bf8394185ab38e..e308858f7bcf29752df5076c629befbb6a6b2fe2 100644 (file)
@@ -13,7 +13,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <stdio.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 #define PORT_A         (map_fd[0])
index e1d35e07a10ec13feaba88f5276612ff5a35b0ae..6caf47afa635ca680bb56b43ef78c7f62b293dd7 100644 (file)
@@ -19,7 +19,7 @@
 #include <string.h>
 #include <time.h>
 #include <sys/resource.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 #define MAX_CNT 1000000
index bf8e3a9f30673c9d4a0a0a405c218f45f80eba1f..045eb5e30f54b21e84d25732f9f0135e7e4da774 100644 (file)
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <linux/bpf.h>
 #include <unistd.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include <sys/socket.h>
 #include <string.h>
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
deleted file mode 100755 (executable)
index c265863..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# In Namespace 0 (at_ns0) using native tunnel
-# Overlay IP: 10.1.1.100
-# local 192.16.1.100 remote 192.16.1.200
-# veth0 IP: 172.16.1.100, tunnel dev <type>00
-
-# Out of Namespace using BPF set/get on lwtunnel
-# Overlay IP: 10.1.1.200
-# local 172.16.1.200 remote 172.16.1.100
-# veth1 IP: 172.16.1.200, tunnel dev <type>11
-
-function config_device {
-       ip netns add at_ns0
-       ip link add veth0 type veth peer name veth1
-       ip link set veth0 netns at_ns0
-       ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
-       ip netns exec at_ns0 ip link set dev veth0 up
-       ip link set dev veth1 up mtu 1500
-       ip addr add dev veth1 172.16.1.200/24
-}
-
-function add_gre_tunnel {
-       # in namespace
-       ip netns exec at_ns0 \
-        ip link add dev $DEV_NS type $TYPE seq key 2 \
-               local 172.16.1.100 remote 172.16.1.200
-       ip netns exec at_ns0 ip link set dev $DEV_NS up
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE key 2 external
-       ip link set dev $DEV up
-       ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6gretap_tunnel {
-
-       # assign ipv6 address
-       ip netns exec at_ns0 ip addr add ::11/96 dev veth0
-       ip netns exec at_ns0 ip link set dev veth0 up
-       ip addr add dev veth1 ::22/96
-       ip link set dev veth1 up
-
-       # in namespace
-       ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
-               local ::11 remote ::22
-
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-       ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
-       ip netns exec at_ns0 ip link set dev $DEV_NS up
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE external
-       ip addr add dev $DEV 10.1.1.200/24
-       ip addr add dev $DEV fc80::200/24
-       ip link set dev $DEV up
-}
-
-function add_erspan_tunnel {
-       # in namespace
-       if [ "$1" == "v1" ]; then
-               ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE seq key 2 \
-               local 172.16.1.100 remote 172.16.1.200 \
-               erspan_ver 1 erspan 123
-       else
-               ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE seq key 2 \
-               local 172.16.1.100 remote 172.16.1.200 \
-               erspan_ver 2 erspan_dir egress erspan_hwid 3
-       fi
-       ip netns exec at_ns0 ip link set dev $DEV_NS up
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE external
-       ip link set dev $DEV up
-       ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6erspan_tunnel {
-
-       # assign ipv6 address
-       ip netns exec at_ns0 ip addr add ::11/96 dev veth0
-       ip netns exec at_ns0 ip link set dev veth0 up
-       ip addr add dev veth1 ::22/96
-       ip link set dev veth1 up
-
-       # in namespace
-       if [ "$1" == "v1" ]; then
-               ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE seq key 2 \
-               local ::11 remote ::22 \
-               erspan_ver 1 erspan 123
-       else
-               ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE seq key 2 \
-               local ::11 remote ::22 \
-               erspan_ver 2 erspan_dir egress erspan_hwid 7
-       fi
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-       ip netns exec at_ns0 ip link set dev $DEV_NS up
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE external
-       ip addr add dev $DEV 10.1.1.200/24
-       ip link set dev $DEV up
-}
-
-function add_vxlan_tunnel {
-       # Set static ARP entry here because iptables set-mark works
-       # on L3 packet, as a result not applying to ARP packets,
-       # causing errors at get_tunnel_{key/opt}.
-
-       # in namespace
-       ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200
-       ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-       ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
-       ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE external gbp dstport 4789
-       ip link set dev $DEV address 52:54:00:d9:02:00 up
-       ip addr add dev $DEV 10.1.1.200/24
-       arp -s 10.1.1.100 52:54:00:d9:01:00
-}
-
-function add_geneve_tunnel {
-       # in namespace
-       ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200
-       ip netns exec at_ns0 ip link set dev $DEV_NS up
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE dstport 6081 external
-       ip link set dev $DEV up
-       ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ipip_tunnel {
-       # in namespace
-       ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
-       ip netns exec at_ns0 ip link set dev $DEV_NS up
-       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
-       # out of namespace
-       ip link add dev $DEV type $TYPE external
-       ip link set dev $DEV up
-       ip addr add dev $DEV 10.1.1.200/24
-}
-
-function attach_bpf {
-       DEV=$1
-       SET_TUNNEL=$2
-       GET_TUNNEL=$3
-       tc qdisc add dev $DEV clsact
-       tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
-       tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
-}
-
-function test_gre {
-       TYPE=gretap
-       DEV_NS=gretap00
-       DEV=gretap11
-       config_device
-       add_gre_tunnel
-       attach_bpf $DEV gre_set_tunnel gre_get_tunnel
-       ping -c 1 10.1.1.100
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       cleanup
-}
-
-function test_ip6gre {
-       TYPE=ip6gre
-       DEV_NS=ip6gre00
-       DEV=ip6gre11
-       config_device
-       # reuse the ip6gretap function
-       add_ip6gretap_tunnel
-       attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
-       # underlay
-       ping6 -c 4 ::11
-       # overlay: ipv4 over ipv6
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       ping -c 1 10.1.1.100
-       # overlay: ipv6 over ipv6
-       ip netns exec at_ns0 ping6 -c 1 fc80::200
-       cleanup
-}
-
-function test_ip6gretap {
-       TYPE=ip6gretap
-       DEV_NS=ip6gretap00
-       DEV=ip6gretap11
-       config_device
-       add_ip6gretap_tunnel
-       attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
-       # underlay
-       ping6 -c 4 ::11
-       # overlay: ipv4 over ipv6
-       ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200
-       ping -c 1 10.1.1.100
-       # overlay: ipv6 over ipv6
-       ip netns exec at_ns0 ping6 -c 1 fc80::200
-       cleanup
-}
-
-function test_erspan {
-       TYPE=erspan
-       DEV_NS=erspan00
-       DEV=erspan11
-       config_device
-       add_erspan_tunnel $1
-       attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
-       ping -c 1 10.1.1.100
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       cleanup
-}
-
-function test_ip6erspan {
-       TYPE=ip6erspan
-       DEV_NS=ip6erspan00
-       DEV=ip6erspan11
-       config_device
-       add_ip6erspan_tunnel $1
-       attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
-       ping6 -c 3 ::11
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       cleanup
-}
-
-function test_vxlan {
-       TYPE=vxlan
-       DEV_NS=vxlan00
-       DEV=vxlan11
-       config_device
-       add_vxlan_tunnel
-       attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
-       ping -c 1 10.1.1.100
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       cleanup
-}
-
-function test_geneve {
-       TYPE=geneve
-       DEV_NS=geneve00
-       DEV=geneve11
-       config_device
-       add_geneve_tunnel
-       attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
-       ping -c 1 10.1.1.100
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       cleanup
-}
-
-function test_ipip {
-       TYPE=ipip
-       DEV_NS=ipip00
-       DEV=ipip11
-       config_device
-       tcpdump -nei veth1 &
-       cat /sys/kernel/debug/tracing/trace_pipe &
-       add_ipip_tunnel
-       ethtool -K veth1 gso off gro off rx off tx off
-       ip link set dev veth1 mtu 1500
-       attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
-       ping -c 1 10.1.1.100
-       ip netns exec at_ns0 ping -c 1 10.1.1.200
-       ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
-       sleep 0.2
-       iperf -c 10.1.1.100 -n 5k -p 5200
-       cleanup
-}
-
-function cleanup {
-       set +ex
-       pkill iperf
-       ip netns delete at_ns0
-       ip link del veth1
-       ip link del ipip11
-       ip link del gretap11
-       ip link del ip6gre11
-       ip link del ip6gretap11
-       ip link del vxlan11
-       ip link del geneve11
-       ip link del erspan11
-       ip link del ip6erspan11
-       pkill tcpdump
-       pkill cat
-       set -ex
-}
-
-trap cleanup 0 2 3 6 9
-cleanup
-echo "Testing GRE tunnel..."
-test_gre
-echo "Testing IP6GRE tunnel..."
-test_ip6gre
-echo "Testing IP6GRETAP tunnel..."
-test_ip6gretap
-echo "Testing ERSPAN tunnel..."
-test_erspan v1
-test_erspan v2
-echo "Testing IP6ERSPAN tunnel..."
-test_ip6erspan v1
-test_ip6erspan v2
-echo "Testing VXLAN tunnel..."
-test_vxlan
-echo "Testing GENEVE tunnel..."
-test_geneve
-echo "Testing IPIP tunnel..."
-test_ipip
-echo "*** PASS ***"
index 56f7a259a7c92500c41a850b45ff66ab63e47158..1fa1becfa641510ae67db4d0ea64c3971f6d2f4d 100644 (file)
@@ -21,6 +21,7 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
+#include "trace_helpers.h"
 
 #define SAMPLE_FREQ 50
 
index ccca1e3480171501f01779ca88fc26305bff2491..4837d73edefe8ed1376bbf74ceb11aaa613b1857 100644 (file)
 #include <sys/mman.h>
 #include <time.h>
 #include <signal.h>
-#include "libbpf.h"
+#include <libbpf.h>
 #include "bpf_load.h"
 #include "perf-sys.h"
+#include "trace_helpers.h"
 
 static int pmu_fd;
 
-int page_size;
-int page_cnt = 8;
-volatile struct perf_event_mmap_page *header;
-
-typedef void (*print_fn)(void *data, int size);
-
-static int perf_event_mmap(int fd)
-{
-       void *base;
-       int mmap_size;
-
-       page_size = getpagesize();
-       mmap_size = page_size * (page_cnt + 1);
-
-       base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-       if (base == MAP_FAILED) {
-               printf("mmap err\n");
-               return -1;
-       }
-
-       header = base;
-       return 0;
-}
-
-static int perf_event_poll(int fd)
-{
-       struct pollfd pfd = { .fd = fd, .events = POLLIN };
-
-       return poll(&pfd, 1, 1000);
-}
-
-struct perf_event_sample {
-       struct perf_event_header header;
-       __u32 size;
-       char data[];
-};
-
-static void perf_event_read(print_fn fn)
-{
-       __u64 data_tail = header->data_tail;
-       __u64 data_head = header->data_head;
-       __u64 buffer_size = page_cnt * page_size;
-       void *base, *begin, *end;
-       char buf[256];
-
-       asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
-       if (data_head == data_tail)
-               return;
-
-       base = ((char *)header) + page_size;
-
-       begin = base + data_tail % buffer_size;
-       end = base + data_head % buffer_size;
-
-       while (begin != end) {
-               struct perf_event_sample *e;
-
-               e = begin;
-               if (begin + e->header.size > base + buffer_size) {
-                       long len = base + buffer_size - begin;
-
-                       assert(len < e->header.size);
-                       memcpy(buf, begin, len);
-                       memcpy(buf + len, base, e->header.size - len);
-                       e = (void *) buf;
-                       begin = base + e->header.size - len;
-               } else if (begin + e->header.size == base + buffer_size) {
-                       begin = base;
-               } else {
-                       begin += e->header.size;
-               }
-
-               if (e->header.type == PERF_RECORD_SAMPLE) {
-                       fn(e->data, e->size);
-               } else if (e->header.type == PERF_RECORD_LOST) {
-                       struct {
-                               struct perf_event_header header;
-                               __u64 id;
-                               __u64 lost;
-                       } *lost = (void *) e;
-                       printf("lost %lld events\n", lost->lost);
-               } else {
-                       printf("unknown event type=%d size=%d\n",
-                              e->header.type, e->header.size);
-               }
-       }
-
-       __sync_synchronize(); /* smp_mb() */
-       header->data_tail = data_head;
-}
-
 static __u64 time_get_ns(void)
 {
        struct timespec ts;
@@ -127,7 +37,7 @@ static __u64 start_time;
 
 #define MAX_CNT 100000ll
 
-static void print_bpf_output(void *data, int size)
+static int print_bpf_output(void *data, int size)
 {
        static __u64 cnt;
        struct {
@@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size)
        if (e->cookie != 0x12345678) {
                printf("BUG pid %llx cookie %llx sized %d\n",
                       e->pid, e->cookie, size);
-               kill(0, SIGINT);
+               return LIBBPF_PERF_EVENT_ERROR;
        }
 
        cnt++;
@@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size)
        if (cnt == MAX_CNT) {
                printf("recv %lld events per sec\n",
                       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
-               kill(0, SIGINT);
+               return LIBBPF_PERF_EVENT_DONE;
        }
+
+       return LIBBPF_PERF_EVENT_CONT;
 }
 
 static void test_bpf_perf_event(void)
@@ -170,6 +82,7 @@ int main(int argc, char **argv)
 {
        char filename[256];
        FILE *f;
+       int ret;
 
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
@@ -187,10 +100,7 @@ int main(int argc, char **argv)
        (void) f;
 
        start_time = time_get_ns();
-       for (;;) {
-               perf_event_poll(pmu_fd);
-               perf_event_read(print_bpf_output);
-       }
-
-       return 0;
+       ret = perf_event_poller(pmu_fd, print_bpf_output);
+       kill(0, SIGINT);
+       return ret;
 }
index 3dcb475fb135c51fc0d8ebe9786a8d8e8091259a..af8c20608ab562eb2b65c8e6638ed2799e71b2cc 100644 (file)
@@ -2,7 +2,7 @@
 #include <stdio.h>
 #include <linux/bpf.h>
 #include <unistd.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 int main(int ac, char **argv)
index efb5e61918df62fccdfa1dc72d5d85dae073d008..1a81e6a5c2ea27b943e9db08a384c7ee4b438159 100644 (file)
@@ -7,7 +7,7 @@
 #include <string.h>
 #include <sys/resource.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "bpf_util.h"
 
index fe372239d5054c44a7dcb70f05530aa19eb3cfc9..6c6b10f4c3ee35b484e30fa4939a1e6a2d344d45 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/bpf.h>
 #include <sys/resource.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "bpf_util.h"
 
index 22c644f1f4c378db2877e94977dcfdd5df87118a..14625c898e43cf29c823875413a33cb928ed8813 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/bpf.h>
 #include <sys/resource.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 struct pair {
index 4e2774b731f0422cf224838301af2dc5c2f8a573..c4ab91c894940e2c03d3406a7b55170250b4e624 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/filter.h>
 #include <linux/seccomp.h>
 #include <sys/prctl.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include <sys/resource.h>
 
index 89ab8d40847453155c25a0e3181d2878e2f3d932..4bb3c830adb283783dac07db75e2391c8eddde72 100644 (file)
@@ -16,7 +16,7 @@
 #include <unistd.h>
 
 #include "bpf_load.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "perf-sys.h"
 
 #define SAMPLE_PERIOD  0x7fffffffffffffffULL
index 8a52ac492e8b714bf7780ec2d89ee513d77c4833..ea6dae78f0dff11e16a73105f830dd6cdb8ba469 100644 (file)
@@ -3,7 +3,7 @@
 #include <stdio.h>
 #include <linux/bpf.h>
 #include <unistd.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 int main(int argc, char **argv)
index b901ee2b33362d4dcd13c28fa3c922c73d19918d..b02c531510ed9a0b88963f749a5d32e8dc0b3c16 100644 (file)
@@ -16,9 +16,9 @@
 #include <libgen.h>
 #include <sys/resource.h>
 
-#include "bpf_load.h"
 #include "bpf_util.h"
-#include "libbpf.h"
+#include "bpf/bpf.h"
+#include "bpf/libbpf.h"
 
 static int ifindex;
 static __u32 xdp_flags;
@@ -31,7 +31,7 @@ static void int_exit(int sig)
 
 /* simple per-protocol drop counter
  */
-static void poll_stats(int interval)
+static void poll_stats(int map_fd, int interval)
 {
        unsigned int nr_cpus = bpf_num_possible_cpus();
        const unsigned int nr_keys = 256;
@@ -47,7 +47,7 @@ static void poll_stats(int interval)
                for (key = 0; key < nr_keys; key++) {
                        __u64 sum = 0;
 
-                       assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0);
+                       assert(bpf_map_lookup_elem(map_fd, &key, values) == 0);
                        for (i = 0; i < nr_cpus; i++)
                                sum += (values[i] - prev[key][i]);
                        if (sum)
@@ -71,9 +71,14 @@ static void usage(const char *prog)
 int main(int argc, char **argv)
 {
        struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type      = BPF_PROG_TYPE_XDP,
+       };
        const char *optstr = "SN";
+       int prog_fd, map_fd, opt;
+       struct bpf_object *obj;
+       struct bpf_map *map;
        char filename[256];
-       int opt;
 
        while ((opt = getopt(argc, argv, optstr)) != -1) {
                switch (opt) {
@@ -102,13 +107,19 @@ int main(int argc, char **argv)
        ifindex = strtoul(argv[optind], NULL, 0);
 
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       prog_load_attr.file = filename;
 
-       if (load_bpf_file(filename)) {
-               printf("%s", bpf_log_buf);
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+               return 1;
+
+       map = bpf_map__next(NULL, obj);
+       if (!map) {
+               printf("finding a map in obj file failed\n");
                return 1;
        }
+       map_fd = bpf_map__fd(map);
 
-       if (!prog_fd[0]) {
+       if (!prog_fd) {
                printf("load_bpf_file: %s\n", strerror(errno));
                return 1;
        }
@@ -116,12 +127,12 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
 
-       poll_stats(2);
+       poll_stats(map_fd, 2);
 
        return 0;
 }
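
Several samples in this series are converted to the same libbpf loading
pattern as xdp1 above. A condensed sketch, using only calls that appear
in this diff (the object-file name is a placeholder):

	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type	= BPF_PROG_TYPE_XDP,
		.file		= "prog_kern.o",	/* hypothetical name */
	};
	struct bpf_object *obj;
	int prog_fd, map_fd;

	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
		return 1;

	/* bpf_map__next(NULL, obj) yields the object's first map;
	 * chaining it walks the maps in definition order. */
	struct bpf_map *map = bpf_map__next(NULL, obj);
	if (!map)
		return 1;
	map_fd = bpf_map__fd(map);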
index f621a541b57451c61e5ef1d0f9d99433d541cd30..3042ce37dae8ec14c149f21ad9a96191ed49d217 100644 (file)
@@ -18,9 +18,8 @@
 #include <netinet/ether.h>
 #include <unistd.h>
 #include <time.h>
-#include "bpf_load.h"
-#include "libbpf.h"
-#include "bpf_util.h"
+#include "bpf/bpf.h"
+#include "bpf/libbpf.h"
 
 #define STATS_INTERVAL_S 2U
 
@@ -36,7 +35,7 @@ static void int_exit(int sig)
 
 /* simple "icmp packet too big sent" counter
  */
-static void poll_stats(unsigned int kill_after_s)
+static void poll_stats(unsigned int map_fd, unsigned int kill_after_s)
 {
        time_t started_at = time(NULL);
        __u64 value = 0;
@@ -46,7 +45,7 @@ static void poll_stats(unsigned int kill_after_s)
        while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
                sleep(STATS_INTERVAL_S);
 
-               assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
+               assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
 
                printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
        }
@@ -66,14 +65,17 @@ static void usage(const char *cmd)
 
 int main(int argc, char **argv)
 {
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type      = BPF_PROG_TYPE_XDP,
+       };
        unsigned char opt_flags[256] = {};
        unsigned int kill_after_s = 0;
        const char *optstr = "i:T:SNh";
-       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       int i, prog_fd, map_fd, opt;
+       struct bpf_object *obj;
+       struct bpf_map *map;
        char filename[256];
-       int opt;
-       int i;
-
 
        for (i = 0; i < strlen(optstr); i++)
                if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
@@ -115,13 +117,19 @@ int main(int argc, char **argv)
        }
 
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       prog_load_attr.file = filename;
+
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+               return 1;
 
-       if (load_bpf_file(filename)) {
-               printf("%s", bpf_log_buf);
+       map = bpf_map__next(NULL, obj);
+       if (!map) {
+               printf("finding a map in obj file failed\n");
                return 1;
        }
+       map_fd = bpf_map__fd(map);
 
-       if (!prog_fd[0]) {
+       if (!prog_fd) {
                printf("load_bpf_file: %s\n", strerror(errno));
                return 1;
        }
@@ -129,12 +137,12 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
 
-       poll_stats(kill_after_s);
+       poll_stats(map_fd, kill_after_s);
 
        bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
 
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
new file mode 100644 (file)
index 0000000..4a6be0f
--- /dev/null
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include "bpf_helpers.h"
+
+#define IPV6_FLOWINFO_MASK              cpu_to_be32(0x0FFFFFFF)
+
+struct bpf_map_def SEC("maps") tx_port = {
+       .type = BPF_MAP_TYPE_DEVMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 64,
+};
+
+/* from include/net/ip.h */
+static __always_inline int ip_decrease_ttl(struct iphdr *iph)
+{
+       u32 check = (__force u32)iph->check;
+
+       check += (__force u32)htons(0x0100);
+       iph->check = (__force __sum16)(check + (check >= 0xFFFF));
+       return --iph->ttl;
+}
+
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct bpf_fib_lookup fib_params;
+       struct ethhdr *eth = data;
+       struct ipv6hdr *ip6h;
+       struct iphdr *iph;
+       int out_index;
+       u16 h_proto;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       __builtin_memset(&fib_params, 0, sizeof(fib_params));
+
+       h_proto = eth->h_proto;
+       if (h_proto == htons(ETH_P_IP)) {
+               iph = data + nh_off;
+
+               if (iph + 1 > data_end)
+                       return XDP_DROP;
+
+               if (iph->ttl <= 1)
+                       return XDP_PASS;
+
+               fib_params.family       = AF_INET;
+               fib_params.tos          = iph->tos;
+               fib_params.l4_protocol  = iph->protocol;
+               fib_params.sport        = 0;
+               fib_params.dport        = 0;
+               fib_params.tot_len      = ntohs(iph->tot_len);
+               fib_params.ipv4_src     = iph->saddr;
+               fib_params.ipv4_dst     = iph->daddr;
+       } else if (h_proto == htons(ETH_P_IPV6)) {
+               struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
+               struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
+
+               ip6h = data + nh_off;
+               if (ip6h + 1 > data_end)
+                       return XDP_DROP;
+
+               if (ip6h->hop_limit <= 1)
+                       return XDP_PASS;
+
+               fib_params.family       = AF_INET6;
+               fib_params.flowlabel    = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
+               fib_params.l4_protocol  = ip6h->nexthdr;
+               fib_params.sport        = 0;
+               fib_params.dport        = 0;
+               fib_params.tot_len      = ntohs(ip6h->payload_len);
+               *src                    = ip6h->saddr;
+               *dst                    = ip6h->daddr;
+       } else {
+               return XDP_PASS;
+       }
+
+       fib_params.ifindex = ctx->ingress_ifindex;
+
+       out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+
+       /* Verify that the egress index has XDP support.
+        * TO-DO: bpf_map_lookup_elem(&tx_port, &key) fails with
+        *        "cannot pass map_type 14 into func bpf_map_lookup_elem#1".
+        * NOTE: without verifying that the egress index supports XDP,
+        *       forwarded packets are dropped.
+        */
+       if (out_index > 0) {
+               if (h_proto == htons(ETH_P_IP))
+                       ip_decrease_ttl(iph);
+               else if (h_proto == htons(ETH_P_IPV6))
+                       ip6h->hop_limit--;
+
+               memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+               memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+               return bpf_redirect_map(&tx_port, out_index, 0);
+       }
+
+       return XDP_PASS;
+}
+
+SEC("xdp_fwd")
+int xdp_fwd_prog(struct xdp_md *ctx)
+{
+       return xdp_fwd_flags(ctx, 0);
+}
+
+SEC("xdp_fwd_direct")
+int xdp_fwd_direct_prog(struct xdp_md *ctx)
+{
+       return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+}
+
+char _license[] SEC("license") = "GPL";
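
A note on the ip_decrease_ttl() helper copied above from include/net/ip.h:
TTL occupies the high byte of one 16-bit header word, so ttl-- lowers that
word by 0x0100, and the checksum is patched incrementally instead of being
recomputed. A worked example with a hypothetical checksum value, doing the
arithmetic directly on the 16-bit one's-complement sum:

	u32 check = 0xFF00;		/* hypothetical old iph->check */
	check += 0x0100;		/* header word dropped by 0x0100 */
	/* 0x10000 overflows 16 bits; the (check >= 0xFFFF) term folds
	 * the end-around carry back in, giving 0x0001. */
	u16 new_check = (u16)(check + (check >= 0xFFFF));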
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
new file mode 100644 (file)
index 0000000..a87a204
--- /dev/null
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+#include <net/if.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+
+
+static int do_attach(int idx, int fd, const char *name)
+{
+       int err;
+
+       err = bpf_set_link_xdp_fd(idx, fd, 0);
+       if (err < 0)
+               printf("ERROR: failed to attach program to %s\n", name);
+
+       return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+       int err;
+
+       err = bpf_set_link_xdp_fd(idx, -1, 0);
+       if (err < 0)
+               printf("ERROR: failed to detach program from %s\n", name);
+
+       return err;
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] interface-list\n"
+               "\nOPTS:\n"
+               "    -d    detach program\n"
+               "    -D    direct table lookups (skip fib rules)\n",
+               prog);
+}
+
+int main(int argc, char **argv)
+{
+       char filename[PATH_MAX];
+       int opt, i, idx, err;
+       int prog_id = 0;
+       int attach = 1;
+       int ret = 0;
+
+       while ((opt = getopt(argc, argv, ":dD")) != -1) {
+               switch (opt) {
+               case 'd':
+                       attach = 0;
+                       break;
+               case 'D':
+                       prog_id = 1;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (optind == argc) {
+               usage(basename(argv[0]));
+               return 1;
+       }
+
+       if (attach) {
+               snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+               if (access(filename, O_RDONLY) < 0) {
+                       printf("error accessing file %s: %s\n",
+                               filename, strerror(errno));
+                       return 1;
+               }
+
+               if (load_bpf_file(filename)) {
+                       printf("%s", bpf_log_buf);
+                       return 1;
+               }
+
+               if (!prog_fd[prog_id]) {
+                       printf("load_bpf_file: %s\n", strerror(errno));
+                       return 1;
+               }
+       }
+       if (attach) {
+               for (i = 1; i < 64; ++i)
+                       bpf_map_update_elem(map_fd[0], &i, &i, 0);
+       }
+
+       for (i = optind; i < argc; ++i) {
+               idx = if_nametoindex(argv[i]);
+               if (!idx)
+                       idx = strtoul(argv[i], NULL, 0);
+
+               if (!idx) {
+                       fprintf(stderr, "Invalid arg\n");
+                       return 1;
+               }
+               if (!attach) {
+                       err = do_detach(idx, argv[i]);
+                       if (err)
+                               ret = err;
+               } else {
+                       err = do_attach(idx, prog_fd[prog_id], argv[i]);
+                       if (err)
+                               ret = err;
+               }
+       }
+
+       return ret;
+}
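
From the usage() text above: the loader attaches the "xdp_fwd" program
(or "xdp_fwd_direct" with -D) to every interface given, and seeds the
tx_port devmap with identity entries 1..63 so bpf_redirect_map() can
reach any egress ifindex. A hypothetical invocation (interface names are
placeholders):

	./xdp_fwd eth0 eth1	# attach, forwarding via fib rules
	./xdp_fwd -D eth0 eth1	# attach, direct table lookups
	./xdp_fwd -d eth0 eth1	# detach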
index 894bc64c2cac651851c0603e8f51af45a3692a45..bf09b5188acdf11dfc38e8f6bd6562b85f54023d 100644 (file)
@@ -26,7 +26,7 @@ static const char *__doc_err_only__=
 #include <net/if.h>
 #include <time.h>
 
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 #include "bpf_util.h"
 
@@ -58,7 +58,7 @@ static void usage(char *argv[])
                        printf(" flag (internal value:%d)",
                               *long_options[i].flag);
                else
-                       printf("(internal short-option: -%c)",
+                       printf("short-option: -%c",
                               long_options[i].val);
                printf("\n");
        }
@@ -594,7 +594,7 @@ int main(int argc, char **argv)
        snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
 
        /* Parse command line args */
-       while ((opt = getopt_long(argc, argv, "h",
+       while ((opt = getopt_long(argc, argv, "hDSs:",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
                case 'D':
index 23744a8aaf2160ad3c435df973f0ba5cec59917d..f6efaefd485b15929b04bf23346ef21934342d52 100644 (file)
@@ -28,7 +28,7 @@ static const char *__doc__ =
  * use bpf/libbpf.h), but cannot as (currently) needed for XDP
  * attaching to a device via bpf_set_link_xdp_fd()
  */
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_load.h"
 
 #include "bpf_util.h"
index 7eae07d7293ecfa6129ab02d2072191b9effa995..4445e76854b59dbd3b66b137f89e4eb24665fc89 100644 (file)
@@ -24,7 +24,7 @@
 
 #include "bpf_load.h"
 #include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
 
 static int ifindex_in;
 static int ifindex_out;
index b701b5c21342a78105fdf7e1e5beac6a049788c0..81a69e36cb788cbfa99b1a8a71b9ede1d1d0cca9 100644 (file)
@@ -24,7 +24,7 @@
 
 #include "bpf_load.h"
 #include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
 
 static int ifindex_in;
 static int ifindex_out;
index 6296741c1fbd508be8a69d34e6d7098f4117089e..b2b4dfa776c8c1a40fa04bac053dc36a49c9d550 100644 (file)
@@ -16,7 +16,7 @@
 #include <sys/socket.h>
 #include <unistd.h>
 #include "bpf_load.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include <arpa/inet.h>
 #include <fcntl.h>
 #include <poll.h>
index 478d95412de438be21908ed5363d72387187dbca..e4e9ba52bff02c457410543424b0f90148a01730 100644 (file)
@@ -22,8 +22,8 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#include "libbpf.h"
-#include "bpf_load.h"
+#include "bpf/bpf.h"
+#include "bpf/libbpf.h"
 #include "bpf_util.h"
 
 static int ifindex = -1;
@@ -32,6 +32,9 @@ static char *ifname;
 
 static __u32 xdp_flags;
 
+static struct bpf_map *stats_global_map;
+static struct bpf_map *rx_queue_index_map;
+
 /* Exit return codes */
 #define EXIT_OK                0
 #define EXIT_FAIL              1
@@ -174,7 +177,7 @@ static struct datarec *alloc_record_per_cpu(void)
 
 static struct record *alloc_record_per_rxq(void)
 {
-       unsigned int nr_rxqs = map_data[2].def.max_entries;
+       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
        struct record *array;
        size_t size;
 
@@ -190,7 +193,7 @@ static struct record *alloc_record_per_rxq(void)
 
 static struct stats_record *alloc_stats_record(void)
 {
-       unsigned int nr_rxqs = map_data[2].def.max_entries;
+       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
        struct stats_record *rec;
        int i;
 
@@ -210,7 +213,7 @@ static struct stats_record *alloc_stats_record(void)
 
 static void free_stats_record(struct stats_record *r)
 {
-       unsigned int nr_rxqs = map_data[2].def.max_entries;
+       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
        int i;
 
        for (i = 0; i < nr_rxqs; i++)
@@ -254,11 +257,11 @@ static void stats_collect(struct stats_record *rec)
 {
        int fd, i, max_rxqs;
 
-       fd = map_data[1].fd; /* map: stats_global_map */
+       fd = bpf_map__fd(stats_global_map);
        map_collect_percpu(fd, 0, &rec->stats);
 
-       fd = map_data[2].fd; /* map: rx_queue_index_map */
-       max_rxqs = map_data[2].def.max_entries;
+       fd = bpf_map__fd(rx_queue_index_map);
+       max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
        for (i = 0; i < max_rxqs; i++)
                map_collect_percpu(fd, i, &rec->rxq[i]);
 }
@@ -304,8 +307,8 @@ static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
                        int action)
 {
+       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
        unsigned int nr_cpus = bpf_num_possible_cpus();
-       unsigned int nr_rxqs = map_data[2].def.max_entries;
        double pps = 0, err = 0;
        struct record *rec, *prev;
        double t;
@@ -419,31 +422,44 @@ static void stats_poll(int interval, int action)
 int main(int argc, char **argv)
 {
        struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type      = BPF_PROG_TYPE_XDP,
+       };
+       int prog_fd, map_fd, opt, err;
        bool use_separators = true;
        struct config cfg = { 0 };
+       struct bpf_object *obj;
+       struct bpf_map *map;
        char filename[256];
        int longindex = 0;
        int interval = 2;
        __u32 key = 0;
-       int opt, err;
 
        char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
        int action = XDP_PASS; /* Default action */
        char *action_str = NULL;
 
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       prog_load_attr.file = filename;
 
        if (setrlimit(RLIMIT_MEMLOCK, &r)) {
                perror("setrlimit(RLIMIT_MEMLOCK)");
                return 1;
        }
 
-       if (load_bpf_file(filename)) {
-               fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+               return EXIT_FAIL;
+
+       map = bpf_map__next(NULL, obj);
+       stats_global_map = bpf_map__next(map, obj);
+       rx_queue_index_map = bpf_map__next(stats_global_map, obj);
+       if (!map || !stats_global_map || !rx_queue_index_map) {
+               printf("finding a map in obj file failed\n");
                return EXIT_FAIL;
        }
+       map_fd = bpf_map__fd(map);
 
-       if (!prog_fd[0]) {
+       if (!prog_fd) {
                fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
                return EXIT_FAIL;
        }
@@ -512,7 +528,7 @@ int main(int argc, char **argv)
                setlocale(LC_NUMERIC, "en_US");
 
        /* User-side setup ifindex in config_map */
-       err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
+       err = bpf_map_update_elem(map_fd, &key, &cfg, 0);
        if (err) {
                fprintf(stderr, "Store config failed (err:%d)\n", err);
                exit(EXIT_FAIL_BPF);
@@ -521,7 +537,7 @@ int main(int argc, char **argv)
        /* Remove XDP program when program is interrupted */
        signal(SIGINT, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
                fprintf(stderr, "link set xdp fd failed\n");
                return EXIT_FAIL_XDP;
        }
index f0a787268a87b1bae9fd70a924a4d027cf3e1b61..a4ccc33adac016a11c8f37c6637b27f6cab1664d 100644 (file)
@@ -18,7 +18,7 @@
 #include <unistd.h>
 #include <time.h>
 #include "bpf_load.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
 #include "bpf_util.h"
 #include "xdp_tx_iptunnel_common.h"
 
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
new file mode 100644 (file)
index 0000000..533ab81
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef XDPSOCK_H_
+#define XDPSOCK_H_
+
+/* Power-of-2 number of sockets */
+#define MAX_SOCKS 4
+
+/* Round-robin receive */
+#define RR_LB 0
+
+#endif /* XDPSOCK_H_ */
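
MAX_SOCKS is kept a power of two so the round-robin path in the kernel
program below can wrap its index with a mask instead of a modulo:

	/* e.g. with MAX_SOCKS == 4: (3 + 1) & (4 - 1) == 0, so the
	 * index cycles 0, 1, 2, 3, 0, ... without a division. */
	*rr = (*rr + 1) & (MAX_SOCKS - 1);
	idx = *rr;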
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
new file mode 100644 (file)
index 0000000..d8806c4
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#include "xdpsock.h"
+
+struct bpf_map_def SEC("maps") qidconf_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(int),
+       .value_size     = sizeof(int),
+       .max_entries    = 1,
+};
+
+struct bpf_map_def SEC("maps") xsks_map = {
+       .type = BPF_MAP_TYPE_XSKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") rr_map = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(unsigned int),
+       .max_entries = 1,
+};
+
+SEC("xdp_sock")
+int xdp_sock_prog(struct xdp_md *ctx)
+{
+       int *qidconf, key = 0, idx;
+       unsigned int *rr;
+
+       qidconf = bpf_map_lookup_elem(&qidconf_map, &key);
+       if (!qidconf)
+               return XDP_ABORTED;
+
+       if (*qidconf != ctx->rx_queue_index)
+               return XDP_PASS;
+
+#if RR_LB /* NB! RR_LB is configured in xdpsock.h */
+       rr = bpf_map_lookup_elem(&rr_map, &key);
+       if (!rr)
+               return XDP_ABORTED;
+
+       *rr = (*rr + 1) & (MAX_SOCKS - 1);
+       idx = *rr;
+#else
+       idx = 0;
+#endif
+
+       return bpf_redirect_map(&xsks_map, idx, 0);
+}
+
+char _license[] SEC("license") = "GPL";
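
xdp_sock_prog() above passes any packet whose rx_queue_index differs from
qidconf_map[0] and redirects the rest into the XSKMAP, so user space must
populate both maps. The qidconf update is visible at the end of this
patch; the socket insertion sketched here is an assumption (xsks_map as
map_fd[1] is hypothetical):

	int key = 0;

	/* Bind the program to one rx queue. */
	ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);

	/* Make socket i reachable from bpf_redirect_map()
	 * (hypothetical map index and key). */
	ret = bpf_map_update_elem(map_fd[1], &i, &xsks[i]->sfd, 0);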
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
new file mode 100644 (file)
index 0000000..7fe60f6
--- /dev/null
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+#include <linux/if_ether.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/ethernet.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <poll.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+
+#include "xdpsock.h"
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#define NUM_FRAMES 131072
+#define FRAME_HEADROOM 0
+#define FRAME_SIZE 2048
+#define NUM_DESCS 1024
+#define BATCH_SIZE 16
+
+#define FQ_NUM_DESCS 1024
+#define CQ_NUM_DESCS 1024
+
+#define DEBUG_HEXDUMP 0
+
+typedef __u32 u32;
+
+static unsigned long prev_time;
+
+enum benchmark_type {
+       BENCH_RXDROP = 0,
+       BENCH_TXONLY = 1,
+       BENCH_L2FWD = 2,
+};
+
+static enum benchmark_type opt_bench = BENCH_RXDROP;
+static u32 opt_xdp_flags;
+static const char *opt_if = "";
+static int opt_ifindex;
+static int opt_queue;
+static int opt_poll;
+static int opt_shared_packet_buffer;
+static int opt_interval = 1;
+
+struct xdp_umem_uqueue {
+       u32 cached_prod;
+       u32 cached_cons;
+       u32 mask;
+       u32 size;
+       struct xdp_umem_ring *ring;
+};
+
+struct xdp_umem {
+       char (*frames)[FRAME_SIZE];
+       struct xdp_umem_uqueue fq;
+       struct xdp_umem_uqueue cq;
+       int fd;
+};
+
+struct xdp_uqueue {
+       u32 cached_prod;
+       u32 cached_cons;
+       u32 mask;
+       u32 size;
+       struct xdp_rxtx_ring *ring;
+};
+
+struct xdpsock {
+       struct xdp_uqueue rx;
+       struct xdp_uqueue tx;
+       int sfd;
+       struct xdp_umem *umem;
+       u32 outstanding_tx;
+       unsigned long rx_npkts;
+       unsigned long tx_npkts;
+       unsigned long prev_rx_npkts;
+       unsigned long prev_tx_npkts;
+};
+
+#define MAX_SOCKS 4
+static int num_socks;
+struct xdpsock *xsks[MAX_SOCKS];
+
+static unsigned long get_nsecs(void)
+{
+       struct timespec ts;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts);
+       return ts.tv_sec * 1000000000UL + ts.tv_nsec;
+}
+
+static void dump_stats(void);
+
+#define lassert(expr)                                                  \
+       do {                                                            \
+               if (!(expr)) {                                          \
+                       fprintf(stderr, "%s:%s:%i: Assertion failed: "  \
+                               #expr ": errno: %d/\"%s\"\n",           \
+                               __FILE__, __func__, __LINE__,           \
+                               errno, strerror(errno));                \
+                       dump_stats();                                   \
+                       exit(EXIT_FAILURE);                             \
+               }                                                       \
+       } while (0)
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+#define u_smp_rmb() barrier()
+#define u_smp_wmb() barrier()
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+static const char pkt_data[] =
+       "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
+       "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
+       "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
+       "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
+
+static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
+{
+       u32 free_entries = q->size - (q->cached_prod - q->cached_cons);
+
+       if (free_entries >= nb)
+               return free_entries;
+
+       /* Refresh the local tail pointer */
+       q->cached_cons = q->ring->ptrs.consumer;
+
+       return q->size - (q->cached_prod - q->cached_cons);
+}
+
+static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
+{
+       u32 free_entries = q->cached_cons - q->cached_prod;
+
+       if (free_entries >= ndescs)
+               return free_entries;
+
+       /* Refresh the local tail pointer */
+       q->cached_cons = q->ring->ptrs.consumer + q->size;
+       return q->cached_cons - q->cached_prod;
+}
+
+static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
+{
+       u32 entries = q->cached_prod - q->cached_cons;
+
+       if (entries == 0) {
+               q->cached_prod = q->ring->ptrs.producer;
+               entries = q->cached_prod - q->cached_cons;
+       }
+
+       return (entries > nb) ? nb : entries;
+}
+
+static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
+{
+       u32 entries = q->cached_prod - q->cached_cons;
+
+       if (entries == 0) {
+               q->cached_prod = q->ring->ptrs.producer;
+               entries = q->cached_prod - q->cached_cons;
+       }
+
+       return (entries > ndescs) ? ndescs : entries;
+}
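+
+/* Note on the four queue helpers above: cached_prod and cached_cons
+ * are process-local copies of the shared ring's producer/consumer
+ * pointers. Each helper answers from the cached view first and
+ * re-reads the shared pointer only when the cache cannot satisfy the
+ * request, keeping cross-core traffic off the common path.
+ */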
+
+static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
+                                        struct xdp_desc *d,
+                                        size_t nb)
+{
+       u32 i;
+
+       if (umem_nb_free(fq, nb) < nb)
+               return -ENOSPC;
+
+       for (i = 0; i < nb; i++) {
+               u32 idx = fq->cached_prod++ & fq->mask;
+
+               fq->ring->desc[idx] = d[i].idx;
+       }
+
+       u_smp_wmb();
+
+       fq->ring->ptrs.producer = fq->cached_prod;
+
+       return 0;
+}
+
+static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
+                                     size_t nb)
+{
+       u32 i;
+
+       if (umem_nb_free(fq, nb) < nb)
+               return -ENOSPC;
+
+       for (i = 0; i < nb; i++) {
+               u32 idx = fq->cached_prod++ & fq->mask;
+
+               fq->ring->desc[idx] = d[i];
+       }
+
+       u_smp_wmb();
+
+       fq->ring->ptrs.producer = fq->cached_prod;
+
+       return 0;
+}
+
+static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
+                                              u32 *d, size_t nb)
+{
+       u32 idx, i, entries = umem_nb_avail(cq, nb);
+
+       u_smp_rmb();
+
+       for (i = 0; i < entries; i++) {
+               idx = cq->cached_cons++ & cq->mask;
+               d[i] = cq->ring->desc[idx];
+       }
+
+       if (entries > 0) {
+               u_smp_wmb();
+
+               cq->ring->ptrs.consumer = cq->cached_cons;
+       }
+
+       return entries;
+}
+
+static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off)
+{
+       lassert(idx < NUM_FRAMES);
+       return &xsk->umem->frames[idx][off];
+}
+
+static inline int xq_enq(struct xdp_uqueue *uq,
+                        const struct xdp_desc *descs,
+                        unsigned int ndescs)
+{
+       struct xdp_rxtx_ring *r = uq->ring;
+       unsigned int i;
+
+       if (xq_nb_free(uq, ndescs) < ndescs)
+               return -ENOSPC;
+
+       for (i = 0; i < ndescs; i++) {
+               u32 idx = uq->cached_prod++ & uq->mask;
+
+               r->desc[idx].idx = descs[i].idx;
+               r->desc[idx].len = descs[i].len;
+               r->desc[idx].offset = descs[i].offset;
+       }
+
+       u_smp_wmb();
+
+       r->ptrs.producer = uq->cached_prod;
+       return 0;
+}
+
+static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
+                                __u32 idx, unsigned int ndescs)
+{
+       struct xdp_rxtx_ring *q = uq->ring;
+       unsigned int i;
+
+       if (xq_nb_free(uq, ndescs) < ndescs)
+               return -ENOSPC;
+
+       for (i = 0; i < ndescs; i++) {
+               u32 idx = uq->cached_prod++ & uq->mask;
+
+               q->desc[idx].idx        = idx + i;
+               q->desc[idx].len        = sizeof(pkt_data) - 1;
+               q->desc[idx].offset     = 0;
+       }
+
+       u_smp_wmb();
+
+       q->ptrs.producer = uq->cached_prod;
+       return 0;
+}
+
+static inline int xq_deq(struct xdp_uqueue *uq,
+                        struct xdp_desc *descs,
+                        int ndescs)
+{
+       struct xdp_rxtx_ring *r = uq->ring;
+       unsigned int idx;
+       int i, entries;
+
+       entries = xq_nb_avail(uq, ndescs);
+
+       u_smp_rmb();
+
+       for (i = 0; i < entries; i++) {
+               idx = uq->cached_cons++ & uq->mask;
+               descs[i] = r->desc[idx];
+       }
+
+       if (entries > 0) {
+               u_smp_wmb();
+
+               r->ptrs.consumer = uq->cached_cons;
+       }
+
+       return entries;
+}
+
+static void swap_mac_addresses(void *data)
+{
+       struct ether_header *eth = (struct ether_header *)data;
+       struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
+       struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
+       struct ether_addr tmp;
+
+       tmp = *src_addr;
+       *src_addr = *dst_addr;
+       *dst_addr = tmp;
+}
+
+#if DEBUG_HEXDUMP
+static void hex_dump(void *pkt, size_t length, const char *prefix)
+{
+       int i = 0;
+       const unsigned char *address = (unsigned char *)pkt;
+       const unsigned char *line = address;
+       size_t line_size = 32;
+       unsigned char c;
+
+       printf("length = %zu\n", length);
+       printf("%s | ", prefix);
+       while (length-- > 0) {
+               printf("%02X ", *address++);
+               if (!(++i % line_size) || (length == 0 && i % line_size)) {
+                       if (length == 0) {
+                               while (i++ % line_size)
+                                       printf("__ ");
+                       }
+                       printf(" | ");  /* right close */
+                       while (line < address) {
+                               c = *line++;
+                               printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+                       }
+                       printf("\n");
+                       if (length > 0)
+                               printf("%s | ", prefix);
+               }
+       }
+       printf("\n");
+}
+#endif
+
+static size_t gen_eth_frame(char *frame)
+{
+       memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
+       return sizeof(pkt_data) - 1;
+}
+
+static struct xdp_umem *xdp_umem_configure(int sfd)
+{
+       int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
+       struct xdp_umem_reg mr;
+       struct xdp_umem *umem;
+       void *bufs;
+
+       umem = calloc(1, sizeof(*umem));
+       lassert(umem);
+
+       lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
+                              NUM_FRAMES * FRAME_SIZE) == 0);
+
+       mr.addr = (__u64)bufs;
+       mr.len = NUM_FRAMES * FRAME_SIZE;
+       mr.frame_size = FRAME_SIZE;
+       mr.frame_headroom = FRAME_HEADROOM;
+
+       lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
+       lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
+                          sizeof(int)) == 0);
+       lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
+                          sizeof(int)) == 0);
+
+       umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
+                            FQ_NUM_DESCS * sizeof(u32),
+                            PROT_READ | PROT_WRITE,
+                            MAP_SHARED | MAP_POPULATE, sfd,
+                            XDP_UMEM_PGOFF_FILL_RING);
+       lassert(umem->fq.ring != MAP_FAILED);
+
+       umem->fq.mask = FQ_NUM_DESCS - 1;
+       umem->fq.size = FQ_NUM_DESCS;
+
+       umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
+                            CQ_NUM_DESCS * sizeof(u32),
+                            PROT_READ | PROT_WRITE,
+                            MAP_SHARED | MAP_POPULATE, sfd,
+                            XDP_UMEM_PGOFF_COMPLETION_RING);
+       lassert(umem->cq.ring != MAP_FAILED);
+
+       umem->cq.mask = CQ_NUM_DESCS - 1;
+       umem->cq.size = CQ_NUM_DESCS;
+
+       umem->frames = (char (*)[FRAME_SIZE])bufs;
+       umem->fd = sfd;
+
+       if (opt_bench == BENCH_TXONLY) {
+               int i;
+
+               for (i = 0; i < NUM_FRAMES; i++)
+                       (void)gen_eth_frame(&umem->frames[i][0]);
+       }
+
+       return umem;
+}
+
+static struct xdpsock *xsk_configure(struct xdp_umem *umem)
+{
+       struct sockaddr_xdp sxdp = {};
+       int sfd, ndescs = NUM_DESCS;
+       struct xdpsock *xsk;
+       bool shared = true;
+       u32 i;
+
+       sfd = socket(PF_XDP, SOCK_RAW, 0);
+       lassert(sfd >= 0);
+
+       xsk = calloc(1, sizeof(*xsk));
+       lassert(xsk);
+
+       xsk->sfd = sfd;
+       xsk->outstanding_tx = 0;
+
+       if (!umem) {
+               shared = false;
+               xsk->umem = xdp_umem_configure(sfd);
+       } else {
+               xsk->umem = umem;
+       }
+
+       lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
+                          &ndescs, sizeof(int)) == 0);
+       lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
+                          &ndescs, sizeof(int)) == 0);
+
+       /* Rx */
+       xsk->rx.ring = mmap(NULL,
+                           sizeof(struct xdp_ring) +
+                           NUM_DESCS * sizeof(struct xdp_desc),
+                           PROT_READ | PROT_WRITE,
+                           MAP_SHARED | MAP_POPULATE, sfd,
+                           XDP_PGOFF_RX_RING);
+       lassert(xsk->rx.ring != MAP_FAILED);
+
+       if (!shared) {
+               for (i = 0; i < NUM_DESCS / 2; i++)
+                       lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
+                               == 0);
+       }
+
+       /* Tx */
+       xsk->tx.ring = mmap(NULL,
+                        sizeof(struct xdp_ring) +
+                        NUM_DESCS * sizeof(struct xdp_desc),
+                        PROT_READ | PROT_WRITE,
+                        MAP_SHARED | MAP_POPULATE, sfd,
+                        XDP_PGOFF_TX_RING);
+       lassert(xsk->tx.ring != MAP_FAILED);
+
+       xsk->rx.mask = NUM_DESCS - 1;
+       xsk->rx.size = NUM_DESCS;
+
+       xsk->tx.mask = NUM_DESCS - 1;
+       xsk->tx.size = NUM_DESCS;
+
+       sxdp.sxdp_family = PF_XDP;
+       sxdp.sxdp_ifindex = opt_ifindex;
+       sxdp.sxdp_queue_id = opt_queue;
+       if (shared) {
+               sxdp.sxdp_flags = XDP_SHARED_UMEM;
+               sxdp.sxdp_shared_umem_fd = umem->fd;
+       }
+
+       lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);
+
+       return xsk;
+}
+
+static void print_benchmark(bool running)
+{
+       const char *bench_str = "INVALID";
+
+       if (opt_bench == BENCH_RXDROP)
+               bench_str = "rxdrop";
+       else if (opt_bench == BENCH_TXONLY)
+               bench_str = "txonly";
+       else if (opt_bench == BENCH_L2FWD)
+               bench_str = "l2fwd";
+
+       printf("%s:%d %s ", opt_if, opt_queue, bench_str);
+       if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
+               printf("xdp-skb ");
+       else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
+               printf("xdp-drv ");
+       else
+               printf("        ");
+
+       if (opt_poll)
+               printf("poll() ");
+
+       if (running) {
+               printf("running...");
+               fflush(stdout);
+       }
+}
+
+static void dump_stats(void)
+{
+       unsigned long now = get_nsecs();
+       long dt = now - prev_time;
+       int i;
+
+       prev_time = now;
+
+       for (i = 0; i < num_socks; i++) {
+               char *fmt = "%-15s %'-11.0f %'-11lu\n";
+               double rx_pps, tx_pps;
+
+               rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
+                        1000000000. / dt;
+               tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
+                        1000000000. / dt;
+
+               printf("\n sock%d@", i);
+               print_benchmark(false);
+               printf("\n");
+
+               printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
+                      dt / 1000000000.);
+               printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
+               printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
+
+               xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
+               xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
+       }
+}
+
+static void *poller(void *arg)
+{
+       (void)arg;
+       for (;;) {
+               sleep(opt_interval);
+               dump_stats();
+       }
+
+       return NULL;
+}
+
+static void int_exit(int sig)
+{
+       (void)sig;
+       dump_stats();
+       bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+       exit(EXIT_SUCCESS);
+}
+
+static struct option long_options[] = {
+       {"rxdrop", no_argument, 0, 'r'},
+       {"txonly", no_argument, 0, 't'},
+       {"l2fwd", no_argument, 0, 'l'},
+       {"interface", required_argument, 0, 'i'},
+       {"queue", required_argument, 0, 'q'},
+       {"poll", no_argument, 0, 'p'},
+       {"shared-buffer", no_argument, 0, 's'},
+       {"xdp-skb", no_argument, 0, 'S'},
+       {"xdp-native", no_argument, 0, 'N'},
+       {"interval", required_argument, 0, 'n'},
+       {0, 0, 0, 0}
+};
+
+static void usage(const char *prog)
+{
+       const char *str =
+               "  Usage: %s [OPTIONS]\n"
+               "  Options:\n"
+               "  -r, --rxdrop         Discard all incoming packets (default)\n"
+               "  -t, --txonly         Only send packets\n"
+               "  -l, --l2fwd          MAC swap L2 forwarding\n"
+               "  -i, --interface=n    Run on interface n\n"
+               "  -q, --queue=n        Use queue n (default 0)\n"
+               "  -p, --poll           Use poll syscall\n"
+               "  -s, --shared-buffer  Use shared packet buffer\n"
+               "  -S, --xdp-skb=n      Use XDP skb-mod\n"
+               "  -N, --xdp-native=n   Enfore XDP native mode\n"
+               "  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
+               "\n";
+       fprintf(stderr, str, prog);
+       exit(EXIT_FAILURE);
+}
+
+static void parse_command_line(int argc, char **argv)
+{
+       int option_index, c;
+
+       opterr = 0;
+
+       for (;;) {
+               c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options,
+                               &option_index);
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 'r':
+                       opt_bench = BENCH_RXDROP;
+                       break;
+               case 't':
+                       opt_bench = BENCH_TXONLY;
+                       break;
+               case 'l':
+                       opt_bench = BENCH_L2FWD;
+                       break;
+               case 'i':
+                       opt_if = optarg;
+                       break;
+               case 'q':
+                       opt_queue = atoi(optarg);
+                       break;
+               case 's':
+                       opt_shared_packet_buffer = 1;
+                       break;
+               case 'p':
+                       opt_poll = 1;
+                       break;
+               case 'S':
+                       opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+                       break;
+               case 'n':
+                       opt_interval = atoi(optarg);
+                       break;
+               default:
+                       usage(basename(argv[0]));
+               }
+       }
+
+       opt_ifindex = if_nametoindex(opt_if);
+       if (!opt_ifindex) {
+               fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
+                       opt_if);
+               usage(basename(argv[0]));
+       }
+}
+
+static void kick_tx(int fd)
+{
+       int ret;
+
+       ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
+       if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN)
+               return;
+       lassert(0);
+}
+
+static inline void complete_tx_l2fwd(struct xdpsock *xsk)
+{
+       u32 descs[BATCH_SIZE];
+       unsigned int rcvd;
+       size_t ndescs;
+
+       if (!xsk->outstanding_tx)
+               return;
+
+       kick_tx(xsk->sfd);
+       ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
+                xsk->outstanding_tx;
+
+       /* re-add completed Tx buffers */
+       rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
+       if (rcvd > 0) {
+               umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
+               xsk->outstanding_tx -= rcvd;
+               xsk->tx_npkts += rcvd;
+       }
+}
+
+static inline void complete_tx_only(struct xdpsock *xsk)
+{
+       u32 descs[BATCH_SIZE];
+       unsigned int rcvd;
+
+       if (!xsk->outstanding_tx)
+               return;
+
+       kick_tx(xsk->sfd);
+
+       rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
+       if (rcvd > 0) {
+               xsk->outstanding_tx -= rcvd;
+               xsk->tx_npkts += rcvd;
+       }
+}
+
+static void rx_drop(struct xdpsock *xsk)
+{
+       struct xdp_desc descs[BATCH_SIZE];
+       unsigned int rcvd, i;
+
+       rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
+       if (!rcvd)
+               return;
+
+       for (i = 0; i < rcvd; i++) {
+               u32 idx = descs[i].idx;
+
+               lassert(idx < NUM_FRAMES);
+#if DEBUG_HEXDUMP
+               char *pkt;
+               char buf[32];
+
+               pkt = xq_get_data(xsk, idx, descs[i].offset);
+               sprintf(buf, "idx=%d", idx);
+               hex_dump(pkt, descs[i].len, buf);
+#endif
+       }
+
+       xsk->rx_npkts += rcvd;
+
+       umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
+}
+
+static void rx_drop_all(void)
+{
+       struct pollfd fds[MAX_SOCKS + 1];
+       int i, ret, timeout, nfds = 1;
+
+       memset(fds, 0, sizeof(fds));
+
+       for (i = 0; i < num_socks; i++) {
+               fds[i].fd = xsks[i]->sfd;
+               fds[i].events = POLLIN;
+               timeout = 1000; /* 1 second */
+       }
+
+       for (;;) {
+               if (opt_poll) {
+                       ret = poll(fds, nfds, timeout);
+                       if (ret <= 0)
+                               continue;
+               }
+
+               for (i = 0; i < num_socks; i++)
+                       rx_drop(xsks[i]);
+       }
+}
+
+static void tx_only(struct xdpsock *xsk)
+{
+       int timeout, ret, nfds = 1;
+       struct pollfd fds[nfds + 1];
+       unsigned int idx = 0;
+
+       memset(fds, 0, sizeof(fds));
+       fds[0].fd = xsk->sfd;
+       fds[0].events = POLLOUT;
+       timeout = 1000; /* 1 second */
+
+       for (;;) {
+               if (opt_poll) {
+                       ret = poll(fds, nfds, timeout);
+                       if (ret <= 0)
+                               continue;
+
+                       if (fds[0].fd != xsk->sfd ||
+                           !(fds[0].revents & POLLOUT))
+                               continue;
+               }
+
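+               /* Only enqueue another batch when the TX ring has room for all of it */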
+               if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
+                       lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);
+
+                       xsk->outstanding_tx += BATCH_SIZE;
+                       idx += BATCH_SIZE;
+                       idx %= NUM_FRAMES;
+               }
+
+               complete_tx_only(xsk);
+       }
+}
+
+static void l2fwd(struct xdpsock *xsk)
+{
+       for (;;) {
+               struct xdp_desc descs[BATCH_SIZE];
+               unsigned int rcvd, i;
+               int ret;
+
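+               /* Busy-wait: keep reaping TX completions until at least one packet arrives */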
+               for (;;) {
+                       complete_tx_l2fwd(xsk);
+
+                       rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
+                       if (rcvd > 0)
+                               break;
+               }
+
+               for (i = 0; i < rcvd; i++) {
+                       char *pkt = xq_get_data(xsk, descs[i].idx,
+                                               descs[i].offset);
+
+                       swap_mac_addresses(pkt);
+#if DEBUG_HEXDUMP
+                       char buf[32];
+                       u32 idx = descs[i].idx;
+
+                       sprintf(buf, "idx=%d", idx);
+                       hex_dump(pkt, descs[i].len, buf);
+#endif
+               }
+
+               xsk->rx_npkts += rcvd;
+
+               ret = xq_enq(&xsk->tx, descs, rcvd);
+               lassert(ret == 0);
+               xsk->outstanding_tx += rcvd;
+       }
+}
+
+int main(int argc, char **argv)
+{
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       char xdp_filename[256];
+       int i, ret, key = 0;
+       pthread_t pt;
+
+       parse_command_line(argc, argv);
+
+       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+               fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
+                       strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
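+       /* The matching XDP object file is expected beside the binary, named <prog>_kern.o */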
+       snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(xdp_filename)) {
+               fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
+               exit(EXIT_FAILURE);
+       }
+
+       if (!prog_fd[0]) {
+               fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
+                       strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
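+       /* Attach the XDP program to the interface, honoring the -S/-N mode flags */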
+       if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
+               fprintf(stderr, "ERROR: link set xdp fd failed\n");
+               exit(EXIT_FAILURE);
+       }
+
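+       /* Store the configured queue id at key 0 of the qidconf map */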
+       ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
+       if (ret) {
+               fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
+               exit(EXIT_FAILURE);
+       }
+
+       /* Create sockets... */
+       xsks[num_socks++] = xsk_configure(NULL);
+
+#if RR_LB
+       for (i = 0; i < MAX_SOCKS - 1; i++)
+               xsks[num_socks++] = xsk_configure(xsks[0]->umem);
+#endif
+
+       /* ...and insert them into the map. */
+       for (i = 0; i < num_socks; i++) {
+               key = i;
+               ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
+               if (ret) {
+                       fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
+                       exit(EXIT_FAILURE);
+               }
+       }
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+       signal(SIGABRT, int_exit);
+
+       setlocale(LC_ALL, "");
+
+       ret = pthread_create(&pt, NULL, poller, NULL);
+       lassert(ret == 0);
+
+       prev_time = get_nsecs();
+
+       if (opt_bench == BENCH_RXDROP)
+               rx_drop_all();
+       else if (opt_bench == BENCH_TXONLY)
+               tx_only(xsks[0]);
+       else
+               l2fwd(xsks[0]);
+
+       return 0;
+}
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
deleted file mode 100644 (file)
index 9bf2881..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-# List of programs to build
-hostprogs-y := sockmap
-
-# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
-
-HOSTCFLAGS += -I$(objtree)/usr/include
-HOSTCFLAGS += -I$(srctree)/tools/lib/
-HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-HOSTCFLAGS += -I$(srctree)/tools/perf
-
-sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-always += sockmap_kern.o
-
-HOSTLOADLIBES_sockmap += -lelf -lpthread
-
-# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-#  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
-LLC ?= llc
-CLANG ?= clang
-
-# Trick to allow make to be run from this directory
-all:
-       $(MAKE) -C ../../ $(CURDIR)/
-
-clean:
-       $(MAKE) -C ../../ M=$(CURDIR) clean
-       @rm -f *~
-
-$(obj)/syscall_nrs.s:  $(src)/syscall_nrs.c
-       $(call if_changed_dep,cc_s_c)
-
-$(obj)/syscall_nrs.h:  $(obj)/syscall_nrs.s FORCE
-       $(call filechk,offsets,__SYSCALL_NRS_H__)
-
-clean-files += syscall_nrs.h
-
-FORCE:
-
-
-# Verify LLVM compiler tools are available and bpf target is supported by llc
-.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
-
-verify_cmds: $(CLANG) $(LLC)
-       @for TOOL in $^ ; do \
-               if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
-                       echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
-                       exit 1; \
-               else true; fi; \
-       done
-
-verify_target_bpf: verify_cmds
-       @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
-               echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
-               echo "   NOTICE: LLVM version >= 3.7.1 required" ;\
-               exit 2; \
-       else true; fi
-
-$(src)/*.c: verify_target_bpf
-
-# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
-# But, there is no easy way to fix it, so just exclude it since it is
-# useless for BPF samples.
-$(obj)/%.o: $(src)/%.c
-       $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-               -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-               -Wno-compare-distinct-pointer-types \
-               -Wno-gnu-variable-sized-type-not-at-end \
-               -Wno-address-of-packed-member -Wno-tautological-compare \
-               -Wno-unknown-warning-option \
-               -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
deleted file mode 100755 (executable)
index ace75f0..0000000
+++ /dev/null
@@ -1,488 +0,0 @@
-#Test a bunch of positive cases to verify basic functionality
-for prog in  "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
-       for i in 1 10 100; do
-               for l in 1 10 100; do
-                       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-                       echo $TEST
-                       $TEST
-                       sleep 2
-               done
-       done
-done
-done
-done
-
-#Test max iov
-t="sendmsg"
-r=1
-i=1024
-l=1
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-
-# Test max iov with 1k send
-
-t="sendmsg"
-r=1
-i=1024
-l=1024
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-
-# Test apply with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply and redirect with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply and redirect with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply and redirect with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with 1B not really useful but test it anyways
-r=1
-i=1024
-l=1024
-prog="--txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_redir --txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# mix and match cork and apply not really useful but valid programs
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Tests for bpf_msg_pull_data()
-for i in `seq 99 100 1600`; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-               --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-for i in `seq 199 100 1600`; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-               --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
-echo $TEST
-$TEST
-sleep 2
-
-# Run through gamut again with start and end
-for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
-       for i in 1 10 100; do
-               for l in 1 10 100; do
-                       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
-                       echo $TEST
-                       $TEST
-                       sleep 2
-               done
-       done
-done
-done
-done
-
-# Some specific tests to cover specific code paths
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
index b2a95af7df189509c5827bfe7af1d69918d5cfda..7f5c862461383c0d067f9ab017dd56b8cde390f4 100644 (file)
@@ -14,7 +14,7 @@ ifdef CONFIG_GCC_PLUGINS
   endif
 
   ifdef CONFIG_GCC_PLUGIN_SANCOV
-    ifeq ($(CFLAGS_KCOV),)
+    ifeq ($(strip $(CFLAGS_KCOV)),)
       # It is needed because of the gcc-plugin.sh and gcc version checks.
       gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV)           += sancov_plugin.so
 
index 07d07409f16fd5dbcde755adf3cbd9f1b49532d3..5af34a2b0cd9a4c116f68fb73f39e6716f3cfcb3 100644 (file)
@@ -196,7 +196,7 @@ $(obj)/%.tab.c: $(src)/%.y FORCE
        $(call if_changed,bison)
 
 quiet_cmd_bison_h = YACC    $@
-      cmd_bison_h = bison -o/dev/null --defines=$@ -t -l $<
+      cmd_bison_h = $(YACC) -o/dev/null --defines=$@ -t -l $<
 
 $(obj)/%.tab.h: $(src)/%.y FORCE
        $(call if_changed,bison_h)
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
new file mode 100755 (executable)
index 0000000..8f59897
--- /dev/null
@@ -0,0 +1,421 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Copyright (C) 2018 Netronome Systems, Inc.
+
+# In case user attempts to run with Python 2.
+from __future__ import print_function
+
+import argparse
+import re
+import sys, os
+
+class NoHelperFound(BaseException):
+    pass
+
+class ParsingError(BaseException):
+    def __init__(self, line='<line not provided>', reader=None):
+        if reader:
+            BaseException.__init__(self,
+                                   'Error at file offset %d, parsing line: %s' %
+                                   (reader.tell(), line))
+        else:
+            BaseException.__init__(self, 'Error parsing line: %s' % line)
+
+class Helper(object):
+    """
+    An object representing the description of an eBPF helper function.
+    @proto: function prototype of the helper function
+    @desc: textual description of the helper function
+    @ret: description of the return value of the helper function
+    """
+    def __init__(self, proto='', desc='', ret=''):
+        self.proto = proto
+        self.desc = desc
+        self.ret = ret
+
+    def proto_break_down(self):
+        """
+        Break down the helper function prototype into smaller chunks: return
+        type, name, distinct arguments.
+        """
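+        # arg_re groups: 1 = argument type, 6 = pointer stars, 7 = argument name
+        # proto_re groups: 1 = return type, 2 = pointer stars, 3 = name, 4 = raw argument list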
+        arg_re = re.compile('((const )?(struct )?(\w+|...))( (\**)(\w+))?$')
+        res = {}
+        proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
+
+        capture = proto_re.match(self.proto)
+        res['ret_type'] = capture.group(1)
+        res['ret_star'] = capture.group(2)
+        res['name']     = capture.group(3)
+        res['args'] = []
+
+        args    = capture.group(4).split(', ')
+        for a in args:
+            capture = arg_re.match(a)
+            res['args'].append({
+                'type' : capture.group(1),
+                'star' : capture.group(6),
+                'name' : capture.group(7)
+            })
+
+        return res
+
+class HeaderParser(object):
+    """
+    An object used to parse a file in order to extract the documentation of a
+    list of eBPF helper functions. All the helpers that can be retrieved are
+    stored as Helper objects in the self.helpers list.
+    @filename: name of file to parse, usually include/uapi/linux/bpf.h in the
+               kernel tree
+    """
+    def __init__(self, filename):
+        self.reader = open(filename, 'r')
+        self.line = ''
+        self.helpers = []
+
+    def parse_helper(self):
+        proto    = self.parse_proto()
+        desc     = self.parse_desc()
+        ret      = self.parse_ret()
+        return Helper(proto=proto, desc=desc, ret=ret)
+
+    def parse_proto(self):
+        # Argument can be of shape:
+        #   - "void"
+        #   - "type  name"
+        #   - "type *name"
+        #   - Same as above, with "const" and/or "struct" in front of type
+        #   - "..." (undefined number of arguments, for bpf_trace_printk())
+        # There is at least one term ("void"), and at most five arguments.
+        p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
+        capture = p.match(self.line)
+        if not capture:
+            raise NoHelperFound
+        self.line = self.reader.readline()
+        return capture.group(1)
+
+    def parse_desc(self):
+        p = re.compile(' \* ?(?:\t| {6,8})Description$')
+        capture = p.match(self.line)
+        if not capture:
+            # Helper can have empty description and we might be parsing another
+            # attribute: return but do not consume.
+            return ''
+        # Description can be several lines, some of them possibly empty, and it
+        # stops when another subsection title is met.
+        desc = ''
+        while True:
+            self.line = self.reader.readline()
+            if self.line == ' *\n':
+                desc += '\n'
+            else:
+                p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
+                capture = p.match(self.line)
+                if capture:
+                    desc += capture.group(1) + '\n'
+                else:
+                    break
+        return desc
+
+    def parse_ret(self):
+        p = re.compile(' \* ?(?:\t| {6,8})Return$')
+        capture = p.match(self.line)
+        if not capture:
+            # Helper can have empty retval and we might be parsing another
+            # attribute: return but do not consume.
+            return ''
+        # Return value description can be several lines, some of them possibly
+        # empty, and it stops when another subsection title is met.
+        ret = ''
+        while True:
+            self.line = self.reader.readline()
+            if self.line == ' *\n':
+                ret += '\n'
+            else:
+                p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
+                capture = p.match(self.line)
+                if capture:
+                    ret += capture.group(1) + '\n'
+                else:
+                    break
+        return ret
+
+    def run(self):
+        # Advance to start of helper function descriptions.
+        offset = self.reader.read().find('* Start of BPF helper function descriptions:')
+        if offset == -1:
+            raise Exception('Could not find start of eBPF helper descriptions list')
+        self.reader.seek(offset)
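+        # Skip the rest of the marker line and the line after it, then read the first candidate line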
+        self.reader.readline()
+        self.reader.readline()
+        self.line = self.reader.readline()
+
+        while True:
+            try:
+                helper = self.parse_helper()
+                self.helpers.append(helper)
+            except NoHelperFound:
+                break
+
+        self.reader.close()
+        print('Parsed description of %d helper function(s)' % len(self.helpers),
+              file=sys.stderr)
+
+###############################################################################
+
+class Printer(object):
+    """
+    A generic class for printers. Printers should be created with an array of
+    Helper objects, and implement a way to print them in the desired fashion.
+    @helpers: array of Helper objects to print to standard output
+    """
+    def __init__(self, helpers):
+        self.helpers = helpers
+
+    def print_header(self):
+        pass
+
+    def print_footer(self):
+        pass
+
+    def print_one(self, helper):
+        pass
+
+    def print_all(self):
+        self.print_header()
+        for helper in self.helpers:
+            self.print_one(helper)
+        self.print_footer()
+
+class PrinterRST(Printer):
+    """
+    A printer for dumping collected information about helpers as a
+    reStructuredText page compatible with the rst2man program, which can be used to
+    generate a manual page for the helpers.
+    @helpers: array of Helper objects to print to standard output
+    """
+    def print_header(self):
+        header = '''\
+.. Copyright (C) All BPF authors and contributors from 2014 to present.
+.. See git log include/uapi/linux/bpf.h in kernel tree for details.
+.. 
+.. %%%LICENSE_START(VERBATIM)
+.. Permission is granted to make and distribute verbatim copies of this
+.. manual provided the copyright notice and this permission notice are
+.. preserved on all copies.
+.. 
+.. Permission is granted to copy and distribute modified versions of this
+.. manual under the conditions for verbatim copying, provided that the
+.. entire resulting derived work is distributed under the terms of a
+.. permission notice identical to this one.
+.. 
+.. Since the Linux kernel and libraries are constantly changing, this
+.. manual page may be incorrect or out-of-date.  The author(s) assume no
+.. responsibility for errors or omissions, or for damages resulting from
+.. the use of the information contained herein.  The author(s) may not
+.. have taken the same level of care in the production of this manual,
+.. which is licensed free of charge, as they might when working
+.. professionally.
+.. 
+.. Formatted or processed versions of this manual, if unaccompanied by
+.. the source, must acknowledge the copyright and authors of this work.
+.. %%%LICENSE_END
+.. 
+.. Please do not edit this file. It was generated from the documentation
+.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
+.. (helper descriptions), and from scripts/bpf_helpers_doc.py in the same
+.. repository (header and footer).
+
+===========
+BPF-HELPERS
+===========
+-------------------------------------------------------------------------------
+list of eBPF helper functions
+-------------------------------------------------------------------------------
+
+:Manual section: 7
+
+DESCRIPTION
+===========
+
+The extended Berkeley Packet Filter (eBPF) subsystem consists of programs
+written in a pseudo-assembly language, then attached to one of several
+kernel hooks and run in reaction to specific events. This framework differs
+from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
+the ability to call special functions (or "helpers") from within a program.
+These functions are restricted to a white-list of helpers defined in the
+kernel.
+
+These helpers are used by eBPF programs to interact with the system, or with
+the context in which they work. For instance, they can be used to print
+debugging messages, to get the time since the system was booted, to interact
+with eBPF maps, or to manipulate network packets. Since there are several eBPF
+program types, and they do not run in the same context, each program type
+can only call a subset of those helpers.
+
+Due to eBPF conventions, a helper cannot have more than five arguments.
+
+Internally, eBPF programs call directly into the compiled helper functions
+without requiring any foreign-function interface. As a result, calling helpers
+introduces no overhead, thus offering excellent performance.
+
+This document is an attempt to list and document the helpers available to eBPF
+developers. They are sorted in chronological order (the oldest helpers in the
+kernel at the top).
+
+HELPERS
+=======
+'''
+        print(header)
+
+    def print_footer(self):
+        footer = '''
+EXAMPLES
+========
+
+Examples of usage for most of the eBPF helpers listed in this manual page are
+available within the Linux kernel sources, at the following locations:
+
+* *samples/bpf/*
+* *tools/testing/selftests/bpf/*
+
+LICENSE
+=======
+
+eBPF programs can have an associated license, passed along with the bytecode
+instructions to the kernel when the programs are loaded. The format for that
+string is identical to the one in use for kernel modules (Dual licenses, such
+as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
+programs that are compatible with the GNU General Public License (GPL).
+
+In order to use such helpers, the eBPF program must be loaded with the correct
+license string passed (via **attr**) to the **bpf**\ () system call, and this
+generally translates into the C source code of the program containing a line
+similar to the following:
+
+::
+
+       char ____license[] __attribute__((section("license"), used)) = "GPL";
+
+IMPLEMENTATION
+==============
+
+This manual page is an effort to document the existing eBPF helper functions.
+But as of this writing, the BPF sub-system is under heavy development. New eBPF
+program or map types are added, along with new helper functions. Some helpers
+are occasionally made available for additional program types. So in spite of
+the efforts of the community, this page might not be up-to-date. If you want to
+check for yourself what helper functions exist in your kernel, or what types of
+programs they can support, here are some files in the kernel tree that you
+may be interested in:
+
+* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list
+  of all helper functions, as well as many other BPF definitions including most
+  of the flags, structs or constants used by the helpers.
+* *net/core/filter.c* contains the definition of most network-related helper
+  functions, and the list of program types from which they can be used.
+* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related
+  helpers.
+* *kernel/bpf/verifier.c* contains the functions used to check that valid types
+  of eBPF maps are used with a given helper function.
+* *kernel/bpf/* directory contains other files in which additional helpers are
+  defined (for cgroups, sockmaps, etc.).
+
+Compatibility between helper functions and program types can generally be found
+in the files where helper functions are defined. Look for the **struct
+bpf_func_proto** objects and for functions returning them: these functions
+contain a list of helpers that a given program type can call. Note that the
+**default:** label of the **switch ... case** used to filter helpers can call
+other functions, themselves allowing access to additional helpers. The
+requirement for a GPL license is also stated in those **struct bpf_func_proto**.
+
+Compatibility between helper functions and map types can be found in the
+**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
+
+Helper functions that invalidate the checks on **data** and **data_end**
+pointers for network processing are listed in function
+**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
+
+SEE ALSO
+========
+
+**bpf**\ (2),
+**cgroups**\ (7),
+**ip**\ (8),
+**perf_event_open**\ (2),
+**sendmsg**\ (2),
+**socket**\ (7),
+**tc-bpf**\ (8)'''
+        print(footer)
+
+    def print_proto(self, helper):
+        """
+        Format the function prototype with bold and italics markers. This makes
+        the RST file less readable, but gives nice results in the manual page.
+        """
+        proto = helper.proto_break_down()
+
+        print('**%s %s%s(' % (proto['ret_type'],
+                              proto['ret_star'].replace('*', '\\*'),
+                              proto['name']),
+              end='')
+
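+        # Literal '*' characters must be escaped as '\*' so RST does not treat them as emphasis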
+        comma = ''
+        for a in proto['args']:
+            one_arg = '{}{}'.format(comma, a['type'])
+            if a['name']:
+                if a['star']:
+                    one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
+                else:
+                    one_arg += '** '
+                one_arg += '*{}*\\ **'.format(a['name'])
+            comma = ', '
+            print(one_arg, end='')
+
+        print(')**')
+
+    def print_one(self, helper):
+        self.print_proto(helper)
+
+        if (helper.desc):
+            print('\tDescription')
+            # Do not strip all newline characters: formatted code at the end of
+            # a section must be followed by a blank line.
+            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
+                print('{}{}'.format('\t\t' if line else '', line))
+
+        if (helper.ret):
+            print('\tReturn')
+            for line in helper.ret.rstrip().split('\n'):
+                print('{}{}'.format('\t\t' if line else '', line))
+
+        print('')
+
+###############################################################################
+
+# If the script is launched from scripts/ in the kernel tree and can access
+# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse,
+# otherwise the --filename argument will be required from the command line.
+script = os.path.abspath(sys.argv[0])
+linuxRoot = os.path.dirname(os.path.dirname(script))
+bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
+
+argParser = argparse.ArgumentParser(description="""
+Parse eBPF header file and generate documentation for eBPF helper functions.
+The RST-formatted output produced can be turned into a manual page with the
+rst2man utility.
+""")
+if (os.path.isfile(bpfh)):
+    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
+                           default=bpfh)
+else:
+    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
+args = argParser.parse_args()
+
+# Parse file.
+headerParser = HeaderParser(args.filename)
+headerParser.run()
+
+# Print formatted output to standard output.
+printer = PrinterRST(headerParser.helpers)
+printer.print_all()
index c07ba4da9e36119bfb5c8732f5b2178a46b09997..815eaf140ab5c0724a27df752d92a49402022e0b 100644 (file)
@@ -787,10 +787,9 @@ static void check_pci_bridge(struct check *c, struct dt_info *dti, struct node *
                FAIL(c, dti, node, "incorrect #size-cells for PCI bridge");
 
        prop = get_property(node, "bus-range");
-       if (!prop) {
-               FAIL(c, dti, node, "missing bus-range for PCI bridge");
+       if (!prop)
                return;
-       }
+
        if (prop->val.len != (sizeof(cell_t) * 2)) {
                FAIL_PROP(c, dti, node, prop, "value must be 2 cells");
                return;
index 61d9b256c6581b34f269a5a8dbf880ade359503f..a1c51b7e4baf986814c696c814da97557d776d08 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env perl
 
-# Copyright (c) Mauro Carvalho Chehab <mchehab@infradead.org>
+# Copyright (c) Mauro Carvalho Chehab <mchehab@kernel.org>
 # Released under GPLv2
 #
 # In order to use, you need to:
index 9e5735a4d3a57bf910ae2441a00a083e865f4136..1876a741087cc531645f4dc5383a7a234de6f5bd 100755 (executable)
@@ -170,7 +170,10 @@ __faddr2line() {
                echo "$file_lines" | while read -r line
                do
                        echo $line
-                       eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
+                       n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
+                       n1=$[$n-5]
+                       n2=$[$n+5]
+                       f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
                        awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
                done
 
index ef0287e429578bd897b25dd68f121ac4f1e96e78..03b7ce97de146c8e694e3fc3c49660498372f0ce 100644 (file)
@@ -14,14 +14,14 @@ genksyms-objs       := genksyms.o parse.tab.o lex.lex.o
 # so that 'bison: not found' will be displayed if it is missing.
 ifeq ($(findstring 1,$(KBUILD_ENABLE_EXTRA_GCC_CHECKS)),)
 
-quiet_cmd_bison_no_warn = $(quet_cmd_bison)
+quiet_cmd_bison_no_warn = $(quiet_cmd_bison)
       cmd_bison_no_warn = $(YACC) --version >/dev/null; \
                          $(cmd_bison) 2>/dev/null
 
 $(obj)/parse.tab.c: $(src)/parse.y FORCE
        $(call if_changed,bison_no_warn)
 
-quiet_cmd_bison_h_no_warn = $(quet_cmd_bison_h)
+quiet_cmd_bison_h_no_warn = $(quiet_cmd_bison_h)
       cmd_bison_h_no_warn = $(YACC) --version >/dev/null; \
                            $(cmd_bison_h) 2>/dev/null
 
index 944418da9fe3369efd596e44fd53ffaffc0f80bd..0f6dcb4011a8566dae009d31a6e7614227994a0b 100644 (file)
@@ -330,14 +330,7 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
                goto out;
        }
 
-       /* There will be a line like so:
-               deps_drivers/net/dummy.o := \
-                 drivers/net/dummy.c \
-                   $(wildcard include/config/net/fastroute.h) \
-                 include/linux/module.h \
-
-          Sum all files in the same dir or subdirs.
-       */
+       /* Sum all files in the same dir or subdirs. */
        while ((line = get_next_line(&pos, file, flen)) != NULL) {
                char* p = line;
 
index bfe16cbe42df0ea20da1a0d3d2ec5b30dc4f8092..c3db607ee9ec19909b8b1c78328795a1ce23f12c 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 # SPDX-License-Identifier: GPL-2.0
 #
-# Author: Mauro Carvalho Chehab <mchehab@s-opensource.com>
+# Author: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
 #
 # Produce manpages from kernel-doc.
 # See Documentation/doc-guide/kernel-doc.rst for instructions
index 48620c93d6976eca3a9b1cc3c34cfe21a69c7a39..1ce701fcb3f3b5e2165eea48329d1f10718da4e7 100644 (file)
@@ -449,6 +449,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
                                magic |= VFS_CAP_FLAGS_EFFECTIVE;
                        memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
                        cap->magic_etc = cpu_to_le32(magic);
+               } else {
+                       size = -ENOMEM;
                }
        }
        kfree(tmpbuf);
index 4cafe6a19167613cb64b29ac59c895e91285b390..6bd9358e5e62f917de456a4c229b077f4c999ae8 100644 (file)
@@ -1471,7 +1471,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
                        return SECCLASS_QIPCRTR_SOCKET;
                case PF_SMC:
                        return SECCLASS_SMC_SOCKET;
-#if PF_MAX > 44
+               case PF_XDP:
+                       return SECCLASS_XDP_SOCKET;
+#if PF_MAX > 45
 #error New address family defined, please update this function.
 #endif
                }
@@ -4576,6 +4578,7 @@ static int selinux_socket_post_create(struct socket *sock, int family,
 static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
 {
        struct sock *sk = sock->sk;
+       struct sk_security_struct *sksec = sk->sk_security;
        u16 family;
        int err;
 
@@ -4587,11 +4590,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
        family = sk->sk_family;
        if (family == PF_INET || family == PF_INET6) {
                char *addrp;
-               struct sk_security_struct *sksec = sk->sk_security;
                struct common_audit_data ad;
                struct lsm_network_audit net = {0,};
                struct sockaddr_in *addr4 = NULL;
                struct sockaddr_in6 *addr6 = NULL;
+               u16 family_sa = address->sa_family;
                unsigned short snum;
                u32 sid, node_perm;
 
@@ -4601,11 +4604,20 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                 * need to check address->sa_family as it is possible to have
                 * sk->sk_family = PF_INET6 with addr->sa_family = AF_INET.
                 */
-               switch (address->sa_family) {
+               switch (family_sa) {
+               case AF_UNSPEC:
                case AF_INET:
                        if (addrlen < sizeof(struct sockaddr_in))
                                return -EINVAL;
                        addr4 = (struct sockaddr_in *)address;
+                       if (family_sa == AF_UNSPEC) {
+                               /* see __inet_bind(), we only want to allow
+                                * AF_UNSPEC if the address is INADDR_ANY
+                                */
+                               if (addr4->sin_addr.s_addr != htonl(INADDR_ANY))
+                                       goto err_af;
+                               family_sa = AF_INET;
+                       }
                        snum = ntohs(addr4->sin_port);
                        addrp = (char *)&addr4->sin_addr.s_addr;
                        break;
@@ -4617,15 +4629,14 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                        addrp = (char *)&addr6->sin6_addr.s6_addr;
                        break;
                default:
-                       /* Note that SCTP services expect -EINVAL, whereas
-                        * others expect -EAFNOSUPPORT.
-                        */
-                       if (sksec->sclass == SECCLASS_SCTP_SOCKET)
-                               return -EINVAL;
-                       else
-                               return -EAFNOSUPPORT;
+                       goto err_af;
                }
 
+               ad.type = LSM_AUDIT_DATA_NET;
+               ad.u.net = &net;
+               ad.u.net->sport = htons(snum);
+               ad.u.net->family = family_sa;
+
                if (snum) {
                        int low, high;
 
@@ -4637,10 +4648,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                                                      snum, &sid);
                                if (err)
                                        goto out;
-                               ad.type = LSM_AUDIT_DATA_NET;
-                               ad.u.net = &net;
-                               ad.u.net->sport = htons(snum);
-                               ad.u.net->family = family;
                                err = avc_has_perm(&selinux_state,
                                                   sksec->sid, sid,
                                                   sksec->sclass,
@@ -4672,16 +4679,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                        break;
                }
 
-               err = sel_netnode_sid(addrp, family, &sid);
+               err = sel_netnode_sid(addrp, family_sa, &sid);
                if (err)
                        goto out;
 
-               ad.type = LSM_AUDIT_DATA_NET;
-               ad.u.net = &net;
-               ad.u.net->sport = htons(snum);
-               ad.u.net->family = family;
-
-               if (address->sa_family == AF_INET)
+               if (family_sa == AF_INET)
                        ad.u.net->v4info.saddr = addr4->sin_addr.s_addr;
                else
                        ad.u.net->v6info.saddr = addr6->sin6_addr;
@@ -4694,6 +4696,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
        }
 out:
        return err;
+err_af:
+       /* Note that SCTP services expect -EINVAL, others -EAFNOSUPPORT. */
+       if (sksec->sclass == SECCLASS_SCTP_SOCKET)
+               return -EINVAL;
+       return -EAFNOSUPPORT;
 }
 
 /* This supports connect(2) and SCTP connect services such as sctp_connectx(3)
@@ -4771,7 +4778,7 @@ static int selinux_socket_connect_helper(struct socket *sock,
                ad.type = LSM_AUDIT_DATA_NET;
                ad.u.net = &net;
                ad.u.net->dport = htons(snum);
-               ad.u.net->family = sk->sk_family;
+               ad.u.net->family = address->sa_family;
                err = avc_has_perm(&selinux_state,
                                   sksec->sid, sid, sksec->sclass, perm, &ad);
                if (err)
@@ -5272,6 +5279,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
        while (walk_size < addrlen) {
                addr = addr_buf;
                switch (addr->sa_family) {
+               case AF_UNSPEC:
                case AF_INET:
                        len = sizeof(struct sockaddr_in);
                        break;
@@ -5279,7 +5287,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
                        len = sizeof(struct sockaddr_in6);
                        break;
                default:
-                       return -EAFNOSUPPORT;
+                       return -EINVAL;
                }
 
                err = -EINVAL;
index 7f0372426494827a57f598412c0228c79d9ba42d..bd5fe0d3204ae98b67b234a1459f01f01ddc1d78 100644 (file)
@@ -240,9 +240,11 @@ struct security_class_mapping secclass_map[] = {
          { "manage_subnet", NULL } },
        { "bpf",
          {"map_create", "map_read", "map_write", "prog_load", "prog_run"} },
+       { "xdp_socket",
+         { COMMON_SOCK_PERMS, NULL } },
        { NULL }
   };
 
-#if PF_MAX > 44
+#if PF_MAX > 45
 #error New address family defined, please update secclass_map.
 #endif
index 69734b0eafd0d6941cc1a232d219e5d9705a0c42..9aa15bfc79369aebf6eaa0ef8dd32acecd7a1ec5 100644 (file)
@@ -1492,7 +1492,7 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file,
                              int op_flag)
 {
        struct snd_ctl_tlv header;
-       unsigned int *container;
+       unsigned int __user *container;
        unsigned int container_size;
        struct snd_kcontrol *kctl;
        struct snd_ctl_elem_id id;
index a848836a5de0468534d5eecfd24adf4bc743f9f2..507fd5210c1cd54d764e718ab00c7c657857ad6c 100644 (file)
@@ -396,8 +396,7 @@ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file,
        if (copy_from_user(&data->id, &data32->id, sizeof(data->id)) ||
            copy_from_user(&data->type, &data32->type, 3 * sizeof(u32)))
                goto error;
-       if (get_user(data->owner, &data32->owner) ||
-           get_user(data->type, &data32->type))
+       if (get_user(data->owner, &data32->owner))
                goto error;
        switch (data->type) {
        case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
index b719d0bd833ecb6d7560380db5eb3b78a9b040c4..6491afbb5fd5704a31a72b2847d4fef2f0001e2f 100644 (file)
@@ -27,10 +27,11 @@ static int snd_pcm_ioctl_delay_compat(struct snd_pcm_substream *substream,
                                      s32 __user *src)
 {
        snd_pcm_sframes_t delay;
+       int err;
 
-       delay = snd_pcm_delay(substream);
-       if (delay < 0)
-               return delay;
+       err = snd_pcm_delay(substream, &delay);
+       if (err)
+               return err;
        if (put_user(delay, src))
                return -EFAULT;
        return 0;
@@ -422,6 +423,8 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream,
                return -ENOTTY;
        if (substream->stream != dir)
                return -EINVAL;
+       if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN)
+               return -EBADFD;
 
        if ((ch = substream->runtime->channels) > 128)
                return -EINVAL;
index 35ffccea94c3eb6cfe7755b6c1da7547d018a9a0..0e875d5a9e8621ee378014176596207d201a2363 100644 (file)
@@ -2692,7 +2692,8 @@ static int snd_pcm_hwsync(struct snd_pcm_substream *substream)
        return err;
 }
                
-static snd_pcm_sframes_t snd_pcm_delay(struct snd_pcm_substream *substream)
+static int snd_pcm_delay(struct snd_pcm_substream *substream,
+                        snd_pcm_sframes_t *delay)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        int err;
@@ -2708,7 +2709,9 @@ static snd_pcm_sframes_t snd_pcm_delay(struct snd_pcm_substream *substream)
                n += runtime->delay;
        }
        snd_pcm_stream_unlock_irq(substream);
-       return err < 0 ? err : n;
+       if (!err)
+               *delay = n;
+       return err;
 }
                
 static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream,
@@ -2751,6 +2754,7 @@ static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream,
        sync_ptr.s.status.hw_ptr = status->hw_ptr;
        sync_ptr.s.status.tstamp = status->tstamp;
        sync_ptr.s.status.suspended_state = status->suspended_state;
+       sync_ptr.s.status.audio_tstamp = status->audio_tstamp;
        snd_pcm_stream_unlock_irq(substream);
        if (copy_to_user(_sync_ptr, &sync_ptr, sizeof(sync_ptr)))
                return -EFAULT;
@@ -2916,11 +2920,13 @@ static int snd_pcm_common_ioctl(struct file *file,
                return snd_pcm_hwsync(substream);
        case SNDRV_PCM_IOCTL_DELAY:
        {
-               snd_pcm_sframes_t delay = snd_pcm_delay(substream);
+               snd_pcm_sframes_t delay;
                snd_pcm_sframes_t __user *res = arg;
+               int err;
 
-               if (delay < 0)
-                       return delay;
+               err = snd_pcm_delay(substream, &delay);
+               if (err)
+                       return err;
                if (put_user(delay, res))
                        return -EFAULT;
                return 0;
@@ -3008,13 +3014,7 @@ int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream,
        case SNDRV_PCM_IOCTL_DROP:
                return snd_pcm_drop(substream);
        case SNDRV_PCM_IOCTL_DELAY:
-       {
-               result = snd_pcm_delay(substream);
-               if (result < 0)
-                       return result;
-               *frames = result;
-               return 0;
-       }
+               return snd_pcm_delay(substream, frames);
        default:
                return -EINVAL;
        }
@@ -3234,7 +3234,7 @@ static __poll_t snd_pcm_capture_poll(struct file *file, poll_table * wait)
 /*
  * mmap status record
  */
-static int snd_pcm_mmap_status_fault(struct vm_fault *vmf)
+static vm_fault_t snd_pcm_mmap_status_fault(struct vm_fault *vmf)
 {
        struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
        struct snd_pcm_runtime *runtime;
@@ -3270,7 +3270,7 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file
 /*
  * mmap control record
  */
-static int snd_pcm_mmap_control_fault(struct vm_fault *vmf)
+static vm_fault_t snd_pcm_mmap_control_fault(struct vm_fault *vmf)
 {
        struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
        struct snd_pcm_runtime *runtime;
@@ -3359,7 +3359,7 @@ snd_pcm_default_page_ops(struct snd_pcm_substream *substream, unsigned long ofs)
 /*
  * fault callback for mmapping a RAM page
  */
-static int snd_pcm_mmap_data_fault(struct vm_fault *vmf)
+static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf)
 {
        struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
        struct snd_pcm_runtime *runtime;
index f69764d7cdd7025d3935143883c162372e3c0c7c..e30e30ba6e3984804ec325457d76d8195d69245d 100644 (file)
@@ -36,8 +36,6 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile,
        struct snd_rawmidi_params params;
        unsigned int val;
 
-       if (rfile->output == NULL)
-               return -EINVAL;
        if (get_user(params.stream, &src->stream) ||
            get_user(params.buffer_size, &src->buffer_size) ||
            get_user(params.avail_min, &src->avail_min) ||
@@ -46,8 +44,12 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile,
        params.no_active_sensing = val;
        switch (params.stream) {
        case SNDRV_RAWMIDI_STREAM_OUTPUT:
+               if (!rfile->output)
+                       return -EINVAL;
                return snd_rawmidi_output_params(rfile->output, &params);
        case SNDRV_RAWMIDI_STREAM_INPUT:
+               if (!rfile->input)
+                       return -EINVAL;
                return snd_rawmidi_input_params(rfile->input, &params);
        }
        return -EINVAL;
@@ -67,16 +69,18 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile,
        int err;
        struct snd_rawmidi_status status;
 
-       if (rfile->output == NULL)
-               return -EINVAL;
        if (get_user(status.stream, &src->stream))
                return -EFAULT;
 
        switch (status.stream) {
        case SNDRV_RAWMIDI_STREAM_OUTPUT:
+               if (!rfile->output)
+                       return -EINVAL;
                err = snd_rawmidi_output_status(rfile->output, &status);
                break;
        case SNDRV_RAWMIDI_STREAM_INPUT:
+               if (!rfile->input)
+                       return -EINVAL;
                err = snd_rawmidi_input_status(rfile->input, &status);
                break;
        default:
@@ -112,16 +116,18 @@ static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile,
        int err;
        struct snd_rawmidi_status status;
 
-       if (rfile->output == NULL)
-               return -EINVAL;
        if (get_user(status.stream, &src->stream))
                return -EFAULT;
 
        switch (status.stream) {
        case SNDRV_RAWMIDI_STREAM_OUTPUT:
+               if (!rfile->output)
+                       return -EINVAL;
                err = snd_rawmidi_output_status(rfile->output, &status);
                break;
        case SNDRV_RAWMIDI_STREAM_INPUT:
+               if (!rfile->input)
+                       return -EINVAL;
                err = snd_rawmidi_input_status(rfile->input, &status);
                break;
        default:
index c3908862bc8b63932aaa8724a50d7a5ae19a8cf3..86ca584c27b28081663e8493e7b54b5d0009784d 100644 (file)
@@ -26,6 +26,7 @@
 #include <sound/seq_oss_legacy.h>
 #include "seq_oss_readq.h"
 #include "seq_oss_writeq.h"
+#include <linux/nospec.h>
 
 
 /*
@@ -287,10 +288,10 @@ note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, st
 {
        struct seq_oss_synthinfo *info;
 
-       if (!snd_seq_oss_synth_is_valid(dp, dev))
+       info = snd_seq_oss_synth_info(dp, dev);
+       if (!info)
                return -ENXIO;
 
-       info = &dp->synths[dev];
        switch (info->arg.event_passing) {
        case SNDRV_SEQ_OSS_PROCESS_EVENTS:
                if (! info->ch || ch < 0 || ch >= info->nr_voices) {
@@ -298,6 +299,7 @@ note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, st
                        return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev);
                }
 
+               ch = array_index_nospec(ch, info->nr_voices);
                if (note == 255 && info->ch[ch].note >= 0) {
                        /* volume control */
                        int type;
@@ -347,10 +349,10 @@ note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, s
 {
        struct seq_oss_synthinfo *info;
 
-       if (!snd_seq_oss_synth_is_valid(dp, dev))
+       info = snd_seq_oss_synth_info(dp, dev);
+       if (!info)
                return -ENXIO;
 
-       info = &dp->synths[dev];
        switch (info->arg.event_passing) {
        case SNDRV_SEQ_OSS_PROCESS_EVENTS:
                if (! info->ch || ch < 0 || ch >= info->nr_voices) {
@@ -358,6 +360,7 @@ note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, s
                        return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev);
                }
 
+               ch = array_index_nospec(ch, info->nr_voices);
                if (info->ch[ch].note >= 0) {
                        note = info->ch[ch].note;
                        info->ch[ch].vel = 0;
@@ -381,7 +384,7 @@ note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, s
 static int
 set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev)
 {
-       if (! snd_seq_oss_synth_is_valid(dp, dev))
+       if (!snd_seq_oss_synth_info(dp, dev))
                return -ENXIO;
        
        ev->type = type;
@@ -399,7 +402,7 @@ set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note,
 static int
 set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev)
 {
-       if (! snd_seq_oss_synth_is_valid(dp, dev))
+       if (!snd_seq_oss_synth_info(dp, dev))
                return -ENXIO;
        
        ev->type = type;
index b30b2139e3f033fd71e59fbe10cfa1e25340adc6..9debd1b8fd2880fde1e0fe349a4745bdb443b477 100644 (file)
@@ -29,6 +29,7 @@
 #include "../seq_lock.h"
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 
 
 /*
@@ -315,6 +316,7 @@ get_mididev(struct seq_oss_devinfo *dp, int dev)
 {
        if (dev < 0 || dev >= dp->max_mididev)
                return NULL;
+       dev = array_index_nospec(dev, dp->max_mididev);
        return get_mdev(dev);
 }
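
get_mididev() shows the Spectre-v1 idiom this series applies throughout the OSS sequencer and several other drivers below: bounds-check a user-controlled index, then clamp it with array_index_nospec() so a mispredicted branch cannot speculatively read out of bounds. The bare pattern, as a sketch (table name and size are illustrative):

	#include <linux/nospec.h>

	static int lookup(int idx, const int *table, int nr_entries)
	{
		if (idx < 0 || idx >= nr_entries)
			return -EINVAL;
		/* Clamp idx under speculation as well as architecturally. */
		idx = array_index_nospec(idx, nr_entries);
		return table[idx];
	}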
 
index cd0e0ebbfdb1a1931b50f0fc9d70cdd08c17f740..278ebb9931225998dd07f0606eeabe289d71aff5 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 
 /*
  * constants
@@ -339,17 +340,13 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
        dp->max_synthdev = 0;
 }
 
-/*
- * check if the specified device is MIDI mapped device
- */
-static int
-is_midi_dev(struct seq_oss_devinfo *dp, int dev)
+static struct seq_oss_synthinfo *
+get_synthinfo_nospec(struct seq_oss_devinfo *dp, int dev)
 {
        if (dev < 0 || dev >= dp->max_synthdev)
-               return 0;
-       if (dp->synths[dev].is_midi)
-               return 1;
-       return 0;
+               return NULL;
+       dev = array_index_nospec(dev, SNDRV_SEQ_OSS_MAX_SYNTH_DEVS);
+       return &dp->synths[dev];
 }
 
 /*
@@ -359,14 +356,20 @@ static struct seq_oss_synth *
 get_synthdev(struct seq_oss_devinfo *dp, int dev)
 {
        struct seq_oss_synth *rec;
-       if (dev < 0 || dev >= dp->max_synthdev)
-               return NULL;
-       if (! dp->synths[dev].opened)
+       struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev);
+
+       if (!info)
                return NULL;
-       if (dp->synths[dev].is_midi)
-               return &midi_synth_dev;
-       if ((rec = get_sdev(dev)) == NULL)
+       if (!info->opened)
                return NULL;
+       if (info->is_midi) {
+               rec = &midi_synth_dev;
+               snd_use_lock_use(&rec->use_lock);
+       } else {
+               rec = get_sdev(dev);
+               if (!rec)
+                       return NULL;
+       }
        if (! rec->opened) {
                snd_use_lock_free(&rec->use_lock);
                return NULL;
@@ -402,10 +405,8 @@ snd_seq_oss_synth_reset(struct seq_oss_devinfo *dp, int dev)
        struct seq_oss_synth *rec;
        struct seq_oss_synthinfo *info;
 
-       if (snd_BUG_ON(dev < 0 || dev >= dp->max_synthdev))
-               return;
-       info = &dp->synths[dev];
-       if (! info->opened)
+       info = get_synthinfo_nospec(dp, dev);
+       if (!info || !info->opened)
                return;
        if (info->sysex)
                info->sysex->len = 0; /* reset sysex */
@@ -454,12 +455,14 @@ snd_seq_oss_synth_load_patch(struct seq_oss_devinfo *dp, int dev, int fmt,
                            const char __user *buf, int p, int c)
 {
        struct seq_oss_synth *rec;
+       struct seq_oss_synthinfo *info;
        int rc;
 
-       if (dev < 0 || dev >= dp->max_synthdev)
+       info = get_synthinfo_nospec(dp, dev);
+       if (!info)
                return -ENXIO;
 
-       if (is_midi_dev(dp, dev))
+       if (info->is_midi)
                return 0;
        if ((rec = get_synthdev(dp, dev)) == NULL)
                return -ENXIO;
@@ -467,24 +470,25 @@ snd_seq_oss_synth_load_patch(struct seq_oss_devinfo *dp, int dev, int fmt,
        if (rec->oper.load_patch == NULL)
                rc = -ENXIO;
        else
-               rc = rec->oper.load_patch(&dp->synths[dev].arg, fmt, buf, p, c);
+               rc = rec->oper.load_patch(&info->arg, fmt, buf, p, c);
        snd_use_lock_free(&rec->use_lock);
        return rc;
 }
 
 /*
- * check if the device is valid synth device
+ * check if the device is valid synth device and return the synth info
  */
-int
-snd_seq_oss_synth_is_valid(struct seq_oss_devinfo *dp, int dev)
+struct seq_oss_synthinfo *
+snd_seq_oss_synth_info(struct seq_oss_devinfo *dp, int dev)
 {
        struct seq_oss_synth *rec;
+
        rec = get_synthdev(dp, dev);
        if (rec) {
                snd_use_lock_free(&rec->use_lock);
-               return 1;
+               return get_synthinfo_nospec(dp, dev);
        }
-       return 0;
+       return NULL;
 }
 
 
@@ -499,16 +503,18 @@ snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf,
        int i, send;
        unsigned char *dest;
        struct seq_oss_synth_sysex *sysex;
+       struct seq_oss_synthinfo *info;
 
-       if (! snd_seq_oss_synth_is_valid(dp, dev))
+       info = snd_seq_oss_synth_info(dp, dev);
+       if (!info)
                return -ENXIO;
 
-       sysex = dp->synths[dev].sysex;
+       sysex = info->sysex;
        if (sysex == NULL) {
                sysex = kzalloc(sizeof(*sysex), GFP_KERNEL);
                if (sysex == NULL)
                        return -ENOMEM;
-               dp->synths[dev].sysex = sysex;
+               info->sysex = sysex;
        }
 
        send = 0;
@@ -553,10 +559,12 @@ snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf,
 int
 snd_seq_oss_synth_addr(struct seq_oss_devinfo *dp, int dev, struct snd_seq_event *ev)
 {
-       if (! snd_seq_oss_synth_is_valid(dp, dev))
+       struct seq_oss_synthinfo *info = snd_seq_oss_synth_info(dp, dev);
+
+       if (!info)
                return -EINVAL;
-       snd_seq_oss_fill_addr(dp, ev, dp->synths[dev].arg.addr.client,
-                             dp->synths[dev].arg.addr.port);
+       snd_seq_oss_fill_addr(dp, ev, info->arg.addr.client,
+                             info->arg.addr.port);
        return 0;
 }
 
@@ -568,16 +576,18 @@ int
 snd_seq_oss_synth_ioctl(struct seq_oss_devinfo *dp, int dev, unsigned int cmd, unsigned long addr)
 {
        struct seq_oss_synth *rec;
+       struct seq_oss_synthinfo *info;
        int rc;
 
-       if (is_midi_dev(dp, dev))
+       info = get_synthinfo_nospec(dp, dev);
+       if (!info || info->is_midi)
                return -ENXIO;
        if ((rec = get_synthdev(dp, dev)) == NULL)
                return -ENXIO;
        if (rec->oper.ioctl == NULL)
                rc = -ENXIO;
        else
-               rc = rec->oper.ioctl(&dp->synths[dev].arg, cmd, addr);
+               rc = rec->oper.ioctl(&info->arg, cmd, addr);
        snd_use_lock_free(&rec->use_lock);
        return rc;
 }
@@ -589,7 +599,10 @@ snd_seq_oss_synth_ioctl(struct seq_oss_devinfo *dp, int dev, unsigned int cmd, u
 int
 snd_seq_oss_synth_raw_event(struct seq_oss_devinfo *dp, int dev, unsigned char *data, struct snd_seq_event *ev)
 {
-       if (! snd_seq_oss_synth_is_valid(dp, dev) || is_midi_dev(dp, dev))
+       struct seq_oss_synthinfo *info;
+
+       info = snd_seq_oss_synth_info(dp, dev);
+       if (!info || info->is_midi)
                return -ENXIO;
        ev->type = SNDRV_SEQ_EVENT_OSS;
        memcpy(ev->data.raw8.d, data, 8);
index 74ac55f166b6517751d197c14a5aaf4b6805ca01..a63f9e22974dfb33ff0f309f0ab309625e0265ea 100644 (file)
@@ -37,7 +37,8 @@ void snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp);
 void snd_seq_oss_synth_reset(struct seq_oss_devinfo *dp, int dev);
 int snd_seq_oss_synth_load_patch(struct seq_oss_devinfo *dp, int dev, int fmt,
                                 const char __user *buf, int p, int c);
-int snd_seq_oss_synth_is_valid(struct seq_oss_devinfo *dp, int dev);
+struct seq_oss_synthinfo *snd_seq_oss_synth_info(struct seq_oss_devinfo *dp,
+                                                int dev);
 int snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf,
                            struct snd_seq_event *ev);
 int snd_seq_oss_synth_addr(struct seq_oss_devinfo *dp, int dev, struct snd_seq_event *ev);
index f48a4cd24ffce2d50639e177822bb96b31a93d02..289ae6bb81d9d1f86f24b5bedc5a49feb8b00c0c 100644 (file)
@@ -174,12 +174,12 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
                        }
                        return;
                }
+               spin_lock_irqsave(&substream->runtime->lock, flags);
                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-                               return;
+                               goto out;
                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                }
-               spin_lock_irqsave(&substream->runtime->lock, flags);
                while (1) {
                        count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
                        if (count <= 0)
index 58e349fc893f3815904aa0d85244bc4e4d6e9232..eab7f594ebe7599d74aee47129cbb51047549286 100644 (file)
@@ -831,9 +831,11 @@ static int loopback_rate_shift_get(struct snd_kcontrol *kcontrol,
 {
        struct loopback *loopback = snd_kcontrol_chip(kcontrol);
        
+       mutex_lock(&loopback->cable_lock);
        ucontrol->value.integer.value[0] =
                loopback->setup[kcontrol->id.subdevice]
                               [kcontrol->id.device].rate_shift;
+       mutex_unlock(&loopback->cable_lock);
        return 0;
 }
 
@@ -865,9 +867,11 @@ static int loopback_notify_get(struct snd_kcontrol *kcontrol,
 {
        struct loopback *loopback = snd_kcontrol_chip(kcontrol);
        
+       mutex_lock(&loopback->cable_lock);
        ucontrol->value.integer.value[0] =
                loopback->setup[kcontrol->id.subdevice]
                               [kcontrol->id.device].notify;
+       mutex_unlock(&loopback->cable_lock);
        return 0;
 }
 
@@ -879,12 +883,14 @@ static int loopback_notify_put(struct snd_kcontrol *kcontrol,
        int change = 0;
 
        val = ucontrol->value.integer.value[0] ? 1 : 0;
+       mutex_lock(&loopback->cable_lock);
        if (val != loopback->setup[kcontrol->id.subdevice]
                                [kcontrol->id.device].notify) {
                loopback->setup[kcontrol->id.subdevice]
                        [kcontrol->id.device].notify = val;
                change = 1;
        }
+       mutex_unlock(&loopback->cable_lock);
        return change;
 }
 
@@ -892,15 +898,18 @@ static int loopback_active_get(struct snd_kcontrol *kcontrol,
                               struct snd_ctl_elem_value *ucontrol)
 {
        struct loopback *loopback = snd_kcontrol_chip(kcontrol);
-       struct loopback_cable *cable = loopback->cables
-                       [kcontrol->id.subdevice][kcontrol->id.device ^ 1];
+       struct loopback_cable *cable;
+
        unsigned int val = 0;
 
+       mutex_lock(&loopback->cable_lock);
+       cable = loopback->cables[kcontrol->id.subdevice][kcontrol->id.device ^ 1];
        if (cable != NULL) {
                unsigned int running = cable->running ^ cable->pause;
 
                val = (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) ? 1 : 0;
        }
+       mutex_unlock(&loopback->cable_lock);
        ucontrol->value.integer.value[0] = val;
        return 0;
 }
@@ -943,9 +952,11 @@ static int loopback_rate_get(struct snd_kcontrol *kcontrol,
 {
        struct loopback *loopback = snd_kcontrol_chip(kcontrol);
        
+       mutex_lock(&loopback->cable_lock);
        ucontrol->value.integer.value[0] =
                loopback->setup[kcontrol->id.subdevice]
                               [kcontrol->id.device].rate;
+       mutex_unlock(&loopback->cable_lock);
        return 0;
 }
 
@@ -965,9 +976,11 @@ static int loopback_channels_get(struct snd_kcontrol *kcontrol,
 {
        struct loopback *loopback = snd_kcontrol_chip(kcontrol);
        
+       mutex_lock(&loopback->cable_lock);
        ucontrol->value.integer.value[0] =
                loopback->setup[kcontrol->id.subdevice]
                               [kcontrol->id.device].channels;
+       mutex_unlock(&loopback->cable_lock);
        return 0;
 }
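
All of these aloop control callbacks touch state that the cable code allocates and frees concurrently, so each access is now wrapped in cable_lock. The loopback_active_get() hunk is the instructive one: the cables[][] pointer itself can be freed, so it must be loaded and dereferenced under the lock. Reduced to the essential fragment (a sketch):

	mutex_lock(&loopback->cable_lock);
	cable = loopback->cables[subdev][dev ^ 1];  /* stable only under lock */
	if (cable) {
		unsigned int running = cable->running ^ cable->pause;

		val = (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) ? 1 : 0;
	}
	mutex_unlock(&loopback->cable_lock);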
 
index ddcc1a325a618124b9206a9f0c0c9a23774938ed..42920a2433282befccd62aaa43bbec949fe22e6c 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/nospec.h>
 #include <sound/opl3.h>
 #include <sound/asound_fm.h>
 
@@ -448,7 +449,7 @@ static int snd_opl3_set_voice(struct snd_opl3 * opl3, struct snd_dm_fm_voice * v
 {
        unsigned short reg_side;
        unsigned char op_offset;
-       unsigned char voice_offset;
+       unsigned char voice_offset, voice_op;
 
        unsigned short opl3_reg;
        unsigned char reg_val;
@@ -473,7 +474,9 @@ static int snd_opl3_set_voice(struct snd_opl3 * opl3, struct snd_dm_fm_voice * v
                voice_offset = voice->voice - MAX_OPL2_VOICES;
        }
        /* Get register offset of operator */
-       op_offset = snd_opl3_regmap[voice_offset][voice->op];
+       voice_offset = array_index_nospec(voice_offset, MAX_OPL2_VOICES);
+       voice_op = array_index_nospec(voice->op, 4);
+       op_offset = snd_opl3_regmap[voice_offset][voice_op];
 
        reg_val = 0x00;
        /* Set amplitude modulation (tremolo) effect */
index 4a1dc145327b19f8f5dac1123ae0a20264d93506..cb9acfe60f6a350951d3a464969dacb3797ec64a 100644 (file)
@@ -773,8 +773,6 @@ static void amdtp_stream_first_callback(struct fw_iso_context *context,
        u32 cycle;
        unsigned int packets;
 
-       s->max_payload_length = amdtp_stream_get_max_payload(s);
-
        /*
         * For in-stream, first packet has come.
         * For out-stream, prepared to transmit first packet
@@ -879,6 +877,9 @@ int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed)
 
        amdtp_stream_update(s);
 
+       if (s->direction == AMDTP_IN_STREAM)
+               s->max_payload_length = amdtp_stream_get_max_payload(s);
+
        if (s->flags & CIP_NO_HEADER)
                s->tag = TAG_NO_CIP_HEADER;
        else
index 8573289c381ed7314c6e1a08d4b39191756941f9..928a255bfc351406058b1c2bd8a43d785f63ef97 100644 (file)
@@ -435,7 +435,7 @@ int snd_dice_stream_init_duplex(struct snd_dice *dice)
                err = init_stream(dice, AMDTP_IN_STREAM, i);
                if (err < 0) {
                        for (; i >= 0; i--)
-                               destroy_stream(dice, AMDTP_OUT_STREAM, i);
+                               destroy_stream(dice, AMDTP_IN_STREAM, i);
                        goto end;
                }
        }
index 4ddb4cdd054b860142da4356dc6abeef44b4c698..96bb01b6b7512df8aea3871e61275773d4d8543d 100644 (file)
@@ -14,7 +14,7 @@ MODULE_LICENSE("GPL v2");
 #define OUI_WEISS              0x001c6a
 #define OUI_LOUD               0x000ff2
 #define OUI_FOCUSRITE          0x00130e
-#define OUI_TCELECTRONIC       0x001486
+#define OUI_TCELECTRONIC       0x000166
 
 #define DICE_CATEGORY_ID       0x04
 #define WEISS_CATEGORY_ID      0x00
index 7eb617175fdec656f0ae6a8ad8387c416f535b39..a31a70dccecff024f92ee8bbcc9ead5ef1cd4bed 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "hpi_internal.h"
 #include "hpimsginit.h"
+#include <linux/nospec.h>
 
 /* The actual message size for each object type */
 static u16 msg_size[HPI_OBJ_MAXINDEX + 1] = HPI_MESSAGE_SIZE_BY_OBJECT;
@@ -39,10 +40,12 @@ static void hpi_init_message(struct hpi_message *phm, u16 object,
 {
        u16 size;
 
-       if ((object > 0) && (object <= HPI_OBJ_MAXINDEX))
+       if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) {
+               object = array_index_nospec(object, HPI_OBJ_MAXINDEX + 1);
                size = msg_size[object];
-       else
+       } else {
                size = sizeof(*phm);
+       }
 
        memset(phm, 0, size);
        phm->size = size;
@@ -66,10 +69,12 @@ void hpi_init_response(struct hpi_response *phr, u16 object, u16 function,
 {
        u16 size;
 
-       if ((object > 0) && (object <= HPI_OBJ_MAXINDEX))
+       if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) {
+               object = array_index_nospec(object, HPI_OBJ_MAXINDEX + 1);
                size = res_size[object];
-       else
+       } else {
                size = sizeof(*phr);
+       }
 
        memset(phr, 0, sizeof(*phr));
        phr->size = size;
index 5badd08e1d69cc12657410359255ab691eff62f0..b1a2a7ea4172331c07b50340c5d26ebe2f9e6bf0 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/stringify.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/nospec.h>
 
 #ifdef MODULE_FIRMWARE
 MODULE_FIRMWARE("asihpi/dsp5000.bin");
@@ -186,7 +187,8 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                struct hpi_adapter *pa = NULL;
 
                if (hm->h.adapter_index < ARRAY_SIZE(adapters))
-                       pa = &adapters[hm->h.adapter_index];
+                       pa = &adapters[array_index_nospec(hm->h.adapter_index,
+                                                         ARRAY_SIZE(adapters))];
 
                if (!pa || !pa->adapter || !pa->adapter->type) {
                        hpi_init_response(&hr->r0, hm->h.object,
index 57df06e76968ac4abd0f00b3005c416af256cb7b..cc009a4a3d1d206a8480a506894a59c0226c2e9a 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
+#include <linux/nospec.h>
 #include <sound/core.h>
 #include "hda_codec.h"
 #include "hda_local.h"
@@ -51,7 +52,16 @@ static int get_wcap_ioctl(struct hda_codec *codec,
        
        if (get_user(verb, &arg->verb))
                return -EFAULT;
-       res = get_wcaps(codec, verb >> 24);
+       /* open-code get_wcaps(verb>>24) with nospec */
+       verb >>= 24;
+       if (verb < codec->core.start_nid ||
+           verb >= codec->core.start_nid + codec->core.num_nodes) {
+               res = 0;
+       } else {
+               verb -= codec->core.start_nid;
+               verb = array_index_nospec(verb, codec->core.num_nodes);
+               res = codec->wcaps[verb];
+       }
        if (put_user(res, &arg->res))
                return -EFAULT;
        return 0;
index 7a111a1b58360d34062abf83d06503482929fa32..a0c93b9c9a283684ce1894c6d963c7917e4651d0 100644 (file)
@@ -1647,7 +1647,8 @@ static void azx_check_snoop_available(struct azx *chip)
                 */
                u8 val;
                pci_read_config_byte(chip->pci, 0x42, &val);
-               if (!(val & 0x80) && chip->pci->revision == 0x30)
+               if (!(val & 0x80) && (chip->pci->revision == 0x30 ||
+                                     chip->pci->revision == 0x20))
                        snoop = false;
        }
 
@@ -2209,6 +2210,8 @@ static struct snd_pci_quirk power_save_blacklist[] = {
        SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
        /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
        SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */
+       SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0),
        /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
        SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
        {}
index b4f1b6e88305496f91d028ceb82fe9b8a6a60ccb..7d7eb1354eeec4dc48a734251e6cb9f5d12cd850 100644 (file)
@@ -1383,6 +1383,8 @@ static void hdmi_pcm_setup_pin(struct hdmi_spec *spec,
                pcm = get_pcm_rec(spec, per_pin->pcm_idx);
        else
                return;
+       if (!pcm->pcm)
+               return;
        if (!test_bit(per_pin->pcm_idx, &spec->pcm_in_use))
                return;
 
@@ -2151,8 +2153,13 @@ static int generic_hdmi_build_controls(struct hda_codec *codec)
        int dev, err;
        int pin_idx, pcm_idx;
 
-
        for (pcm_idx = 0; pcm_idx < spec->pcm_used; pcm_idx++) {
+               if (!get_pcm_rec(spec, pcm_idx)->pcm) {
+                       /* no PCM: mark this for skipping permanently */
+                       set_bit(pcm_idx, &spec->pcm_bitmap);
+                       continue;
+               }
+
                err = generic_hdmi_build_jack(codec, pcm_idx);
                if (err < 0)
                        return err;
index aef1f52db7d9e5264fdcdb382dac477edc098514..01a6643fc7d4727818f5335a04b91f2ab45e4f29 100644 (file)
@@ -331,6 +331,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
                /* fallthrough */
        case 0x10ec0215:
        case 0x10ec0233:
+       case 0x10ec0235:
        case 0x10ec0236:
        case 0x10ec0255:
        case 0x10ec0256:
@@ -2362,6 +2363,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
        SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
        SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
        SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
        SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
@@ -3831,7 +3833,7 @@ static void alc280_fixup_hp_gpio4(struct hda_codec *codec,
        }
 }
 
-#if IS_REACHABLE(INPUT)
+#if IS_REACHABLE(CONFIG_INPUT)
 static void gpio2_mic_hotkey_event(struct hda_codec *codec,
                                   struct hda_jack_callback *event)
 {
@@ -6370,6 +6372,8 @@ static const struct hda_fixup alc269_fixups[] = {
                        { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
                        { }
                },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MIC
        },
 };
 
@@ -6573,6 +6577,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+       SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+       SND_PCI_QUIRK(0x17aa, 0x3138, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
@@ -7157,8 +7163,11 @@ static int patch_alc269(struct hda_codec *codec)
        case 0x10ec0298:
                spec->codec_variant = ALC269_TYPE_ALC298;
                break;
+       case 0x10ec0235:
        case 0x10ec0255:
                spec->codec_variant = ALC269_TYPE_ALC255;
+               spec->shutup = alc256_shutup;
+               spec->init_hook = alc256_init;
                break;
        case 0x10ec0236:
        case 0x10ec0256:
index 4c59983158e0ed8d37bcad8ae3735129a1bd1ef8..11b5b5e0e0580fd3a3d33cf99dd71728db1bdd3a 100644 (file)
 #include <linux/pci.h>
 #include <linux/math64.h>
 #include <linux/io.h>
+#include <linux/nospec.h>
 
 #include <sound/core.h>
 #include <sound/control.h>
@@ -5698,40 +5699,43 @@ static int snd_hdspm_channel_info(struct snd_pcm_substream *substream,
                struct snd_pcm_channel_info *info)
 {
        struct hdspm *hdspm = snd_pcm_substream_chip(substream);
+       unsigned int channel = info->channel;
 
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-               if (snd_BUG_ON(info->channel >= hdspm->max_channels_out)) {
+               if (snd_BUG_ON(channel >= hdspm->max_channels_out)) {
                        dev_info(hdspm->card->dev,
                                 "snd_hdspm_channel_info: output channel out of range (%d)\n",
-                                info->channel);
+                                channel);
                        return -EINVAL;
                }
 
-               if (hdspm->channel_map_out[info->channel] < 0) {
+               channel = array_index_nospec(channel, hdspm->max_channels_out);
+               if (hdspm->channel_map_out[channel] < 0) {
                        dev_info(hdspm->card->dev,
                                 "snd_hdspm_channel_info: output channel %d mapped out\n",
-                                info->channel);
+                                channel);
                        return -EINVAL;
                }
 
-               info->offset = hdspm->channel_map_out[info->channel] *
+               info->offset = hdspm->channel_map_out[channel] *
                        HDSPM_CHANNEL_BUFFER_BYTES;
        } else {
-               if (snd_BUG_ON(info->channel >= hdspm->max_channels_in)) {
+               if (snd_BUG_ON(channel >= hdspm->max_channels_in)) {
                        dev_info(hdspm->card->dev,
                                 "snd_hdspm_channel_info: input channel out of range (%d)\n",
-                                info->channel);
+                                channel);
                        return -EINVAL;
                }
 
-               if (hdspm->channel_map_in[info->channel] < 0) {
+               channel = array_index_nospec(channel, hdspm->max_channels_in);
+               if (hdspm->channel_map_in[channel] < 0) {
                        dev_info(hdspm->card->dev,
                                 "snd_hdspm_channel_info: input channel %d mapped out\n",
-                                info->channel);
+                                channel);
                        return -EINVAL;
                }
 
-               info->offset = hdspm->channel_map_in[info->channel] *
+               info->offset = hdspm->channel_map_in[channel] *
                        HDSPM_CHANNEL_BUFFER_BYTES;
        }
 
index df648b1d92177c30c2380b96de4e9ebd24fb07aa..edd765e2237707ea468455723d542eb74e97bf0f 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/nospec.h>
 
 #include <sound/core.h>
 #include <sound/control.h>
@@ -2071,9 +2072,10 @@ static int snd_rme9652_channel_info(struct snd_pcm_substream *substream,
        if (snd_BUG_ON(info->channel >= RME9652_NCHANNELS))
                return -EINVAL;
 
-       if ((chn = rme9652->channel_map[info->channel]) < 0) {
+       chn = rme9652->channel_map[array_index_nospec(info->channel,
+                                                     RME9652_NCHANNELS)];
+       if (chn < 0)
                return -EINVAL;
-       }
 
        info->offset = chn * RME9652_CHANNEL_BUFFER_BYTES;
        info->first = 0;
index b205c782e494133d05fd27cb76509e7e690d5f85..f41560ecbcd18024f2f1354cd741115fcbd69739 100644 (file)
@@ -43,7 +43,7 @@
 #define DUAL_CHANNEL           2
 
 static struct snd_soc_jack cz_jack;
-struct clk *da7219_dai_clk;
+static struct clk *da7219_dai_clk;
 
 static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd)
 {
index 80c2a06285bbe1f5df619542836bef3a15941c96..12bf24c26818a3ff046a426f56d0cebb356be9a2 100644 (file)
@@ -502,7 +502,7 @@ static int adau17x1_hw_params(struct snd_pcm_substream *substream,
        }
 
        if (adau->sigmadsp) {
-               ret = adau17x1_setup_firmware(adau, params_rate(params));
+               ret = adau17x1_setup_firmware(component, params_rate(params));
                if (ret < 0)
                        return ret;
        }
@@ -835,26 +835,40 @@ bool adau17x1_volatile_register(struct device *dev, unsigned int reg)
 }
 EXPORT_SYMBOL_GPL(adau17x1_volatile_register);
 
-int adau17x1_setup_firmware(struct adau *adau, unsigned int rate)
+int adau17x1_setup_firmware(struct snd_soc_component *component,
+       unsigned int rate)
 {
        int ret;
-       int dspsr;
+       int dspsr, dsp_run;
+       struct adau *adau = snd_soc_component_get_drvdata(component);
+       struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+
+       snd_soc_dapm_mutex_lock(dapm);
 
        ret = regmap_read(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, &dspsr);
        if (ret)
-               return ret;
+               goto err;
+
+       ret = regmap_read(adau->regmap, ADAU17X1_DSP_RUN, &dsp_run);
+       if (ret)
+               goto err;
 
        regmap_write(adau->regmap, ADAU17X1_DSP_ENABLE, 1);
        regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, 0xf);
+       regmap_write(adau->regmap, ADAU17X1_DSP_RUN, 0);
 
        ret = sigmadsp_setup(adau->sigmadsp, rate);
        if (ret) {
                regmap_write(adau->regmap, ADAU17X1_DSP_ENABLE, 0);
-               return ret;
+               goto err;
        }
        regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, dspsr);
+       regmap_write(adau->regmap, ADAU17X1_DSP_RUN, dsp_run);
 
-       return 0;
+err:
+       snd_soc_dapm_mutex_unlock(dapm);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(adau17x1_setup_firmware);
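
Besides taking the component instead of the bare adau struct, the rewritten helper now serializes against DAPM and saves/restores the DSP run state around the firmware load. Its control flow, schematically (register names follow the driver; this is a condensed sketch, not the full function):

	snd_soc_dapm_mutex_lock(dapm);

	ret = regmap_read(adau->regmap, ADAU17X1_DSP_RUN, &dsp_run);
	if (ret)
		goto err;

	regmap_write(adau->regmap, ADAU17X1_DSP_RUN, 0);       /* stop DSP */
	ret = sigmadsp_setup(adau->sigmadsp, rate);            /* load fw */
	regmap_write(adau->regmap, ADAU17X1_DSP_RUN, dsp_run); /* restore */
	err:
	snd_soc_dapm_mutex_unlock(dapm);
	return ret;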
 
index a7b1cb770814624f51465664e2dd3e592a91f1df..e6fe87beec07701810be444eee2abd3280c51674 100644 (file)
@@ -68,7 +68,8 @@ int adau17x1_resume(struct snd_soc_component *component);
 
 extern const struct snd_soc_dai_ops adau17x1_dai_ops;
 
-int adau17x1_setup_firmware(struct adau *adau, unsigned int rate);
+int adau17x1_setup_firmware(struct snd_soc_component *component,
+       unsigned int rate);
 bool adau17x1_has_dsp(struct adau *adau);
 
 #define ADAU17X1_CLOCK_CONTROL                 0x4000
index 12ee83d52405d06a72c8cd5a67485c69bf2ddeb1..b7cf7cce95fecd526bd4d8699e6dee9f4119d4db 100644 (file)
@@ -1187,7 +1187,8 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
                return irq;
        }
 
-       ret = devm_request_irq(dev, irq, pm8916_mbhc_switch_irq_handler,
+       ret = devm_request_threaded_irq(dev, irq, NULL,
+                              pm8916_mbhc_switch_irq_handler,
                               IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING |
                               IRQF_ONESHOT,
                               "mbhc switch irq", priv);
@@ -1201,7 +1202,8 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
                        return irq;
                }
 
-               ret = devm_request_irq(dev, irq, mbhc_btn_press_irq_handler,
+               ret = devm_request_threaded_irq(dev, irq, NULL,
+                                      mbhc_btn_press_irq_handler,
                                       IRQF_TRIGGER_RISING |
                                       IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
                                       "mbhc btn press irq", priv);
@@ -1214,7 +1216,8 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
                        return irq;
                }
 
-               ret = devm_request_irq(dev, irq, mbhc_btn_release_irq_handler,
+               ret = devm_request_threaded_irq(dev, irq, NULL,
+                                      mbhc_btn_release_irq_handler,
                                       IRQF_TRIGGER_RISING |
                                       IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
                                       "mbhc btn release irq", priv);
index e8a66b03faabf405ec43f7170e0205fd4650eb29..1570b91bf018f9421c91a4c66631d819a800876c 100644 (file)
@@ -89,6 +89,7 @@ static const struct reg_default rt5514_reg[] = {
        {RT5514_PLL3_CALIB_CTRL5,       0x40220012},
        {RT5514_DELAY_BUF_CTRL1,        0x7fff006a},
        {RT5514_DELAY_BUF_CTRL3,        0x00000000},
+       {RT5514_ASRC_IN_CTRL1,          0x00000003},
        {RT5514_DOWNFILTER0_CTRL1,      0x00020c2f},
        {RT5514_DOWNFILTER0_CTRL2,      0x00020c2f},
        {RT5514_DOWNFILTER0_CTRL3,      0x10000362},
@@ -181,6 +182,7 @@ static bool rt5514_readable_register(struct device *dev, unsigned int reg)
        case RT5514_PLL3_CALIB_CTRL5:
        case RT5514_DELAY_BUF_CTRL1:
        case RT5514_DELAY_BUF_CTRL3:
+       case RT5514_ASRC_IN_CTRL1:
        case RT5514_DOWNFILTER0_CTRL1:
        case RT5514_DOWNFILTER0_CTRL2:
        case RT5514_DOWNFILTER0_CTRL3:
@@ -238,6 +240,7 @@ static bool rt5514_i2c_readable_register(struct device *dev,
        case RT5514_DSP_MAPPING | RT5514_PLL3_CALIB_CTRL5:
        case RT5514_DSP_MAPPING | RT5514_DELAY_BUF_CTRL1:
        case RT5514_DSP_MAPPING | RT5514_DELAY_BUF_CTRL3:
+       case RT5514_DSP_MAPPING | RT5514_ASRC_IN_CTRL1:
        case RT5514_DSP_MAPPING | RT5514_DOWNFILTER0_CTRL1:
        case RT5514_DSP_MAPPING | RT5514_DOWNFILTER0_CTRL2:
        case RT5514_DSP_MAPPING | RT5514_DOWNFILTER0_CTRL3:
index 40a700493f4c8f0d3dec67956ee8afb8b20c37f3..da8fd98c7f51c2a1ee3116a00d17d8820bb3582d 100644 (file)
@@ -144,6 +144,13 @@ static int fsl_esai_divisor_cal(struct snd_soc_dai *dai, bool tx, u32 ratio,
 
        psr = ratio <= 256 * maxfp ? ESAI_xCCR_xPSR_BYPASS : ESAI_xCCR_xPSR_DIV8;
 
+       /* Do not loop-search if PM (1 ~ 256) alone can serve the ratio */
+       if (ratio <= 256) {
+               pm = ratio;
+               fp = 1;
+               goto out;
+       }
+
        /* Set the max fluctuation -- 0.1% of the max devisor */
        savesub = (psr ? 1 : 8)  * 256 * maxfp / 1000;
 
index 0823b08923b5ef5d0860c14807af75f899eed0ed..89df2d9f63d7d45d219d7172bcb85e9926e8e549 100644 (file)
@@ -217,6 +217,7 @@ struct fsl_ssi_soc_data {
  * @dai_fmt: DAI configuration this device is currently used with
  * @streams: Mask of current active streams: BIT(TX) and BIT(RX)
  * @i2s_net: I2S and Network mode configurations of SCR register
+ *           (this is the initial settings based on the DAI format)
  * @synchronous: Use synchronous mode - both of TX and RX use STCK and SFCK
  * @use_dma: DMA is used or FIQ with stream filter
  * @use_dual_fifo: DMA with support for dual FIFO mode
@@ -829,16 +830,23 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream,
        }
 
        if (!fsl_ssi_is_ac97(ssi)) {
+               /*
+                * Keep the ssi->i2s_net intact while having a local variable
+                * to override settings for special use cases. Otherwise, the
+                * ssi->i2s_net will lose the settings for regular use cases.
+                */
+               u8 i2s_net = ssi->i2s_net;
+
                /* Normal + Network mode to send 16-bit data in 32-bit frames */
                if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16)
-                       ssi->i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET;
+                       i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET;
 
                /* Use Normal mode to send mono data at 1st slot of 2 slots */
                if (channels == 1)
-                       ssi->i2s_net = SSI_SCR_I2S_MODE_NORMAL;
+                       i2s_net = SSI_SCR_I2S_MODE_NORMAL;
 
                regmap_update_bits(regs, REG_SSI_SCR,
-                                  SSI_SCR_I2S_NET_MASK, ssi->i2s_net);
+                                  SSI_SCR_I2S_NET_MASK, i2s_net);
        }
 
        /* In synchronous mode, the SSI uses STCCR for capture */
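
The comment in the hunk carries the reasoning: the special cases used to overwrite ssi->i2s_net itself, so the DAI-format setting was lost for every later regular stream. Overriding a local copy and writing that to the register leaves the cached value intact. Schematically (the condition is a stand-in for the two special cases above):

	u8 i2s_net = ssi->i2s_net;	/* cached DAI-format setting */

	if (needs_normal_mode)		/* per-stream special case (sketch) */
		i2s_net = SSI_SCR_I2S_MODE_NORMAL;

	regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_I2S_NET_MASK, i2s_net);
	/* ssi->i2s_net stays untouched for the next stream */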
index ceb105cbd461582196bff4e59c225cf6083952bd..addac2a8e52a573d40e5a7a0ede942c2fa85f6ab 100644 (file)
@@ -72,24 +72,28 @@ config SND_SOC_INTEL_BAYTRAIL
          for Baytrail Chromebooks but this option is now deprecated and is
          not recommended, use SND_SST_ATOM_HIFI2_PLATFORM instead.
 
+config SND_SST_ATOM_HIFI2_PLATFORM
+       tristate
+       select SND_SOC_COMPRESS
+
 config SND_SST_ATOM_HIFI2_PLATFORM_PCI
-       tristate "PCI HiFi2 (Medfield, Merrifield) Platforms"
+       tristate "PCI HiFi2 (Merrifield) Platforms"
        depends on X86 && PCI
        select SND_SST_IPC_PCI
-       select SND_SOC_COMPRESS
+       select SND_SST_ATOM_HIFI2_PLATFORM
        help
-         If you have a Intel Medfield or Merrifield/Edison platform, then
+         If you have a Intel Merrifield/Edison platform, then
          enable this option by saying Y or m. Distros will typically not
-         enable this option: Medfield devices are not available to
-         developers and while Merrifield/Edison can run a mainline kernel with
-         limited functionality it will require a firmware file which
-         is not in the standard firmware tree
+         enable this option: while Merrifield/Edison can run a mainline
+         kernel with limited functionality it will require a firmware file
+         which is not in the standard firmware tree
 
-config SND_SST_ATOM_HIFI2_PLATFORM
+config SND_SST_ATOM_HIFI2_PLATFORM_ACPI
        tristate "ACPI HiFi2 (Baytrail, Cherrytrail) Platforms"
+       default ACPI
        depends on X86 && ACPI
        select SND_SST_IPC_ACPI
-       select SND_SOC_COMPRESS
+       select SND_SST_ATOM_HIFI2_PLATFORM
        select SND_SOC_ACPI_INTEL_MATCH
        select IOSF_MBI
        help
index 09db2aec12a3010525b872ec817626cb5365b311..b2f5d2fa354d1d888dddf829847883bc84bdcdd1 100644 (file)
@@ -281,7 +281,7 @@ static int omap_dmic_dai_trigger(struct snd_pcm_substream *substream,
 static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id,
                                 unsigned int freq)
 {
-       struct clk *parent_clk;
+       struct clk *parent_clk, *mux;
        char *parent_clk_name;
        int ret = 0;
 
@@ -329,14 +329,21 @@ static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id,
                return -ENODEV;
        }
 
+       mux = clk_get_parent(dmic->fclk);
+       if (IS_ERR(mux)) {
+               dev_err(dmic->dev, "can't get fck mux parent\n");
+               clk_put(parent_clk);
+               return -ENODEV;
+       }
+
        mutex_lock(&dmic->mutex);
        if (dmic->active) {
                /* disable clock while reparenting */
                pm_runtime_put_sync(dmic->dev);
-               ret = clk_set_parent(dmic->fclk, parent_clk);
+               ret = clk_set_parent(mux, parent_clk);
                pm_runtime_get_sync(dmic->dev);
        } else {
-               ret = clk_set_parent(dmic->fclk, parent_clk);
+               ret = clk_set_parent(mux, parent_clk);
        }
        mutex_unlock(&dmic->mutex);
 
@@ -349,6 +356,7 @@ static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id,
        dmic->fclk_freq = freq;
 
 err_busy:
+       clk_put(mux);
        clk_put(parent_clk);
 
        return ret;
index 6a76688a8ba953d5b8f373e6a382c8437ec7bfa6..94f081b93258f8948035cee1b816feef3ea79a68 100644 (file)
@@ -1536,7 +1536,7 @@ static int rsnd_remove(struct platform_device *pdev)
        return ret;
 }
 
-static int rsnd_suspend(struct device *dev)
+static int __maybe_unused rsnd_suspend(struct device *dev)
 {
        struct rsnd_priv *priv = dev_get_drvdata(dev);
 
@@ -1545,7 +1545,7 @@ static int rsnd_suspend(struct device *dev)
        return 0;
 }
 
-static int rsnd_resume(struct device *dev)
+static int __maybe_unused rsnd_resume(struct device *dev)
 {
        struct rsnd_priv *priv = dev_get_drvdata(dev);
 
index fa27d0fca6dce10ed8befc8df9cdf7406dbcac75..986b8b2f90fba577cce10462e75ad9966258b49f 100644 (file)
@@ -513,7 +513,7 @@ static void remove_widget(struct snd_soc_component *comp,
         */
        if (dobj->widget.kcontrol_type == SND_SOC_TPLG_TYPE_ENUM) {
                /* enumerated widget mixer */
-               for (i = 0; i < w->num_kcontrols; i++) {
+               for (i = 0; w->kcontrols != NULL && i < w->num_kcontrols; i++) {
                        struct snd_kcontrol *kcontrol = w->kcontrols[i];
                        struct soc_enum *se =
                                (struct soc_enum *)kcontrol->private_value;
@@ -530,7 +530,7 @@ static void remove_widget(struct snd_soc_component *comp,
                }
        } else {
                /* volume mixer or bytes controls */
-               for (i = 0; i < w->num_kcontrols; i++) {
+               for (i = 0; w->kcontrols != NULL && i < w->num_kcontrols; i++) {
                        struct snd_kcontrol *kcontrol = w->kcontrols[i];
 
                        if (dobj->widget.kcontrol_type
@@ -1325,8 +1325,10 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_denum_create(
                        ec->hdr.name);
 
                kc[i].name = kstrdup(ec->hdr.name, GFP_KERNEL);
-               if (kc[i].name == NULL)
+               if (kc[i].name == NULL) {
+                       kfree(se);
                        goto err_se;
+               }
                kc[i].private_value = (long)se;
                kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER;
                kc[i].access = ec->hdr.access;
@@ -1442,8 +1444,10 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_dbytes_create(
                        be->hdr.name, be->hdr.access);
 
                kc[i].name = kstrdup(be->hdr.name, GFP_KERNEL);
-               if (kc[i].name == NULL)
+               if (kc[i].name == NULL) {
+                       kfree(sbe);
                        goto err;
+               }
                kc[i].private_value = (long)sbe;
                kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER;
                kc[i].access = be->hdr.access;
@@ -2576,7 +2580,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index)
 
                        /* match index */
                        if (dobj->index != index &&
-                               dobj->index != SND_SOC_TPLG_INDEX_ALL)
+                               index != SND_SOC_TPLG_INDEX_ALL)
                                continue;
 
                        switch (dobj->type) {
index 6d7cde56a355ea57266bbd85c94de9b4a7cefea5..e2cf55c53ea8bce34d13e6773bf599e8ec630859 100644 (file)
@@ -125,7 +125,7 @@ static int send_midi_async(struct usb_line6 *line6, unsigned char *data,
        }
 
        usb_fill_int_urb(urb, line6->usbdev,
-                        usb_sndbulkpipe(line6->usbdev,
+                        usb_sndintpipe(line6->usbdev,
                                         line6->properties->ep_ctrl_w),
                         transfer_buffer, length, midi_sent, line6,
                         line6->interval);
index 301ad61ed4267f28476af340325ae5173a12d2ad..bb5ab7a7dfa58b0d21cb08acfeff2cca29ba7865 100644 (file)
@@ -967,6 +967,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval,
                }
                break;
 
+       case USB_ID(0x0d8c, 0x0103):
+               if (!strcmp(kctl->id.name, "PCM Playback Volume")) {
+                       usb_audio_info(chip,
+                                "set volume quirk for CM102-A+/102S+\n");
+                       cval->min = -256;
+               }
+               break;
+
        case USB_ID(0x0471, 0x0101):
        case USB_ID(0x0471, 0x0104):
        case USB_ID(0x0471, 0x0105):
@@ -1776,7 +1784,8 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
                                build_feature_ctl(state, _ftr, ch_bits, control,
                                                  &iterm, unitid, ch_read_only);
                        if (uac_v2v3_control_is_readable(master_bits, control))
-                               build_feature_ctl(state, _ftr, 0, i, &iterm, unitid,
+                               build_feature_ctl(state, _ftr, 0, control,
+                                                 &iterm, unitid,
                                                  !uac_v2v3_control_is_writeable(master_bits,
                                                                                 control));
                }
@@ -1859,7 +1868,7 @@ static int parse_audio_input_terminal(struct mixer_build *state, int unitid,
        check_input_term(state, d->bTerminalID, &iterm);
        if (state->mixer->protocol == UAC_VERSION_2) {
                /* Check for jack detection. */
-               if (uac_v2v3_control_is_readable(d->bmControls,
+               if (uac_v2v3_control_is_readable(le16_to_cpu(d->bmControls),
                                                 UAC2_TE_CONNECTOR)) {
                        build_connector_control(state, &iterm, true);
                }
@@ -2561,7 +2570,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
                        if (err < 0 && err != -EINVAL)
                                return err;
 
-                       if (uac_v2v3_control_is_readable(desc->bmControls,
+                       if (uac_v2v3_control_is_readable(le16_to_cpu(desc->bmControls),
                                                         UAC2_TE_CONNECTOR)) {
                                build_connector_control(&state, &state.oterm,
                                                        false);
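
Both mixer hunks fix the same endianness bug: bmControls is a little-endian wire-format field in the UAC2 descriptor, so it must go through le16_to_cpu() before the readable/writeable bit tests; passing the raw __le16 happens to work on x86 but breaks on big-endian hosts. Distilled from the hunk above:

	/* d is the descriptor as read from the device (wire byte order) */
	if (uac_v2v3_control_is_readable(le16_to_cpu(d->bmControls),
					 UAC2_TE_CONNECTOR))
		build_connector_control(state, &iterm, true);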
index 9038b2e7df732d453993a180071a98ea3b2ff79f..eaa03acd4686bdd20e19b69502c23ca465977ade 100644 (file)
@@ -353,8 +353,11 @@ static struct usbmix_name_map bose_companion5_map[] = {
 /*
  * Dell usb dock with ALC4020 codec had a firmware problem where it got
  * screwed up when zero volume is passed; just skip it as a workaround
+ *
+ * Also the extension unit gives an access error, so skip it as well.
  */
 static const struct usbmix_name_map dell_alc4020_map[] = {
+       { 4, NULL },    /* extension unit */
        { 16, NULL },
        { 19, NULL },
        { 0 }
index 6a8f5843334e98558d09174e157c308644c736a5..5ed334575fc73cd7f184a9be450f555f190a4d61 100644 (file)
@@ -349,7 +349,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor
                         * TODO: this conversion is not complete, update it
                         * after adding UAC3 values to asound.h
                         */
-                       switch (is->bChPurpose) {
+                       switch (is->bChRelationship) {
                        case UAC3_CH_MONO:
                                map = SNDRV_CHMAP_MONO;
                                break;
@@ -576,7 +576,7 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip,
 
        if (protocol == UAC_VERSION_1) {
                attributes = csep->bmAttributes;
-       } else {
+       } else if (protocol == UAC_VERSION_2) {
                struct uac2_iso_endpoint_descriptor *csep2 =
                        (struct uac2_iso_endpoint_descriptor *) csep;
 
@@ -585,6 +585,13 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip,
                /* emulate the endpoint attributes of a v1 device */
                if (csep2->bmControls & UAC2_CONTROL_PITCH)
                        attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL;
+       } else { /* UAC_VERSION_3 */
+               struct uac3_iso_endpoint_descriptor *csep3 =
+                       (struct uac3_iso_endpoint_descriptor *) csep;
+
+               /* emulate the endpoint attributes of a v1 device */
+               if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH)
+                       attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL;
        }
 
        return attributes;
index ebcab5c5465d28dfff6b0fbebc5185b3e8f30001..8082f7b077f187453a38db038345eaa41d4c9a1e 100644 (file)
@@ -139,7 +139,7 @@ static void usb_stream_hwdep_vm_open(struct vm_area_struct *area)
        snd_printdd(KERN_DEBUG "%i\n", atomic_read(&us122l->mmap_count));
 }
 
-static int usb_stream_hwdep_vm_fault(struct vm_fault *vmf)
+static vm_fault_t usb_stream_hwdep_vm_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        struct page *page;
index d8bd7c99b48c91ab5a64914af75a434755922256..c1dd9a7b48df6749d8c566fc6fbac2a7fb1d630b 100644 (file)
@@ -31,7 +31,7 @@
 #include "usbusx2y.h"
 #include "usX2Yhwdep.h"
 
-static int snd_us428ctls_vm_fault(struct vm_fault *vmf)
+static vm_fault_t snd_us428ctls_vm_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        struct page * page;
index 0d050528a4e154dd856e3120fa464ffddb179ca4..4fd9276b8e501a198443ac0db59439365fd10c89 100644 (file)
@@ -652,7 +652,7 @@ static void snd_usX2Y_hwdep_pcm_vm_close(struct vm_area_struct *area)
 }
 
 
-static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf)
+static vm_fault_t snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        void *vaddr;
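
These three USB-audio hunks belong to the tree-wide vm_fault_t conversion: a .fault handler returns VM_FAULT_* codes, and giving those a dedicated type lets the compiler flag accidental -errno returns. A minimal conforming handler (the page lookup is hypothetical):

	static vm_fault_t my_vm_fault(struct vm_fault *vmf)
	{
		struct page *page = my_lookup_page(vmf->pgoff); /* hypothetical */

		if (!page)
			return VM_FAULT_SIGBUS;	/* VM_FAULT_* code, never -errno */

		get_page(page);
		vmf->page = page;
		return 0;
	}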
index 6edd177bb1c7c66e0ec32caf7ec8d2c3680ed2f3..caae4843cb7001fbee1fa9b222850df7006850fb 100644 (file)
@@ -135,6 +135,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_CRM_SHIFT          7
 #define KVM_REG_ARM_32_CRN_MASK                0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT       11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK        0x0000000010000000
+#define KVM_REG_ARM_SECURE_SHIFT       28
 
 #define ARM_CP15_REG_SHIFT_MASK(x,n) \
        (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
@@ -186,6 +195,12 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST         0x1009
 #define KVM_REG_ARM_VFP_FPINST2                0x100A
 
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW                 (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r)          (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
+                                        KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION       KVM_REG_ARM_FW_REG(0)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR      0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
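
KVM_REG_ARM_FW_REG() packs the arch flag, the 64-bit size field and the new firmware coproc space into a single register ID, so userspace reaches the PSCI version through the ordinary one-reg ioctls. A hedged userspace sketch (error handling reduced to the ioctl return):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdint.h>

	static int get_psci_version(int vcpu_fd, uint64_t *ver)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_ARM_PSCI_VERSION, /* KVM_REG_ARM_FW_REG(0) */
			.addr = (uintptr_t)ver,
		};

		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}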
index 9abbf30446545a0668083b0891461f015563bcb1..04b3256f8e6d5f8e3e368b043f0fdcfeb7c23164 100644 (file)
@@ -206,6 +206,12 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_TIMER_CNT          ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL         ARM64_SYS_REG(3, 3, 14, 0, 2)
 
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW                 (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r)          (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+                                        KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION       KVM_REG_ARM_FW_REG(0)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR      0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
index d554c11e01ff46742d53148df0ffb9c3476e8d6e..578793e97431da25b0d5f3cbc20ae4c0655db075 100644 (file)
 #define X86_FEATURE_AVX512_VPOPCNTDQ   (16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57               (16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID              (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_CLDEMOTE           (16*32+25) /* CLDEMOTE instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV     (17*32+ 0) /* MCA overflow recovery support */
index fb3a6de7440bce69c794449ecf5740e8df924f87..6847d85400a8b738ca2adf00ea95fdf8f349fafc 100644 (file)
 # define NEED_MOVBE    0
 #endif
 
-#ifdef CONFIG_X86_5LEVEL
-# define NEED_LA57     (1<<(X86_FEATURE_LA57 & 31))
-#else
-# define NEED_LA57     0
-#endif
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT
 /* Paravirtualized systems may not have PSE or PGE available */
 #define REQUIRED_MASK13        0
 #define REQUIRED_MASK14        0
 #define REQUIRED_MASK15        0
-#define REQUIRED_MASK16        (NEED_LA57)
+#define REQUIRED_MASK16        0
 #define REQUIRED_MASK17        0
 #define REQUIRED_MASK18        0
 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
index f3a960488eae0fcf308ba8467c8de26a3061a840..c535c2fdea136a5e58725e72a2d867f545d393eb 100644 (file)
@@ -354,8 +354,25 @@ struct kvm_xcrs {
        __u64 padding[16];
 };
 
-/* definition of registers in kvm_run */
+#define KVM_SYNC_X86_REGS      (1UL << 0)
+#define KVM_SYNC_X86_SREGS     (1UL << 1)
+#define KVM_SYNC_X86_EVENTS    (1UL << 2)
+
+#define KVM_SYNC_X86_VALID_FIELDS \
+       (KVM_SYNC_X86_REGS| \
+        KVM_SYNC_X86_SREGS| \
+        KVM_SYNC_X86_EVENTS)
+
+/* kvm_sync_regs struct included by kvm_run struct */
 struct kvm_sync_regs {
+       /* Members of this structure are potentially malicious.
+        * Care must be taken by code reading, esp. interpreting,
+        * data fields from them inside KVM to prevent TOCTOU and
+        * double-fetch types of vulnerabilities.
+        */
+       struct kvm_regs regs;
+       struct kvm_sregs sregs;
+       struct kvm_vcpu_events events;
 };
 
 #define KVM_X86_QUIRK_LINT0_REENABLED  (1 << 0)
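
The new KVM_SYNC_X86_* bits let userspace mirror registers through the shared kvm_run page instead of separate GET/SET ioctls: set kvm_valid_regs before KVM_RUN and the kernel fills run->s.regs on exit; set kvm_dirty_regs and it loads them back in. The userspace side, as a fragment (run is the mmap'ed kvm_run; insn_len is illustrative):

	run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS |
			      KVM_SYNC_X86_EVENTS;

	ioctl(vcpu_fd, KVM_RUN, 0);

	run->s.regs.regs.rip += insn_len;	 /* e.g. skip an instruction */
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS; /* write back on next KVM_RUN */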
index 1ea545965ee36c3a8dca16fd0e34f19aa4894e60..53b60ad452f5d3ebd5d2edad0b80ef12558f2215 100644 (file)
@@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le
        $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
 
 $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
 
 clean: bpftool_clean
        $(call QUIET_CLEAN, bpf-progs)
index 4f254bcc442347ffb529925fed70d7e299d50773..61b9aa5d641529666c2f7695fc6c1804e86899ce 100644 (file)
@@ -1063,7 +1063,7 @@ static int cmd_load_pcap(char *file)
 
 static int cmd_load(char *arg)
 {
-       char *subcmd, *cont, *tmp = strdup(arg);
+       char *subcmd, *cont = NULL, *tmp = strdup(arg);
        int ret = CMD_OK;
 
        subcmd = strtok_r(tmp, " ", &cont);
@@ -1073,7 +1073,10 @@ static int cmd_load(char *arg)
                bpf_reset();
                bpf_reset_breakpoints();
 
-               ret = cmd_load_bpf(cont);
+               if (!cont)
+                       ret = CMD_ERR;
+               else
+                       ret = cmd_load_bpf(cont);
        } else if (matches(subcmd, "pcap") == 0) {
                ret = cmd_load_pcap(cont);
        } else {
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
new file mode 100644 (file)
index 0000000..d7e678c
--- /dev/null
+++ b/tools/bpf/bpftool/.gitignore
@@ -0,0 +1,3 @@
+*.d
+bpftool
+FEATURE-DUMP.bpftool
index 5f512b14bff96a6cfbd871b6c2228cca3b0c08dd..a6258bc8ec4f57d0cf1d5b35a95030fd66cbe881 100644 (file)
@@ -22,17 +22,19 @@ MAP COMMANDS
 =============
 
 |      **bpftool** **map { show | list }**   [*MAP*]
-|      **bpftool** **map dump**    *MAP*
-|      **bpftool** **map update**  *MAP*  **key** [**hex**] *BYTES*   **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
-|      **bpftool** **map lookup**  *MAP*  **key** [**hex**] *BYTES*
-|      **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*]
-|      **bpftool** **map delete**  *MAP*  **key** [**hex**] *BYTES*
-|      **bpftool** **map pin**     *MAP*  *FILE*
+|      **bpftool** **map dump**       *MAP*
+|      **bpftool** **map update**     *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
+|      **bpftool** **map lookup**     *MAP*  **key** *DATA*
+|      **bpftool** **map getnext**    *MAP* [**key** *DATA*]
+|      **bpftool** **map delete**     *MAP*  **key** *DATA*
+|      **bpftool** **map pin**        *MAP*  *FILE*
+|      **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
 |      **bpftool** **map help**
 |
 |      *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+|      *DATA* := { [**hex**] *BYTES* }
 |      *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-|      *VALUE* := { *BYTES* | *MAP* | *PROG* }
+|      *VALUE* := { *DATA* | *MAP* | *PROG* }
 |      *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
 
 DESCRIPTION
@@ -48,7 +50,7 @@ DESCRIPTION
        **bpftool map dump**    *MAP*
                  Dump all entries in a given *MAP*.
 
-       **bpftool map update**  *MAP*  **key** [**hex**] *BYTES*   **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
+       **bpftool map update**  *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
                  Update map entry for a given *KEY*.
 
                  *UPDATE_FLAGS* can be one of: **any** update existing entry
@@ -61,13 +63,13 @@ DESCRIPTION
                  the bytes are parsed as decimal values, unless a "0x" prefix
                  (for hexadecimal) or a "0" prefix (for octal) is provided.
 
-       **bpftool map lookup**  *MAP*  **key** [**hex**] *BYTES*
+       **bpftool map lookup**  *MAP*  **key** *DATA*
                  Lookup **key** in the map.
 
-       **bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*]
+       **bpftool map getnext** *MAP* [**key** *DATA*]
                  Get next key.  If *key* is not specified, get first key.
 
-       **bpftool map delete**  *MAP*  **key** [**hex**] *BYTES*
+       **bpftool map delete**  *MAP*  **key** *DATA*
                  Remove entry from the map.
 
        **bpftool map pin**     *MAP*  *FILE*
@@ -75,6 +77,22 @@ DESCRIPTION
 
                  Note: *FILE* must be located in *bpffs* mount.
 
+       **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
+                 Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+
+                 Install perf rings into a perf event array map and dump
+                 output of any bpf_perf_event_output() call in the kernel.
+                 By default, read the number of possible CPUs on the
+                 system and install a perf ring for each CPU, at the
+                 corresponding index in the array.
+
+                 If **cpu** and **index** are specified, install a perf
+                 ring for the given **cpu** at **index** in the array
+                 (single ring).
+
+                 Note that installing a perf ring into an array will silently
+                 replace any existing ring.  Any other application will stop
+                 receiving events if it installed its rings earlier.
+
        **bpftool map help**
                  Print short help message.
 
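On the kernel side, the events read by **event_pipe** come from programs calling bpf_perf_event_output() into a perf event array. A minimal producer sketch in restricted C (section, map and attach-point names are illustrative; bpf_helpers.h refers to the helper/macro header used by samples and selftests):

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") events = {
            .type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
            .key_size    = sizeof(int),
            .value_size  = sizeof(__u32),
            .max_entries = 64,     /* >= number of possible CPUs */
    };

    SEC("kprobe/sys_nanosleep")
    int report(struct pt_regs *ctx)
    {
            char msg[] = "hello";

            /* Emit a sample on the current CPU's ring; bpftool map
             * event_pipe prints it on the other end. */
            bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
                                  msg, sizeof(msg));
            return 0;
    }

    char _license[] SEC("license") = "GPL";
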
index 67ca6c69376cd95a030142ee11f2d66778ddc0ec..43d34a5c3ec527a95b7bbde6f28616597faf9d66 100644 (file)
@@ -95,7 +95,7 @@ EXAMPLES
 **# bpftool prog show**
 ::
 
-  10: xdp  name some_prog  tag 005a3d2123620c8b
+  10: xdp  name some_prog  tag 005a3d2123620c8b  gpl
        loaded_at Sep 29/20:11  uid 0
        xlated 528B  jited 370B  memlock 4096B  map_ids 10
 
@@ -108,6 +108,7 @@ EXAMPLES
                 "id": 10,
                 "type": "xdp",
                 "tag": "005a3d2123620c8b",
+                "gpl_compatible": true,
                 "loaded_at": "Sep 29/20:11",
                 "uid": 0,
                 "bytes_xlated": 528,
index 20689a321ffe3798a1ad10982f5e995bb024db76..564cb0d9692b630e58f56509c318d2064c0258fb 100644 (file)
@@ -23,7 +23,7 @@ SYNOPSIS
 
        *MAP-COMMANDS* :=
        { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-       | **pin** | **help** }
+       | **pin** | **event_pipe** | **help** }
 
        *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
        | **load** | **help** }
index 4e69782c4a793f0860e6532aac5b6aaaecc873e0..892dbf095bffd79ac6bbc9b3a70a33efa3955f54 100644 (file)
@@ -39,7 +39,12 @@ CC = gcc
 
 CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
-CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+       -I$(srctree)/kernel/bpf/ \
+       -I$(srctree)/tools/include \
+       -I$(srctree)/tools/include/uapi \
+       -I$(srctree)/tools/lib/bpf \
+       -I$(srctree)/tools/perf
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
index 852d84a98acda32893561f74c51091be79c6aaab..b301c9b315f1e5b11100abb124bc60bc2730f219 100644 (file)
@@ -1,6 +1,6 @@
 # bpftool(8) bash completion                               -*- shell-script -*-
 #
-# Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (C) 2017-2018 Netronome Systems, Inc.
 #
 # This software is dual licensed under the GNU General Public License
 # Version 2, June 1991 as shown in the file COPYING in the top-level
@@ -79,6 +79,14 @@ _bpftool_get_map_ids()
         command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_perf_map_ids()
+{
+    COMPREPLY+=( $( compgen -W "$( bpftool -jp map  2>&1 | \
+        command grep -C2 perf_event_array | \
+        command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
 _bpftool_get_prog_ids()
 {
     COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@@ -359,10 +367,34 @@ _bpftool()
                     fi
                     return 0
                     ;;
+                event_pipe)
+                    case $prev in
+                        $command)
+                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            _bpftool_get_perf_map_ids
+                            return 0
+                            ;;
+                        cpu)
+                            return 0
+                            ;;
+                        index)
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'cpu'
+                            _bpftool_once_attr 'index'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'delete dump getnext help \
-                            lookup pin show list update' -- "$cur" ) )
+                            lookup pin event_pipe show list update' -- \
+                            "$cur" ) )
                     ;;
             esac
             ;;
index 465995281dcd36f2b25e97aef7316ebd0e441ff5..32f9e397a6c07a988edd80a3338fd2a29930d3e0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
 * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -33,6 +33,7 @@
 
 /* Author: Jakub Kicinski <kubakici@wp.pl> */
 
+#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <fts.h>
@@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key)
        return NULL;
 }
 
+void print_data_json(uint8_t *data, size_t len)
+{
+       unsigned int i;
+
+       jsonw_start_array(json_wtr);
+       for (i = 0; i < len; i++)
+               jsonw_printf(json_wtr, "%d", data[i]);
+       jsonw_end_array(json_wtr);
+}
+
 void print_hex_data_json(uint8_t *data, size_t len)
 {
        unsigned int i;
@@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
        }
 }
 
+unsigned int get_page_size(void)
+{
+       static int result;
+
+       if (!result)
+               result = getpagesize();
+       return result;
+}
+
+unsigned int get_possible_cpus(void)
+{
+       static unsigned int result;
+       char buf[128];
+       long int n;
+       char *ptr;
+       int fd;
+
+       if (result)
+               return result;
+
+       fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
+       if (fd < 0) {
+               p_err("can't open sysfs possible cpus");
+               exit(-1);
+       }
+
+       n = read(fd, buf, sizeof(buf));
+       if (n < 2) {
+               p_err("can't read sysfs possible cpus");
+               exit(-1);
+       }
+       close(fd);
+
+       if (n == sizeof(buf)) {
+               p_err("read sysfs possible cpus overflow");
+               exit(-1);
+       }
+
+       ptr = buf;
+       n = 0;
+       while (*ptr && *ptr != '\n') {
+               unsigned int a, b;
+
+               if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
+                       n += b - a + 1;
+
+                       ptr = strchr(ptr, '-') + 1;
+               } else if (sscanf(ptr, "%u", &a) == 1) {
+                       n++;
+               } else {
+                       assert(0);
+               }
+
+               while (isdigit(*ptr))
+                       ptr++;
+               if (*ptr == ',')
+                       ptr++;
+       }
+
+       result = n;
+
+       return result;
+}
+
 static char *
 ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
 {
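
The sysfs file parsed by get_possible_cpus() above contains comma-separated CPU ranges such as "0-3" or "0,2-5,7". The counting loop can be sanity-checked stand-alone; a sketch mirroring it:

    #include <assert.h>
    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    static unsigned int count_cpus(const char *s)
    {
            unsigned int n = 0;

            while (*s && *s != '\n') {
                    unsigned int a, b;

                    if (sscanf(s, "%u-%u", &a, &b) == 2) {
                            n += b - a + 1;        /* a range of CPUs */
                            s = strchr(s, '-') + 1;
                    } else if (sscanf(s, "%u", &a) == 1) {
                            n++;                   /* a single CPU */
                    } else {
                            assert(0);
                    }
                    while (isdigit((unsigned char)*s))
                            s++;
                    if (*s == ',')
                            s++;
            }
            return n;
    }

    int main(void)
    {
            printf("%u\n", count_cpus("0-3\n"));     /* prints 4 */
            printf("%u\n", count_cpus("0,2-5,7\n")); /* prints 6 */
            return 0;
    }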
index b8e9584d624647e733195f753b48f28cc54e84e5..6173cd997e7a41670990dad3b637acb343a135ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
 * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -117,14 +117,19 @@ int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
+int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
                       const char *arch);
+void print_data_json(uint8_t *data, size_t len);
 void print_hex_data_json(uint8_t *data, size_t len);
 
+unsigned int get_page_size(void);
+unsigned int get_possible_cpus(void);
 const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
 
 #endif
index a6cdb640a0d7c309ef3f926a5d0605ee1a0858ef..097b1a5e046b20f8ced90a469c2b8e5ab25bb593 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
 * This software is dual licensed under the GNU General Public License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -34,7 +34,6 @@
 /* Author: Jakub Kicinski <kubakici@wp.pl> */
 
 #include <assert.h>
-#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdbool.h>
@@ -67,63 +66,9 @@ static const char * const map_type_name[] = {
        [BPF_MAP_TYPE_DEVMAP]           = "devmap",
        [BPF_MAP_TYPE_SOCKMAP]          = "sockmap",
        [BPF_MAP_TYPE_CPUMAP]           = "cpumap",
+       [BPF_MAP_TYPE_SOCKHASH]         = "sockhash",
 };
 
-static unsigned int get_possible_cpus(void)
-{
-       static unsigned int result;
-       char buf[128];
-       long int n;
-       char *ptr;
-       int fd;
-
-       if (result)
-               return result;
-
-       fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
-       if (fd < 0) {
-               p_err("can't open sysfs possible cpus");
-               exit(-1);
-       }
-
-       n = read(fd, buf, sizeof(buf));
-       if (n < 2) {
-               p_err("can't read sysfs possible cpus");
-               exit(-1);
-       }
-       close(fd);
-
-       if (n == sizeof(buf)) {
-               p_err("read sysfs possible cpus overflow");
-               exit(-1);
-       }
-
-       ptr = buf;
-       n = 0;
-       while (*ptr && *ptr != '\n') {
-               unsigned int a, b;
-
-               if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
-                       n += b - a + 1;
-
-                       ptr = strchr(ptr, '-') + 1;
-               } else if (sscanf(ptr, "%u", &a) == 1) {
-                       n++;
-               } else {
-                       assert(0);
-               }
-
-               while (isdigit(*ptr))
-                       ptr++;
-               if (*ptr == ',')
-                       ptr++;
-       }
-
-       result = n;
-
-       return result;
-}
-
 static bool map_is_per_cpu(__u32 type)
 {
        return type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -186,8 +131,7 @@ static int map_parse_fd(int *argc, char ***argv)
        return -1;
 }
 
-static int
-map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
 {
        int err;
        int fd;
@@ -873,23 +817,25 @@ static int do_help(int argc, char **argv)
 
        fprintf(stderr,
                "Usage: %s %s { show | list }   [MAP]\n"
-               "       %s %s dump    MAP\n"
-               "       %s %s update  MAP  key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n"
-               "       %s %s lookup  MAP  key [hex] BYTES\n"
-               "       %s %s getnext MAP [key [hex] BYTES]\n"
-               "       %s %s delete  MAP  key [hex] BYTES\n"
-               "       %s %s pin     MAP  FILE\n"
+               "       %s %s dump       MAP\n"
+               "       %s %s update     MAP  key DATA value VALUE [UPDATE_FLAGS]\n"
+               "       %s %s lookup     MAP  key DATA\n"
+               "       %s %s getnext    MAP [key DATA]\n"
+               "       %s %s delete     MAP  key DATA\n"
+               "       %s %s pin        MAP  FILE\n"
+               "       %s %s event_pipe MAP [cpu N index M]\n"
                "       %s %s help\n"
                "\n"
                "       MAP := { id MAP_ID | pinned FILE }\n"
+               "       DATA := { [hex] BYTES }\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       VALUE := { BYTES | MAP | PROG }\n"
+               "       VALUE := { DATA | MAP | PROG }\n"
                "       UPDATE_FLAGS := { any | exist | noexist }\n"
                "       " HELP_SPEC_OPTIONS "\n"
                "",
                bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
                bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-               bin_name, argv[-2], bin_name, argv[-2]);
+               bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
 
        return 0;
 }
@@ -904,6 +850,7 @@ static const struct cmd cmds[] = {
        { "getnext",    do_getnext },
        { "delete",     do_delete },
        { "pin",        do_pin },
+       { "event_pipe", do_event_pipe },
        { 0 }
 };
 
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
new file mode 100644 (file)
index 0000000..1832100
--- /dev/null
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <libbpf.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include <bpf.h>
+#include <perf-sys.h>
+
+#include "main.h"
+
+#define MMAP_PAGE_CNT  16
+
+static bool stop;
+
+struct event_ring_info {
+       int fd;
+       int key;
+       unsigned int cpu;
+       void *mem;
+};
+
+struct perf_event_sample {
+       struct perf_event_header header;
+       u64 time;
+       __u32 size;
+       unsigned char data[];
+};
+
+static void int_exit(int signo)
+{
+       fprintf(stderr, "Stopping...\n");
+       stop = true;
+}
+
+static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv)
+{
+       struct event_ring_info *ring = priv;
+       struct perf_event_sample *e = event;
+       struct {
+               struct perf_event_header header;
+               __u64 id;
+               __u64 lost;
+       } *lost = event;
+
+       if (json_output) {
+               jsonw_start_object(json_wtr);
+               jsonw_name(json_wtr, "type");
+               jsonw_uint(json_wtr, e->header.type);
+               jsonw_name(json_wtr, "cpu");
+               jsonw_uint(json_wtr, ring->cpu);
+               jsonw_name(json_wtr, "index");
+               jsonw_uint(json_wtr, ring->key);
+               if (e->header.type == PERF_RECORD_SAMPLE) {
+                       jsonw_name(json_wtr, "timestamp");
+                       jsonw_uint(json_wtr, e->time);
+                       jsonw_name(json_wtr, "data");
+                       print_data_json(e->data, e->size);
+               } else if (e->header.type == PERF_RECORD_LOST) {
+                       jsonw_name(json_wtr, "lost");
+                       jsonw_start_object(json_wtr);
+                       jsonw_name(json_wtr, "id");
+                       jsonw_uint(json_wtr, lost->id);
+                       jsonw_name(json_wtr, "count");
+                       jsonw_uint(json_wtr, lost->lost);
+                       jsonw_end_object(json_wtr);
+               }
+               jsonw_end_object(json_wtr);
+       } else {
+               if (e->header.type == PERF_RECORD_SAMPLE) {
+                       printf("== @%lld.%09lld CPU: %d index: %d =====\n",
+                              e->time / 1000000000ULL, e->time % 1000000000ULL,
+                              ring->cpu, ring->key);
+                       fprint_hex(stdout, e->data, e->size, " ");
+                       printf("\n");
+               } else if (e->header.type == PERF_RECORD_LOST) {
+                       printf("lost %lld events\n", lost->lost);
+               } else {
+                       printf("unknown event type=%d size=%d\n",
+                              e->header.type, e->header.size);
+               }
+       }
+
+       return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void
+perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
+{
+       enum bpf_perf_event_ret ret;
+
+       ret = bpf_perf_event_read_simple(ring->mem,
+                                        MMAP_PAGE_CNT * get_page_size(),
+                                        get_page_size(), buf, buf_len,
+                                        print_bpf_output, ring);
+       if (ret != LIBBPF_PERF_EVENT_CONT) {
+               fprintf(stderr, "perf read loop failed with %d\n", ret);
+               stop = true;
+       }
+}
+
+static int perf_mmap_size(void)
+{
+       return get_page_size() * (MMAP_PAGE_CNT + 1);
+}
+
+static void *perf_event_mmap(int fd)
+{
+       int mmap_size = perf_mmap_size();
+       void *base;
+
+       base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+       if (base == MAP_FAILED) {
+               p_err("event mmap failed: %s\n", strerror(errno));
+               return NULL;
+       }
+
+       return base;
+}
+
+static void perf_event_unmap(void *mem)
+{
+       if (munmap(mem, perf_mmap_size()))
+               fprintf(stderr, "Can't unmap ring memory!\n");
+}
+
+static int bpf_perf_event_open(int map_fd, int key, int cpu)
+{
+       struct perf_event_attr attr = {
+               .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
+               .type = PERF_TYPE_SOFTWARE,
+               .config = PERF_COUNT_SW_BPF_OUTPUT,
+       };
+       int pmu_fd;
+
+       pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+       if (pmu_fd < 0) {
+               p_err("failed to open perf event %d for CPU %d", key, cpu);
+               return -1;
+       }
+
+       if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
+               p_err("failed to update map for event %d for CPU %d", key, cpu);
+               goto err_close;
+       }
+       if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+               p_err("failed to enable event %d for CPU %d", key, cpu);
+               goto err_close;
+       }
+
+       return pmu_fd;
+
+err_close:
+       close(pmu_fd);
+       return -1;
+}
+
+int do_event_pipe(int argc, char **argv)
+{
+       int i, nfds, map_fd, index = -1, cpu = -1;
+       struct bpf_map_info map_info = {};
+       struct event_ring_info *rings;
+       size_t tmp_buf_sz = 0;
+       void *tmp_buf = NULL;
+       struct pollfd *pfds;
+       __u32 map_info_len;
+       bool do_all = true;
+
+       map_info_len = sizeof(map_info);
+       map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
+       if (map_fd < 0)
+               return -1;
+
+       if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+               p_err("map is not a perf event array");
+               goto err_close_map;
+       }
+
+       while (argc) {
+               if (argc < 2)
+                       BAD_ARG();
+
+               if (is_prefix(*argv, "cpu")) {
+                       char *endptr;
+
+                       NEXT_ARG();
+                       cpu = strtoul(*argv, &endptr, 0);
+                       if (*endptr) {
+                               p_err("can't parse %s as CPU ID", *argv);
+                               goto err_close_map;
+                       }
+
+                       NEXT_ARG();
+               } else if (is_prefix(*argv, "index")) {
+                       char *endptr;
+
+                       NEXT_ARG();
+                       index = strtoul(*argv, &endptr, 0);
+                       if (*endptr) {
+                               p_err("can't parse %s as index", *argv);
+                               goto err_close_map;
+                       }
+
+                       NEXT_ARG();
+               } else {
+                       BAD_ARG();
+               }
+
+               do_all = false;
+       }
+
+       if (!do_all) {
+               if (index == -1 || cpu == -1) {
+                       p_err("cpu and index must be specified together");
+                       goto err_close_map;
+               }
+
+               nfds = 1;
+       } else {
+               nfds = min(get_possible_cpus(), map_info.max_entries);
+               cpu = 0;
+               index = 0;
+       }
+
+       rings = calloc(nfds, sizeof(rings[0]));
+       if (!rings)
+               goto err_close_map;
+
+       pfds = calloc(nfds, sizeof(pfds[0]));
+       if (!pfds)
+               goto err_free_rings;
+
+       for (i = 0; i < nfds; i++) {
+               rings[i].cpu = cpu + i;
+               rings[i].key = index + i;
+
+               rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
+                                                 rings[i].cpu);
+               if (rings[i].fd < 0)
+                       goto err_close_fds_prev;
+
+               rings[i].mem = perf_event_mmap(rings[i].fd);
+               if (!rings[i].mem)
+                       goto err_close_fds_current;
+
+               pfds[i].fd = rings[i].fd;
+               pfds[i].events = POLLIN;
+       }
+
+       signal(SIGINT, int_exit);
+       signal(SIGHUP, int_exit);
+       signal(SIGTERM, int_exit);
+
+       if (json_output)
+               jsonw_start_array(json_wtr);
+
+       while (!stop) {
+               poll(pfds, nfds, 200);
+               for (i = 0; i < nfds; i++)
+                       perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+       }
+       free(tmp_buf);
+
+       if (json_output)
+               jsonw_end_array(json_wtr);
+
+       for (i = 0; i < nfds; i++) {
+               perf_event_unmap(rings[i].mem);
+               close(rings[i].fd);
+       }
+       free(pfds);
+       free(rings);
+       close(map_fd);
+
+       return 0;
+
+err_close_fds_prev:
+       while (i--) {
+               perf_event_unmap(rings[i].mem);
+err_close_fds_current:
+               close(rings[i].fd);
+       }
+       free(pfds);
+err_free_rings:
+       free(rings);
+err_close_map:
+       close(map_fd);
+       return -1;
+}
index 548adb9b73175e44f9ab6e9d856b7eae2c00265a..9bdfdf2d3fbe4ade06046075d46772eef4ad42a2 100644 (file)
@@ -96,7 +96,10 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
                return;
        }
 
-       strftime(buf, size, "%b %d/%H:%M", &load_tm);
+       if (json_output)
+               strftime(buf, size, "%s", &load_tm);
+       else
+               strftime(buf, size, "%FT%T%z", &load_tm);
 }
 
 static int prog_fd_by_tag(unsigned char *tag)
@@ -235,6 +238,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
                     info->tag[0], info->tag[1], info->tag[2], info->tag[3],
                     info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
 
+       jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
+
        print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
        if (info->load_time) {
@@ -243,7 +248,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
                print_boot_time(info->load_time, buf, sizeof(buf));
 
                /* Piggy back on load_time, since 0 uid is a valid one */
-               jsonw_string_field(json_wtr, "loaded_at", buf);
+               jsonw_name(json_wtr, "loaded_at");
+               jsonw_printf(json_wtr, "%s", buf);
                jsonw_uint_field(json_wtr, "uid", info->created_by_uid);
        }
 
@@ -295,6 +301,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
        printf("tag ");
        fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
        print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+       printf("%s", info->gpl_compatible ? "  gpl" : "");
        printf("\n");
 
        if (info->load_time) {
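
The two formats above behave as follows: "%FT%T%z" yields an ISO-8601-style local timestamp for the plain output, while "%s" (a glibc extension, not standard C) prints the epoch seconds for JSON consumers. A quick stand-alone illustration:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            char buf[64];
            time_t now = time(NULL);
            struct tm tm;

            localtime_r(&now, &tm);

            strftime(buf, sizeof(buf), "%FT%T%z", &tm);
            printf("plain: %s\n", buf);  /* e.g. 2018-04-27T16:01:19+0200 */

            strftime(buf, sizeof(buf), "%s", &tm);  /* glibc extension */
            printf("json:  %s\n", buf);  /* seconds since the epoch */
            return 0;
    }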
index 04e32f965ad7f038beb2d8db9dc2119e07628744..1827c2f973f93c533ca9a3e12ced636d13319556 100644 (file)
@@ -151,11 +151,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * required ordering.
  */
 
-#define READ_ONCE(x) \
-       ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
-
-#define WRITE_ONCE(x, val) \
-       ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+#define READ_ONCE(x)                                   \
+({                                                     \
+       union { typeof(x) __val; char __c[1]; } __u =   \
+               { .__c = { 0 } };                       \
+       __read_once_size(&(x), __u.__c, sizeof(x));     \
+       __u.__val;                                      \
+})
+
+#define WRITE_ONCE(x, val)                             \
+({                                                     \
+       union { typeof(x) __val; char __c[1]; } __u =   \
+               { .__val = (val) };                     \
+       __write_once_size(&(x), __u.__c, sizeof(x));    \
+       __u.__val;                                      \
+})
 
 
 #ifndef __fallthrough
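
These wrappers force a single, untorn access and keep the compiler from caching the value in a register across loop iterations. A minimal usage sketch (assuming this tools/include copy of compiler.h is on the include path; memory ordering beyond the single access is out of scope here):

    #include <linux/compiler.h>   /* the tools/include copy above */

    static int ready;

    void publisher(void)
    {
            WRITE_ONCE(ready, 1);  /* one untorn store */
    }

    void consumer(void)
    {
            while (!READ_ONCE(ready))
                    ;  /* re-reads memory on every iteration */
    }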
index edfeaba954295a76c1c842253004404365436208..a1a959ba24ffada75e13de61aa570490a988c8d4 100644 (file)
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _LINUX_CORESIGHT_PMU_H
index b21b586b985424a03338023f96a3e9e3d996b2af..1738c0391da4af73793716edb2d1109726c690f9 100644 (file)
@@ -6,8 +6,9 @@
 #include <stdbool.h>
 
 #define spinlock_t             pthread_mutex_t
-#define DEFINE_SPINLOCK(x)     pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define DEFINE_SPINLOCK(x)     pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
 #define __SPIN_LOCK_UNLOCKED(x)        (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
+#define spin_lock_init(x)      pthread_mutex_init(x, NULL)
 
 #define spin_lock_irqsave(x, f)                (void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)   (void)f, pthread_mutex_unlock(x)
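
With spin_lock_init() added, kernel-style code using dynamically allocated locks now builds unchanged in tools/ on top of pthreads. A sketch of what the shims accept:

    #include <linux/spinlock.h>  /* the tools/include shim above */

    static DEFINE_SPINLOCK(lock);
    static unsigned long counter;

    void bump_counter(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&lock, flags);   /* pthread_mutex_lock() */
            counter++;
            spin_unlock_irqrestore(&lock, flags);
    }

    void init_dynamic(spinlock_t *dl)
    {
            spin_lock_init(dl);  /* maps to pthread_mutex_init() */
    }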
index f8b134f5608f3cbeb0d5afcbbf7963599f4f9e4b..e7ee32861d51d4b2e47b9182a48c05fe837b8d21 100644 (file)
@@ -27,6 +27,9 @@
 # define MAP_UNINITIALIZED 0x0         /* Don't support this flag */
 #endif
 
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE    0x100000        /* MAP_FIXED which doesn't unmap underlying mapping */
+
 /*
  * Flags for mlock
  */
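
Unlike plain MAP_FIXED, the new flag refuses to clobber whatever already occupies the requested range. A usage sketch (the fallback define mirrors the value above, for toolchains whose libc headers predate the flag):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>

    #ifndef MAP_FIXED_NOREPLACE
    #define MAP_FIXED_NOREPLACE 0x100000
    #endif

    void *claim(void *hint, size_t len)
    {
            void *p = mmap(hint, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                           -1, 0);

            if (p == MAP_FAILED && errno == EEXIST)
                    fprintf(stderr, "range already mapped\n");
            /* Kernels without the flag ignore it and may map elsewhere,
             * so cautious callers also verify p == hint. */
            return p;
    }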
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..8dd6aef
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/bitsperlong.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/uapi/asm/bitsperlong.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/uapi/asm/bitsperlong.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/bitsperlong.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/bitsperlong.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
+#else
+#include <asm-generic/bitsperlong.h>
+#endif
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
new file mode 100644 (file)
index 0000000..ce3c594
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/errno.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/errno.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/uapi/asm/errno.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/errno.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/errno.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#else
+#include <asm-generic/errno.h>
+#endif
index 7f7fbb9d0253422b56eac7387b42c10e04972d4a..d94d333a82259cbf07f6725445f7768b96a24419 100644 (file)
@@ -96,6 +96,7 @@ enum bpf_cmd {
        BPF_PROG_QUERY,
        BPF_RAW_TRACEPOINT_OPEN,
        BPF_BTF_LOAD,
+       BPF_BTF_GET_FD_BY_ID,
 };
 
 enum bpf_map_type {
@@ -116,6 +117,8 @@ enum bpf_map_type {
        BPF_MAP_TYPE_DEVMAP,
        BPF_MAP_TYPE_SOCKMAP,
        BPF_MAP_TYPE_CPUMAP,
+       BPF_MAP_TYPE_XSKMAP,
+       BPF_MAP_TYPE_SOCKHASH,
 };
 
 enum bpf_prog_type {
@@ -343,6 +346,7 @@ union bpf_attr {
                        __u32           start_id;
                        __u32           prog_id;
                        __u32           map_id;
+                       __u32           btf_id;
                };
                __u32           next_id;
                __u32           open_flags;
@@ -377,403 +381,1527 @@ union bpf_attr {
        };
 } __attribute__((aligned(8)));
 
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- *     Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- *     Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- *     Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- *     Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ *     $ ./scripts/bpf_helpers_doc.py \
+ *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ *     $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional; removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ *     Description
+ *             Perform a lookup in *map* for an entry associated to *key*.
+ *     Return
+ *             Map value associated to *key*, or **NULL** if no entry was
+ *             found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ *     Description
+ *             Add or update the value of the entry associated to *key* in
+ *             *map* with *value*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             Flag value **BPF_NOEXIST** cannot be used for maps of types
+ *             **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ *             elements always exist); the helper would return an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ *     Description
+ *             Delete entry with *key* from *map*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ *     Description
+ *             For tracing programs, safely attempt to read *size* bytes from
+ *             address *src* and store the data in *dst*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_ktime_get_ns(void)
- *     Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- *     Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- *     Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- *     Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- *     store bytes into packet
- *     @skb: pointer to skb
- *     @offset: offset within packet from skb->mac_header
- *     @from: pointer where to copy bytes from
- *     @len: number of bytes to store into packet
- *     @flags: bit 0 - if true, recompute skb->csum
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- *     recompute IP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where IP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- *     recompute TCP/UDP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where TCP/UDP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             bit 4 - is pseudo header
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- *     jump into another BPF program
- *     @ctx: context pointer passed to next program
- *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: 32-bit index inside array that selects specific program to run
- *     Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- *     redirect to another netdev
- *     @skb: pointer to skb
- *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: 0 on success or negative error
+ *     Description
+ *             Return the time elapsed since system boot, in nanoseconds.
+ *     Return
+ *             Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ *     Description
+ *             This helper is a "printk()-like" facility for debugging. It
+ *             prints a message defined by format *fmt* (of size *fmt_size*)
+ *             to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ *             available. It can take up to three additional **u64**
+ *             arguments (as an eBPF helper, the total number of arguments is
+ *             limited to five).
+ *
+ *             Each time the helper is called, it appends a line to the trace.
+ *             The format of the trace is customizable, and the exact output
+ *             one will get depends on the options set in
+ *             *\/sys/kernel/debug/tracing/trace_options* (see also the
+ *             *README* file under the same directory). However, it usually
+ *             defaults to something like:
+ *
+ *             ::
+ *
+ *                     telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ *             In the above:
+ *
+ *                     * ``telnet`` is the name of the current task.
+ *                     * ``470`` is the PID of the current task.
+ *                     * ``001`` is the CPU number on which the task is
+ *                       running.
+ *                     * In ``.N..``, each character refers to a set of
+ *                       options (whether irqs are enabled, scheduling
+ *                       options, whether hard/softirqs are running, level of
+ *                       preempt_disabled respectively). **N** means that
+ *                       **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ *                       are set.
+ *                     * ``419421.045894`` is a timestamp.
+ *                     * ``0x00000001`` is a fake value used by BPF for the
+ *                       instruction pointer register.
+ *                     * ``<formatted msg>`` is the message formatted with
+ *                       *fmt*.
+ *
+ *             The conversion specifiers supported by *fmt* are similar to,
+ *             but more limited than, those of printk(). They are **%d**, **%i**,
+ *             **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ *             **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ *             of field, padding with zeroes, etc.) is available, and the
+ *             helper will return **-EINVAL** (but print nothing) if it
+ *             encounters an unknown specifier.
+ *
+ *             Also, note that **bpf_trace_printk**\ () is slow, and should
+ *             only be used for debugging purposes. For this reason, a notice
+ *             block (spanning several lines) is printed to kernel logs and
+ *             states that the helper should not be used "for production use"
+ *             the first time this helper is used (or more precisely, when
+ *             **trace_printk**\ () buffers are allocated). For passing values
+ *             to user space, perf events should be preferred.
+ *     Return
+ *             The number of bytes written to the buffer, or a negative error
+ *             in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ *     Description
+ *             Get a pseudo-random number.
+ *
+ *             From a security point of view, this helper uses its own
+ *             pseudo-random internal state, and cannot be used to infer the
+ *             seed of other random functions in the kernel. However, it is
+ *             essential to note that the generator used by the helper is not
+ *             cryptographically secure.
+ *     Return
+ *             A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ *     Description
+ *             Get the SMP (symmetric multiprocessing) processor id. Note that
+ *             all programs run with preemption disabled, which means that the
+ *             SMP processor id is stable throughout the execution of the
+ *             program.
+ *     Return
+ *             The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ *     Description
+ *             Store *len* bytes from address *from* into the packet
+ *             associated to *skb*, at *offset*. *flags* are a combination of
+ *             **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ *             checksum for the packet after storing the bytes) and
+ *             **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ *             **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ *     Description
+ *             Recompute the layer 3 (e.g. IP) checksum for the packet
+ *             associated to *skb*. Computation is incremental, so the helper
+ *             must know the former value of the header field that was
+ *             modified (*from*), the new value of this field (*to*), and the
+ *             number of bytes (2 or 4) for this field, stored in *size*.
+ *             Alternatively, it is possible to store the difference between
+ *             the previous and the new values of the header field in *to*, by
+ *             setting *from* and *size* to 0. For both methods, *offset*
+ *             indicates the location of the IP checksum within the packet.
+ *
+ *             This helper works in combination with **bpf_csum_diff**\ (),
+ *             which does not update the checksum in-place, but offers more
+ *             flexibility and can handle sizes larger than 2 or 4 for the
+ *             checksum to update.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ *     Description
+ *             Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ *             packet associated to *skb*. Computation is incremental, so the
+ *             helper must know the former value of the header field that was
+ *             modified (*from*), the new value of this field (*to*), and the
+ *             number of bytes (2 or 4) for this field, stored on the lowest
+ *             four bits of *flags*. Alternatively, it is possible to store
+ *             the difference between the previous and the new values of the
+ *             header field in *to*, by setting *from* and the four lowest
+ *             bits of *flags* to 0. For both methods, *offset* indicates the
+ *             location of the IP checksum within the packet. In addition to
+ *             the size of the field, *flags* can be added (bitwise OR) actual
+ *             flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ *             untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ *             for updates resulting in a null checksum the value is set to
+ *             **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ *             the checksum is to be computed against a pseudo-header.
+ *
+ *             This helper works in combination with **bpf_csum_diff**\ (),
+ *             which does not update the checksum in-place, but offers more
+ *             flexibility and can handle sizes larger than 2 or 4 for the
+ *             checksum to update.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ *     Description
+ *             This special helper is used to trigger a "tail call", or in
+ *             other words, to jump into another eBPF program. The same stack
+ *             frame is used (but values on stack and in registers for the
+ *             caller are not accessible to the callee). This mechanism allows
+ *             for program chaining, either for raising the maximum number of
+ *             available eBPF instructions, or to execute given programs in
+ *             conditional blocks. For security reasons, there is an upper
+ *             limit to the number of successive tail calls that can be
+ *             performed.
+ *
+ *             Upon call of this helper, the program attempts to jump into a
+ *             program referenced at index *index* in *prog_array_map*, a
+ *             special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ *             *ctx*, a pointer to the context.
+ *
+ *             If the call succeeds, the kernel immediately runs the first
+ *             instruction of the new program. This is not a function call,
+ *             and it never returns to the previous program. If the call
+ *             fails, then the helper has no effect, and the caller continues
+ *             to run its subsequent instructions. A call can fail if the
+ *             destination program for the jump does not exist (i.e. *index*
+ *             is greater than or equal to the number of entries in *prog_array_map*), or
+ *             if the maximum number of tail calls has been reached for this
+ *             chain of programs. This limit is defined in the kernel by the
+ *             macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ *             which is currently set to 32.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ *     Description
+ *             Clone and redirect the packet associated to *skb* to another
+ *             net device of index *ifindex*. Both ingress and egress
+ *             interfaces can be used for redirection. The **BPF_F_INGRESS**
+ *             value in *flags* is used to make the distinction (ingress path
+ *             is selected if the flag is present, egress path otherwise).
+ *             This is the only flag supported for now.
+ *
+ *             In comparison with the **bpf_redirect**\ () helper,
+ *             **bpf_clone_redirect**\ () has the associated cost of
+ *             duplicating the packet buffer, but this can be executed out of
+ *             the eBPF program. Conversely, **bpf_redirect**\ () is more
+ *             efficient, but it is handled through an action code where the
+ *             redirection happens only after the eBPF program has returned.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
- *     Return: current->tgid << 32 | current->pid
+ *     Return
+ *             A 64-bit integer containing the current tgid and pid, and
+ *             created as such:
+ *             *current_task*\ **->tgid << 32 \|**
+ *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
- *     Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- *     stores current->comm into buf
- *     Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- *     retrieve a proc's classid
- *     @skb: pointer to skb
- *     Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- *     retrieve or populate tunnel metadata
- *     @skb: pointer to skb
- *     @key: pointer to 'struct bpf_tunnel_key'
- *     @size: size of 'struct bpf_tunnel_key'
- *     @flags: room for future extensions
- *     Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- *     read perf event counter value
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- *     redirect to another netdev
- *     @ifindex: ifindex of the net device
- *     @flags:
- *       cls_bpf:
- *          bit 0 - if set, redirect to ingress instead of egress
- *          other bits - reserved
- *       xdp_bpf:
- *         all bits - reserved
- *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *            xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- *     redirect to endpoint in map
- *     @map: pointer to dev map
- *     @key: index in map to lookup
- *     @flags: --
- *     Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- *     retrieve a dst's tclassid
- *     @skb: pointer to skb
- *     Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- *     output perf raw sample
- *     @ctx: struct pt_regs*
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @data: data on stack to be output as raw data
- *     @size: size of data
- *     Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- *     walk user or kernel stack and return id
- *     @ctx: struct pt_regs*
- *     @map: pointer to stack_trace map
- *     @flags: bits 0-7 - numer of stack frames to skip
- *             bit 8 - collect user stack instead of kernel
- *             bit 9 - compare stacks by hash only
- *             bit 10 - if two different stacks hash into the same stackid
- *                      discard old
- *             other bits - reserved
- *     Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- *     calculate csum diff
- *     @from: raw from buffer
- *     @from_size: length of from buffer
- *     @to: raw to buffer
- *     @to_size: length of to buffer
- *     @seed: optional seed
- *     Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- *     retrieve tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- *     populate tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- *     Change protocol of the skb. Currently supported is v4 -> v6,
- *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
- *     program is expected to fill the new headers via skb_store_bytes
- *     and lX_csum_replace.
- *     @skb: pointer to skb
- *     @proto: new skb->protocol type
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *     Change packet type of skb.
- *     @skb: pointer to skb
- *     @type: new skb->pkt_type type
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *     Check cgroup2 membership of skb
- *     @skb: pointer to skb
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 skb failed the cgroup2 descendant test
- *       == 1 skb succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *     Retrieve and possibly recalculate skb->hash.
- *     @skb: pointer to skb
- *     Return: hash
+ *     Return
+ *             A 64-bit integer containing the current GID and UID, and
+ *             created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ *     Description
+ *             Copy the **comm** attribute of the current task into *buf* of
+ *             *size_of_buf*. The **comm** attribute contains the name of
+ *             the executable (excluding the path) for the current task. The
+ *             *size_of_buf* must be strictly positive. On success, the
+ *             helper makes sure that the *buf* is NUL-terminated. On failure,
+ *             it is filled with zeroes.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ *     Description
+ *             Retrieve the classid for the current task, i.e. for the net_cls
+ *             cgroup to which *skb* belongs.
+ *
+ *             This helper can be used on TC egress path, but not on ingress.
+ *
+ *             The net_cls cgroup provides an interface to tag network packets
+ *             based on a user-provided identifier for all traffic coming from
+ *             the tasks belonging to the related cgroup. See also the related
+ *             kernel documentation, available from the Linux sources in file
+ *             *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ *             The Linux kernel has two versions for cgroups: there are
+ *             cgroups v1 and cgroups v2. Both are available to users, who can
+ *             use a mixture of them, but note that the net_cls cgroup is for
+ *             cgroup v1 only. This makes it incompatible with BPF programs
+ *             run on cgroups, which is a cgroup-v2-only feature (a socket can
+ *             only hold data for one version of cgroups at a time).
+ *
+ *             This helper is only available if the kernel was compiled with
+ *             the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ *             "**y**" or to "**m**".
+ *     Return
+ *             The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ *     Description
+ *             Push a *vlan_tci* (VLAN tag control information) of protocol
+ *             *vlan_proto* to the packet associated to *skb*, then update
+ *             the checksum. Note that if *vlan_proto* is different from
+ *             **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ *             be **ETH_P_8021Q**.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
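+ *
+ *             For instance, a TC program could tag traffic with VLAN id 42
+ *             as follows (a sketch; **bpf_htons**\ () is the usual
+ *             byte-order conversion macro, and the id is illustrative):
+ *
+ *             ::
+ *
+ *                     if (bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 42))
+ *                             return TC_ACT_SHOT;
+ *                     // packet pointers must be re-derived after this call
+ *                     return TC_ACT_OK;
+ *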
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ *     Description
+ *             Pop a VLAN header from the packet associated to *skb*.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *     Description
+ *             Get tunnel metadata. This helper takes a pointer *key* to an
+ *             empty **struct bpf_tunnel_key** of *size*, that will be
+ *             filled with tunnel metadata for the packet associated to *skb*.
+ *             The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ *             indicates that the tunnel is based on IPv6 protocol instead of
+ *             IPv4.
+ *
+ *             The **struct bpf_tunnel_key** is an object that generalizes the
+ *             principal parameters used by various tunneling protocols into a
+ *             single struct. This way, it can be used to easily make a
+ *             decision based on the contents of the encapsulation header,
+ *             "summarized" in this struct. In particular, it holds the IP
+ *             address of the remote end (IPv4 or IPv6, depending on the case)
+ *             in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ *             this struct exposes the *key*\ **->tunnel_id**, which is
+ *             generally mapped to a VNI (Virtual Network Identifier), making
+ *             it programmable together with the **bpf_skb_set_tunnel_key**\
+ *             () helper.
+ *
+ *             Let's imagine that the following code is part of a program
+ *             attached to the TC ingress interface, on one end of a GRE
+ *             tunnel, and is supposed to filter out all messages coming from
+ *             remote ends with IPv4 address other than 10.0.0.1:
+ *
+ *             ::
+ *
+ *                     int ret;
+ *                     struct bpf_tunnel_key key = {};
+ *                     
+ *                     ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ *                     if (ret < 0)
+ *                             return TC_ACT_SHOT;     // drop packet
+ *                     
+ *                     if (key.remote_ipv4 != 0x0a000001)
+ *                             return TC_ACT_SHOT;     // drop packet
+ *                     
+ *                     return TC_ACT_OK;               // accept packet
+ *
+ *             This interface can also be used with all encapsulation devices
+ *             that can operate in "collect metadata" mode: instead of having
+ *             one network device per specific configuration, the "collect
+ *             metadata" mode only requires a single device where the
+ *             configuration can be extracted with this helper.
+ *
+ *             This can be used together with various tunnels such as VXLan,
+ *             Geneve, GRE or IP in IP (IPIP).
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *     Description
+ *             Populate tunnel metadata for the packet associated to *skb*. The
+ *             tunnel metadata is set to the contents of *key*, of *size*. The
+ *             *flags* can be set to a combination of the following values:
+ *
+ *             **BPF_F_TUNINFO_IPV6**
+ *                     Indicate that the tunnel is based on IPv6 protocol
+ *                     instead of IPv4.
+ *             **BPF_F_ZERO_CSUM_TX**
+ *                     For IPv4 packets, add a flag to tunnel metadata
+ *                     indicating that checksum computation should be skipped
+ *                     and checksum set to zeroes.
+ *             **BPF_F_DONT_FRAGMENT**
+ *                     Add a flag to tunnel metadata indicating that the
+ *                     packet should not be fragmented.
+ *             **BPF_F_SEQ_NUMBER**
+ *                     Add a flag to tunnel metadata indicating that a
+ *                     sequence number should be added to tunnel header before
+ *                     sending the packet. This flag was added for GRE
+ *                     encapsulation, but might be used with other protocols
+ *                     as well in the future.
+ *
+ *             Here is a typical usage on the transmit path:
+ *
+ *             ::
+ *
+ *                     struct bpf_tunnel_key key;
+ *                     // populate key: tunnel id, remote address, etc.
+ *                     bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ *                     bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ *             See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ *             helper for additional information.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ *     Description
+ *             Read the value of a perf event counter. This helper relies on a
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ *             the perf event counter is selected when *map* is updated with
+ *             perf event file descriptors. The *map* is an array whose size
+ *             is the number of available CPUs, and each cell contains a value
+ *             relative to one CPU. The value to retrieve is indicated by
+ *             *flags*, that contains the index of the CPU to look up, masked
+ *             with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *             **BPF_F_CURRENT_CPU** to indicate that the value for the
+ *             current CPU should be retrieved.
+ *
+ *             Note that before Linux 4.13, only hardware perf events can be
+ *             retrieved.
+ *
+ *             Also, be aware that the newer helper
+ *             **bpf_perf_event_read_value**\ () is recommended over
+ *             **bpf_perf_event_read**\ () in general. The latter has some ABI
+ *             quirks where error and counter value are used as a return code
+ *             (which is wrong to do since ranges may overlap). This issue is
+ *             fixed with **bpf_perf_event_read_value**\ (), which at the same
+ *             time provides more features over the **bpf_perf_event_read**\
+ *             () interface. Please refer to the description of
+ *             **bpf_perf_event_read_value**\ () for details.
+ *     Return
+ *             The value of the perf event counter read from the map, or a
+ *             negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ *     Description
+ *             Redirect the packet to another net device of index *ifindex*.
+ *             This helper is somewhat similar to **bpf_clone_redirect**\
+ *             (), except that the packet is not cloned, which provides
+ *             increased performance.
+ *
+ *             Except for XDP, both ingress and egress interfaces can be used
+ *             for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ *             to make the distinction (ingress path is selected if the flag
+ *             is present, egress path otherwise). Currently, XDP only
+ *             supports redirection to the egress interface, and accepts no
+ *             flag at all.
+ *
+ *             The same effect can be attained with the more generic
+ *             **bpf_redirect_map**\ (), which requires specific maps to be
+ *             used but offers better performance.
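+ *
+ *             A minimal XDP usage is a one-line program body (a sketch;
+ *             *IFINDEX_PEER* is an illustrative compile-time constant):
+ *
+ *             ::
+ *
+ *                     SEC("xdp")
+ *                     int xdp_redirect_peer(struct xdp_md *ctx)
+ *                     {
+ *                             // IFINDEX_PEER: assumed target device index
+ *                             return bpf_redirect(IFINDEX_PEER, 0);
+ *                     }
+ *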
+ *     Return
+ *             For XDP, the helper returns **XDP_REDIRECT** on success or
+ *             **XDP_ABORTED** on error. For other program types, the values
+ *             are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ *             error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ *     Description
+ *             Retrieve the realm of the route, that is to say the
+ *             **tclassid** field of the destination for the *skb*. The
+ *             identifier retrieved is a user-provided tag, similar to the
+ *             one used with the net_cls cgroup (see description for
+ *             **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ *             held by a route (a destination entry), not by a task.
+ *
+ *             Retrieving this identifier works with the clsact TC egress hook
+ *             (see also **tc-bpf(8)**), or alternatively on conventional
+ *             classful egress qdiscs, but not on the TC ingress path. In case of
+ *             clsact TC egress hook, this has the advantage that, internally,
+ *             the destination entry has not been dropped yet in the transmit
+ *             path. Therefore, the destination entry does not need to be
+ *             artificially held via **netif_keep_dst**\ () for a classful
+ *             qdisc until the *skb* is freed.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ *     Return
+ *             The realm of the route for the packet associated to *skb*, or 0
+ *             if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through the eBPF
+ *             stack and pointed to by *data*.
+ *
+ *             The context of the program, *ctx*, also needs to be passed to
+ *             the helper.
+ *
+ *             In user space, a program willing to read the values needs to
+ *             call **perf_event_open**\ () on the perf event (either for
+ *             one or for all CPUs) and to store the file descriptor into the
+ *             *map*. This must be done before the eBPF program can send data
+ *             into it. An example is available in file
+ *             *samples/bpf/trace_output_user.c* in the Linux kernel source
+ *             tree (the eBPF program counterpart is in
+ *             *samples/bpf/trace_output_kern.c*).
+ *
+ *             **bpf_perf_event_output**\ () achieves better performance
+ *             than **bpf_trace_printk**\ () for sharing data with user
+ *             space, and is much better suited to streaming data from eBPF
+ *             programs.
+ *
+ *             Note that this helper is not restricted to tracing use cases
+ *             and can be used with programs attached to TC or XDP as well,
+ *             where it allows for passing data to user space listeners. Data
+ *             can be:
+ *
+ *             * Only custom structs,
+ *             * Only the packet payload, or
+ *             * A combination of both.
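+ *
+ *             For instance, a sketch pushing a custom struct for the current
+ *             CPU (the *events* map and the **event** structure are
+ *             illustrative):
+ *
+ *             ::
+ *
+ *                     // "events" is an assumed BPF_MAP_TYPE_PERF_EVENT_ARRAY
+ *                     struct event {
+ *                             u32 pid;
+ *                     } e = { .pid = bpf_get_current_pid_tgid() >> 32 };
+ *
+ *                     bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
+ *                                           &e, sizeof(e));
+ *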
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ *     Description
+ *             This helper was provided as an easy way to load data from a
+ *             packet. It can be used to load *len* bytes from *offset* from
+ *             the packet associated to *skb*, into the buffer pointed to by
+ *             *to*.
+ *
+ *             Since Linux 4.7, usage of this helper has mostly been replaced
+ *             by "direct packet access", enabling packet data to be
+ *             manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ *             pointing respectively to the first byte of packet data and to
+ *             the byte after the last byte of packet data. However, it
+ *             remains useful if one wishes to read large quantities of data
+ *             at once from a packet into the eBPF stack.
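+ *
+ *             For example, to copy the first bytes of the payload onto the
+ *             stack (a sketch; the 64-byte length is illustrative):
+ *
+ *             ::
+ *
+ *                     char buf[64];
+ *
+ *                     if (bpf_skb_load_bytes(skb, 0, buf, sizeof(buf)))
+ *                             return TC_ACT_SHOT;     // packet too short
+ *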
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
+ *     Description
+ *             Walk a user or a kernel stack and return its id. To achieve
+ *             this, the helper needs *ctx*, which is a pointer to the context
+ *             on which the tracing program is executed, and a pointer to a
+ *             *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ *             The last argument, *flags*, holds the number of stack frames to
+ *             skip (from 0 to 255), masked with
+ *             **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *             a combination of the following flags:
+ *
+ *             **BPF_F_USER_STACK**
+ *                     Collect a user space stack instead of a kernel stack.
+ *             **BPF_F_FAST_STACK_CMP**
+ *                     Compare stacks by hash only.
+ *             **BPF_F_REUSE_STACKID**
+ *                     If two different stacks hash into the same *stackid*,
+ *                     discard the old one.
+ *
+ *             The stack id retrieved is a 32-bit integer handle which
+ *             can be further combined with other data (including other stack
+ *             ids) and used as a key into maps. This can be useful for
+ *             generating a variety of graphs (such as flame graphs or off-cpu
+ *             graphs).
+ *
+ *             For walking a stack, this helper is an improvement over
+ *             **bpf_probe_read**\ (), which can be used with unrolled loops
+ *             but is not efficient and consumes a lot of eBPF instructions.
+ *             Instead, **bpf_get_stackid**\ () can collect up to
+ *             **PERF_MAX_STACK_DEPTH** kernel and user frames. Note that
+ *             this limit can be controlled with the **sysctl** program, and
+ *             that it should be manually increased in order to profile long
+ *             user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *             ::
+ *
+ *                     # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *     Return
+ *             The positive or null stack id on success, or a negative error
+ *             in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ *     Description
+ *             Compute a checksum difference, from the raw buffer pointed to
+ *             by *from*, of length *from_size* (that must be a multiple of 4),
+ *             towards the raw buffer pointed to by *to*, of size *to_size*
+ *             (same remark). An optional *seed* can be added to the value
+ *             (this can be cascaded, the seed may come from a previous call
+ *             to the helper).
+ *
+ *             This is flexible enough to be used in several ways:
+ *
+ *             * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ *               checksum, it can be used when pushing new data.
+ *             * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ *               checksum, it can be used when removing data from a packet.
+ *             * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ *               can be used to compute a diff. Note that *from_size* and
+ *               *to_size* do not need to be equal.
+ *
+ *             This helper can be used in combination with
+ *             **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ *             which one can feed in the difference computed with
+ *             **bpf_csum_diff**\ ().
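+ *
+ *             As an illustration, rewriting an IPv4 destination address and
+ *             feeding the difference to the L4 checksum helper could look
+ *             like this (a sketch; *IP_DST_OFF* and *TCP_CSUM_OFF* are
+ *             illustrative offsets into the packet):
+ *
+ *             ::
+ *
+ *                     __be32 old_ip, new_ip = bpf_htonl(0x0a000002);
+ *                     s64 diff;
+ *
+ *                     // IP_DST_OFF, TCP_CSUM_OFF: assumed header offsets
+ *                     bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4);
+ *                     diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
+ *                     bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, 4, 0);
+ *                     bpf_l4_csum_replace(skb, TCP_CSUM_OFF, 0, diff,
+ *                                         BPF_F_PSEUDO_HDR);
+ *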
+ *     Return
+ *             The checksum result, or a negative error code in case of
+ *             failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *     Description
+ *             Retrieve tunnel options metadata for the packet associated to
+ *             *skb*, and store the raw tunnel option data to the buffer *opt*
+ *             of *size*.
+ *
+ *             This helper can be used with encapsulation devices that can
+ *             operate in "collect metadata" mode (please refer to the related
+ *             note in the description of **bpf_skb_get_tunnel_key**\ () for
+ *             more details). A particular example where this can be used is
+ *             in combination with the Geneve encapsulation protocol, where it
+ *             allows for pushing (with the **bpf_skb_set_tunnel_opt**\ () helper)
+ *             and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ *             the eBPF program. This allows for full customization of these
+ *             headers.
+ *     Return
+ *             The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *     Description
+ *             Set tunnel options metadata for the packet associated to *skb*
+ *             to the option data contained in the raw buffer *opt* of *size*.
+ *
+ *             See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *             helper for additional information.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ *     Description
+ *             Change the protocol of the *skb* to *proto*. Currently
+ *             supported are transitions from IPv4 to IPv6, and from IPv6 to
+ *             IPv4. The helper takes care of the groundwork for the
+ *             transition, including resizing the socket buffer. The eBPF
+ *             program is expected to fill the new headers, if any, via
+ *             **skb_store_bytes**\ () and to recompute the checksums with
+ *             **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ *             (). The main case for this helper is to perform NAT64
+ *             operations out of an eBPF program.
+ *
+ *             Internally, the GSO type is marked as dodgy so that headers are
+ *             checked and segments are recalculated by the GSO/GRO engine.
+ *             The GSO target size is adapted as well.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ *     Description
+ *             Change the packet type for the packet associated to *skb*. This
+ *             comes down to setting *skb*\ **->pkt_type** to *type*, except
+ *             the eBPF program does not have write access to *skb*\
+ *             **->pkt_type** besides this helper. Using a helper here allows
+ *             for graceful handling of errors.
+ *
+ *             The major use case is to change incoming *skb*s to
+ *             **PACKET_HOST** in a programmatic way instead of having to
+ *             recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ *             example.
+ *
+ *             Note that *type* only allows certain values. At this time, they
+ *             are:
+ *
+ *             **PACKET_HOST**
+ *                     Packet is for us.
+ *             **PACKET_BROADCAST**
+ *                     Send packet to all.
+ *             **PACKET_MULTICAST**
+ *                     Send packet to group.
+ *             **PACKET_OTHERHOST**
+ *                     Send packet to someone else.
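+ *
+ *             A typical call simply promotes the packet for local delivery
+ *             (a sketch):
+ *
+ *             ::
+ *
+ *                     if (bpf_skb_change_type(skb, PACKET_HOST))
+ *                             return TC_ACT_SHOT;
+ *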
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ *     Description
+ *             Check whether *skb* is a descendant of the cgroup2 held by
+ *             *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *     Return
+ *             The return value depends on the result of the test, and can be:
+ *
+ *             * 0, if the *skb* failed the cgroup2 descendant test.
+ *             * 1, if the *skb* succeeded the cgroup2 descendant test.
+ *             * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ *     Description
+ *             Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ *             not set, in particular if the hash was cleared due to mangling,
+ *             recompute this hash. Later accesses to the hash can be done
+ *             directly with *skb*\ **->hash**.
+ *
+ *             Calling **bpf_set_hash_invalid**\ (), changing a packet
+ *             protocol with **bpf_skb_change_proto**\ (), or calling
+ *             **bpf_skb_store_bytes**\ () with the
+ *             **BPF_F_INVALIDATE_HASH** flag are actions that may clear
+ *             the hash and trigger a new computation for the next call to
+ *             **bpf_get_hash_recalc**\ ().
+ *     Return
+ *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
- *     Returns current task_struct
- *     Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- *     safely attempt to write to a location
- *     @dst: destination address in userspace
- *     @src: source address on stack
- *     @len: number of bytes to copy
- *     Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- *     Check cgroup2 membership of current task
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 current failed the cgroup2 descendant test
- *       == 1 current succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- *     The helper will resize the skb to the given new size, to be used f.e.
- *     with control messages.
- *     @skb: pointer to skb
- *     @len: new skb length
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- *     The helper will pull in non-linear data in case the skb is non-linear
- *     and not all of len are part of the linear section. Only needed for
- *     read/write with direct packet access.
- *     @skb: pointer to skb
- *     @len: len to make read/writeable
- *     Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- *     @skb: pointer to skb
- *     @csum: csum to add
- *     Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- *     Invalidate current skb->hash.
- *     @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- *     Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- *     Grows headroom of skb and adjusts MAC header offset accordingly.
- *     Will extends/reallocae as required automatically.
- *     May change skb data pointer and will thus invalidate any check
- *     performed for direct packet access.
- *     @skb: pointer to skb
- *     @len: length of header to be pushed in front
- *     @flags: Flags (unused for now)
- *     Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- *     Adjust the xdp_md.data by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data
- *     Return: 0 on success or negative on error
+ *     Return
+ *             A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ *     Description
+ *             Attempt in a safe way to write *len* bytes from the buffer
+ *             *src* to *dst* in memory. It only works for threads that are in
+ *             user context, and *dst* must be a valid user space address.
+ *
+ *             This helper should not be used to implement any kind of
+ *             security mechanism because of TOC-TOU attacks, but rather to
+ *             debug, divert, and manipulate execution of semi-cooperative
+ *             processes.
+ *
+ *             Keep in mind that this feature is meant for experiments, and it
+ *             has a risk of crashing the system and running programs.
+ *             Therefore, when an eBPF program using this helper is attached,
+ *             a warning including PID and process name is printed to kernel
+ *             logs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ *     Description
+ *             Check whether the probe is being run in the context of a given
+ *             subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ *             *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *     Return
+ *             The return value depends on the result of the test, and can be:
+ *
+ *             * 0, if current task failed the cgroup2 descendant test.
+ *             * 1, if current task succeeded the cgroup2 descendant test.
+ *             * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ *     Description
+ *             Resize (trim or grow) the packet associated to *skb* to the
+ *             new *len*. The *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             The basic idea is that the helper performs the needed work to
+ *             change the size of the packet, then the eBPF program rewrites
+ *             the rest via helpers like **bpf_skb_store_bytes**\ (),
+ *             **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ *             and others. This helper is a slow path utility intended for
+ *             replies with control messages. And because it is targeted for
+ *             the slow path, the helper itself can afford to be slow: it
+ *             implicitly linearizes, unclones and drops offloads from the
+ *             *skb*.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ *     Description
+ *             Pull in non-linear data in case the *skb* is non-linear and not
+ *             all of *len* are part of the linear section. Make *len* bytes
+ *             from *skb* readable and writable. If a zero value is passed for
+ *             *len*, then the whole length of the *skb* is pulled.
+ *
+ *             This helper is only needed for reading and writing with direct
+ *             packet access.
+ *
+ *             For direct packet access, testing that offsets to access
+ *             are within packet boundaries (test on *skb*\ **->data_end**) is
+ *             liable to fail if offsets are invalid, or if the requested
+ *             data is in non-linear parts of the *skb*. On failure the
+ *             program can just bail out, or in the case of a non-linear
+ *             buffer, use a helper to make the data available. The
+ *             **bpf_skb_load_bytes**\ () helper is a first solution to access
+ *             the data. Another one consists in using **bpf_skb_pull_data**\
+ *             () to pull in the non-linear parts once, then retesting and
+ *             finally accessing the data.
+ *
+ *             At the same time, this also makes sure the *skb* is uncloned,
+ *             which is a necessary condition for direct write. As this needs
+ *             to be an invariant for the write part only, the verifier
+ *             detects writes and adds a prologue that calls
+ *             **bpf_skb_pull_data**\ () to effectively unclone the *skb* from
+ *             the very beginning in case it is indeed cloned.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
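+ *
+ *             The retest pattern described above could look like this in a
+ *             TC program (a sketch; it makes the first **ETH_HLEN** bytes
+ *             readable):
+ *
+ *             ::
+ *
+ *                     void *data = (void *)(long)skb->data;
+ *                     void *data_end = (void *)(long)skb->data_end;
+ *
+ *                     if (data + ETH_HLEN > data_end) {
+ *                             if (bpf_skb_pull_data(skb, ETH_HLEN))
+ *                                     return TC_ACT_SHOT;
+ *                             // pointers were invalidated: reload and retest
+ *                             data = (void *)(long)skb->data;
+ *                             data_end = (void *)(long)skb->data_end;
+ *                             if (data + ETH_HLEN > data_end)
+ *                                     return TC_ACT_SHOT;
+ *                     }
+ *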
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ *     Description
+ *             Add the checksum *csum* into *skb*\ **->csum** in case the
+ *             driver has supplied a checksum for the entire packet into that
+ *             field. Return an error otherwise. This helper is intended to be
+ *             used in combination with **bpf_csum_diff**\ (), in particular
+ *             when the checksum needs to be updated after data has been
+ *             written into the packet through direct packet access.
+ *     Return
+ *             The checksum on success, or a negative error code in case of
+ *             failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ *     Description
+ *             Invalidate the current *skb*\ **->hash**. It can be used after
+ *             mangling on headers through direct packet access, in order to
+ *             indicate that the hash is outdated and to trigger a
+ *             recalculation the next time the kernel tries to access this
+ *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ *     Description
+ *             Return the id of the current NUMA node. The primary use case
+ *             for this helper is the selection of sockets for the local NUMA
+ *             node, when the program is attached to sockets using the
+ *             **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ *             but the helper is also available to other eBPF program types,
+ *             similarly to **bpf_get_smp_processor_id**\ ().
+ *     Return
+ *             The id of the current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ *     Description
+ *             Grow the headroom of the packet associated to *skb* and adjust
+ *             the offset of the MAC header accordingly, adding *len* bytes of
+ *             space. The helper automatically extends and reallocates memory
+ *             as required.
+ *
+ *             This helper can be used on a layer 3 *skb* to push a MAC header
+ *             for redirection into a layer 2 device.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ *     Description
+ *             Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ *             it is possible to use a negative value for *delta*. This helper
+ *             can be used to prepare the packet for pushing or popping
+ *             headers.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
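+ *
+ *             For example, popping an outer Ethernet header could look like
+ *             this (a sketch; a positive *delta* moves *xdp_md*\ **->data**
+ *             forward, shrinking the packet from the front):
+ *
+ *             ::
+ *
+ *                     if (bpf_xdp_adjust_head(xdp_md, sizeof(struct ethhdr)))
+ *                             return XDP_ABORTED;
+ *                     // packet pointers must be re-derived after this call
+ *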
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  *
  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- *     Copy a NUL terminated string from unsafe address. In case the string
- *     length is smaller than size, the target is not padded with further NUL
- *     bytes. In case the string length is larger than size, just count-1
- *     bytes are copied and the last byte is set to NUL.
- *     @dst: destination address
- *     @size: maximum number of bytes to copy, including the trailing NUL
- *     @unsafe_ptr: unsafe address
- *     Return:
- *       > 0 length of the string including the trailing NUL on success
- *       < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- *     Get the cookie for the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
- *     field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- *     Get the owner uid of the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- *     Set full skb->hash.
- *     @skb: pointer to skb
- *     @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls setsockopt. Not all opts are available, only those with
- *     integer optvals plus TCP_CONGESTION.
- *     Supported levels: SOL_SOCKET and IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: SOL_SOCKET or IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls getsockopt. Not all opts are available.
- *     Supported levels: IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- *     Set callback flags for sock_ops
- *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- *     @flags: flags value
- *     Return: 0 for no error
- *             -EINVAL if there is no full tcp socket
- *             bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- *     Grow or shrink room in sk_buff.
- *     @skb: pointer to skb
- *     @len_diff: (signed) amount of room to grow/shrink
- *     @mode: operation mode (enum bpf_adj_room_mode)
- *     @flags: reserved for future use
- *     Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- *     Redirect skb to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- *     @skops: pointer to bpf_sock_ops
- *     @map: pointer to sockmap to update
- *     @key: key to insert/update sock in map
- *     @flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- *     Adjust the xdp_md.data_meta by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data_meta
- *     Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- *     read perf event counter value and perf event enabled/running time
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- *     read perf prog attached perf event counter and enabled/running time
- *     @ctx: pointer to ctx
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- *     @pt_regs: pointer to struct pt_regs
- *     @rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- *     Redirect msg to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- *     Bind socket to address. Only binding to IP is supported, no port can be
- *     set in addr.
- *     @ctx: pointer to context of type bpf_sock_addr
- *     @addr: pointer to struct sockaddr to bind socket to
- *     @addr_len: length of sockaddr structure
- *     Return: 0 on success or negative error code
- *
- * int bpf_xdp_adjust_tail(xdp_md, delta)
- *     Adjust the xdp_md.data_end by delta. Only shrinking of packet's
- *     size is supported.
- *     @xdp_md: pointer to xdp_md
- *     @delta: A negative integer to be added to xdp_md.data_end
- *     Return: 0 on success or negative on error
+ *     Description
+ *             Copy a NUL terminated string from an unsafe address
+ *             *unsafe_ptr* to *dst*. The *size* should include the
+ *             terminating NUL byte. In case the string length is smaller than
+ *             *size*, the target is not padded with further NUL bytes. If the
+ *             string length is larger than *size*, just *size*-1 bytes are
+ *             copied and the last byte is set to NUL.
+ *
+ *             On success, the length of the copied string is returned. This
+ *             makes this helper useful in tracing programs for reading
+ *             strings, and more importantly to get their length at runtime. See
+ *             the following snippet:
+ *
+ *             ::
+ *
+ *                     SEC("kprobe/sys_open")
+ *                     void bpf_sys_open(struct pt_regs *ctx)
+ *                     {
+ *                             char buf[PATHLEN]; // PATHLEN is defined to 256
+ *                             int res = bpf_probe_read_str(buf, sizeof(buf),
+ *                                                          ctx->di);
+ *
+ *                             // Consume buf, for example push it to
+ *                             // userspace via bpf_perf_event_output(); we
+ *                             // can use res (the string length) as event
+ *                             // size, after checking its boundaries.
+ *                     }
+ *
+ *             In comparison, using the **bpf_probe_read**\ () helper here
+ *             instead to read the string would require estimating the length
+ *             at compile time, and would often result in copying more memory
+ *             than necessary.
+ *
+ *             Another useful use case is when parsing individual process
+ *             arguments or individual environment variables by navigating
+ *             *current*\ **->mm->arg_start** and *current*\
+ *             **->mm->env_start**: using this helper and the return value,
+ *             one can quickly iterate at the right offset of the memory area.
+ *     Return
+ *             On success, the strictly positive length of the string,
+ *             including the trailing NUL character. On error, a negative
+ *             value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ *     Description
+ *             If the **struct sk_buff** pointed by *skb* has a known socket,
+ *             retrieve the cookie (generated by the kernel) of this socket.
+ *             If no cookie has been set yet, generate a new cookie. Once
+ *             generated, the socket cookie remains stable for the life of the
+ *             socket. This helper can be useful for monitoring per socket
+ *             networking traffic statistics as it provides a unique socket
+ *             identifier per namespace.
+ *     Return
+ *             An 8-byte long non-decreasing number on success, or 0 if the
+ *             socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Return
+ *             The owner UID of the socket associated to *skb*. If the socket
+ *             is **NULL**, or if it is not a full socket (i.e. if it is a
+ *             time-wait or a request socket instead), **overflowuid** value
+ *             is returned (note that **overflowuid** might also be the actual
+ *             UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ *     Description
+ *             Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ *             to value *hash*.
+ *     Return
+ *             0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ *     Description
+ *             Emulate a call to **setsockopt()** on the socket associated to
+ *             *bpf_socket*, which must be a full socket. The *level* at
+ *             which the option resides and the name *optname* of the option
+ *             must be specified, see **setsockopt(2)** for more information.
+ *             The option value of length *optlen* is pointed to by *optval*.
+ *
+ *             This helper actually implements a subset of **setsockopt()**.
+ *             It supports the following *level*\ s:
+ *
+ *             * **SOL_SOCKET**, which supports the following *optname*\ s:
+ *               **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ *               **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ *             * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ *               **TCP_CONGESTION**, **TCP_BPF_IW**,
+ *               **TCP_BPF_SNDCWND_CLAMP**.
+ *             * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ *             * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
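+ *
+ *             For example, a program of type **BPF_PROG_TYPE_SOCK_OPS**
+ *             could switch the congestion control algorithm as follows (a
+ *             sketch; it assumes TCP CUBIC is available in the kernel):
+ *
+ *             ::
+ *
+ *                     char cc[] = "cubic";    // assumed to be built in
+ *
+ *                     bpf_setsockopt(bpf_socket, IPPROTO_TCP, TCP_CONGESTION,
+ *                                    cc, sizeof(cc));
+ *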
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ *     Description
+ *             Grow or shrink the room for data in the packet associated to
+ *             *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ *             There is a single supported mode at this time:
+ *
+ *             * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ *               (room space is added or removed below the layer 3 header).
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             Redirect the packet to the endpoint referenced by *map* at
+ *             index *key*. Depending on its type, this *map* can contain
+ *             references to net devices (for forwarding packets through other
+ *             ports), or to CPUs (for redirecting XDP frames to another CPU;
+ *             but this is only implemented for native XDP (with driver
+ *             support) as of this writing).
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             When used to redirect packets to net devices, this helper
+ *             provides a significant performance increase over **bpf_redirect**\ ().
+ *             This is due to various implementation details of the underlying
+ *             mechanisms, one of which is the fact that **bpf_redirect_map**\
+ *             () tries to send packets in bulk to the device.
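+ *
+ *             In an XDP program, a typical call is a one-liner (a sketch;
+ *             *tx_ports* is an illustrative map of type
+ *             **BPF_MAP_TYPE_DEVMAP**, and *port* an index computed by the
+ *             program):
+ *
+ *             ::
+ *
+ *                     // "tx_ports" and "port" are assumed names
+ *                     return bpf_redirect_map(&tx_ports, port, 0);
+ *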
+ *     Return
+ *             **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             Redirect the packet to the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             Add an entry to, or update, a *map* referencing sockets. The
+ *             *skops* is used as a new value for the entry associated to
+ *             *key*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             If the *map* has eBPF programs (parser and verdict), those will
+ *             be inherited by the socket being added. If the socket is
+ *             already attached to eBPF programs, this results in an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ *     Description
+ *             Adjust the address pointed to by *xdp_md*\ **->data_meta** by
+ *             *delta* (which can be positive or negative). Note that this
+ *             operation modifies the address stored in *xdp_md*\ **->data**,
+ *             so the latter must be loaded only after the helper has been
+ *             called.
+ *
+ *             The use of *xdp_md*\ **->data_meta** is optional and programs
+ *             are not required to use it. The rationale is that when the
+ *             packet is processed with XDP (e.g. as DoS filter), it is
+ *             possible to push further meta data along with it before passing
+ *             to the stack, and to give the guarantee that an ingress eBPF
+ *             program attached as a TC classifier on the same device can pick
+ *             this up for further post-processing. Since TC works with socket
+ *             buffers, it remains possible to set from XDP the **mark** or
+ *             **priority** fields, or other fields of the socket buffer.
+ *             Having this scratch space generic and programmable allows for
+ *             more flexibility as the user is free to store whatever meta
+ *             data they need.
+ *
+ *             A call to this helper may change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ *     Description
+ *             Read the value of a perf event counter, and store it into *buf*
+ *             of size *buf_size*. This helper relies on a *map* of type
+ *             **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ *             counter is selected when *map* is updated with perf event file
+ *             descriptors. The *map* is an array whose size is the number of
+ *             available CPUs, and each cell contains a value relative to one
+ *             CPU. The value to retrieve is indicated by *flags*, that
+ *             contains the index of the CPU to look up, masked with
+ *             **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *             **BPF_F_CURRENT_CPU** to indicate that the value for the
+ *             current CPU should be retrieved.
+ *
+ *             This helper behaves in a way close to
+ *             **bpf_perf_event_read**\ () helper, save that instead of
+ *             just returning the value observed, it fills the *buf*
+ *             structure. This allows for additional data to be retrieved: in
+ *             particular, the enabled and running times (in *buf*\
+ *             **->enabled** and *buf*\ **->running**, respectively) are
+ *             copied. In general, **bpf_perf_event_read_value**\ () is
+ *             recommended over **bpf_perf_event_read**\ (), which has some
+ *             ABI issues and provides fewer functionalities.
+ *
+ *             These values are interesting, because hardware PMU (Performance
+ *             Monitoring Unit) counters are limited resources. When there are
+ *             more PMU based perf events opened than available counters,
+ *             the kernel will multiplex these events so each event gets a
+ *             certain percentage (but not all) of the PMU time. When
+ *             multiplexing happens, the number of samples or the counter
+ *             value will not reflect what it would be without multiplexing.
+ *             This makes comparison between different runs difficult.
+ *             Typically, the counter value should be normalized before
+ *             comparing to other experiments. The usual normalization is done
+ *             as follows.
+ *
+ *             ::
+ *
+ *                     normalized_counter = counter * t_enabled / t_running
+ *
+ *             where *t_enabled* is the time enabled for the event and
+ *             *t_running* is the time running for the event since the last
+ *             normalization. The
+ *             enabled and running times are accumulated since the perf event
+ *             open. To achieve scaling factor between two invocations of an
+ *             eBPF program, users can use the CPU id as the key (which is
+ *             typical for perf array usage model) to remember the previous
+ *             value and do the calculation inside the eBPF program.
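+ *
+ *             A minimal reading sketch (the *counters* map name is
+ *             illustrative):
+ *
+ *             ::
+ *
+ *                     struct bpf_perf_event_value value;
+ *
+ *                     // "counters" is an assumed perf event array map
+ *                     if (bpf_perf_event_read_value(&counters,
+ *                                                   BPF_F_CURRENT_CPU,
+ *                                                   &value, sizeof(value)))
+ *                             return 0;
+ *                     // value.counter, value.enabled, value.running are set
+ *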
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ *     Description
+ *             For an eBPF program attached to a perf event, retrieve the
+ *             value of the event counter associated to *ctx* and store it in
+ *             the structure pointed to by *buf* and of size *buf_size*. Enabled
+ *             and running times are also stored in the structure (see
+ *             description of helper **bpf_perf_event_read_value**\ () for
+ *             more details).
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ *     Description
+ *             Emulate a call to **getsockopt()** on the socket associated to
+ *             *bpf_socket*, which must be a full socket. The *level* at
+ *             which the option resides and the name *optname* of the option
+ *             must be specified, see **getsockopt(2)** for more information.
+ *             The retrieved value is stored in the structure pointed to by
+ *             *optval* and of length *optlen*.
+ *
+ *             This helper actually implements a subset of **getsockopt()**.
+ *             It supports the following *level*\ s:
+ *
+ *             * **IPPROTO_TCP**, which supports *optname*
+ *               **TCP_CONGESTION**.
+ *             * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ *             * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ *     Description
+ *             Used for error injection, this helper uses kprobes to override
+ *             the return value of the probed function, and to set it to *rc*.
+ *             The first argument is the context *regs* on which the kprobe
+ *             works.
+ *
+ *             This helper works by setting the PC (program counter)
+ *             to an override function which is run in place of the original
+ *             probed function. This means the probed function is not run at
+ *             all. The replacement function just returns with the required
+ *             value.
+ *
+ *             This helper has security implications, and thus is subject to
+ *             restrictions. It is only available if the kernel was compiled
+ *             with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ *             option, and in this case it only works on functions tagged with
+ *             **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ *             Also, the helper is only available for the architectures having
+ *             the **CONFIG_FUNCTION_ERROR_INJECTION** option. As of this
+ *             writing, the x86 architecture is the only one to support this
+ *             feature.
+ *     Return
+ *             0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ *     Description
+ *             Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ *             for the full TCP socket associated to *bpf_sock* to
+ *             *argval*.
+ *
+ *             The primary use of this field is to determine if there should
+ *             be calls to eBPF programs of type
+ *             **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ *             code. A program of the same type can change its value, per
+ *             connection and as necessary, when the connection is
+ *             established. This field is directly accessible for reading, but
+ *             this helper must be used for updates in order to return an
+ *             error if an eBPF program tries to set a callback that is not
+ *             supported in the current kernel.
+ *
+ *             The supported callback values that *argval* can combine are:
+ *
+ *             * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ *             * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ *             * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ *             Here are some examples of where one could call such an eBPF
+ *             program:
+ *
+ *             * When RTO fires.
+ *             * When a packet is retransmitted.
+ *             * When the connection terminates.
+ *             * When a packet is sent.
+ *             * When a packet is received.
+ *     Return
+ *             Code **-EINVAL** if the socket is not a full TCP socket;
+ *             otherwise, a positive number containing the bits that could not
+ *             be set (0 if all bits were set as required).
+ *
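+ *             A hypothetical sketch (helper wrappers from the selftests'
+ *             **bpf_helpers.h** assumed): enable the retransmission and
+ *             state-change callbacks once a connection is actively
+ *             established:
+ *
+ *             ::
+ *
+ *                     SEC("sockops")
+ *                     int set_cb_flags(struct bpf_sock_ops *skops)
+ *                     {
+ *                             if (skops->op ==
+ *                                 BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
+ *                                     bpf_sock_ops_cb_flags_set(skops,
+ *                                             BPF_SOCK_OPS_RETRANS_CB_FLAG |
+ *                                             BPF_SOCK_OPS_STATE_CB_FLAG);
+ *                             return 1;
+ *                     }
+ *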
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             socket level. If the message *msg* is allowed to pass (i.e. if
+ *             the verdict eBPF program returns **SK_PASS**), redirect it to
+ *             the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
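+ *             A minimal sketch (hypothetical, assuming a
+ *             **BPF_MAP_TYPE_SOCKMAP** named *sock_map* defined in the same
+ *             object and the selftests' **bpf_helpers.h**), redirecting
+ *             every allowed message to the socket stored at index 0:
+ *
+ *             ::
+ *
+ *                     SEC("sk_msg")
+ *                     int msg_verdict(struct sk_msg_md *msg)
+ *                     {
+ *                             return bpf_msg_redirect_map(msg, &sock_map,
+ *                                                         0, BPF_F_INGRESS);
+ *                     }
+ *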
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ *     Description
+ *             For socket policies, apply the verdict of the eBPF program to
+ *             the next *bytes* bytes of message *msg*.
+ *
+ *             For example, this helper can be used in the following cases:
+ *
+ *             * A single **sendmsg**\ () or **sendfile**\ () system call
+ *               contains multiple logical messages that the eBPF program is
+ *               supposed to read and for which it should apply a verdict.
+ *             * An eBPF program only cares about the first *bytes* bytes of
+ *               a *msg*. If the message has a large payload, then setting up
+ *               and calling the eBPF program repeatedly for all bytes, even
+ *               though the verdict is already known, would create unnecessary
+ *               overhead.
+ *
+ *             When called from within an eBPF program, the helper sets a
+ *             counter internal to the BPF infrastructure, which is used to
+ *             apply the last verdict to the next *bytes*. If *bytes* is
+ *             smaller than the current data being processed from a
+ *             **sendmsg**\ () or **sendfile**\ () system call, the first
+ *             *bytes* will be sent and the eBPF program will be re-run with
+ *             the pointer for start of data pointing to byte number *bytes*
+ *             **+ 1**. If *bytes* is larger than the current data being
+ *             processed, then the eBPF verdict will be applied to multiple
+ *             **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ *             consumed.
+ *
+ *             Note that if a socket closes with the internal counter holding
+ *             a non-zero value, this is not a problem because data is not
+ *             being buffered for *bytes* and is sent as it is received.
+ *     Return
+ *             0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ *     Description
+ *             For socket policies, prevent the execution of the verdict eBPF
+ *             program for message *msg* until *bytes* bytes have been
+ *             accumulated.
+ *
+ *             This can be used when one needs a specific number of bytes
+ *             before a verdict can be assigned, even if the data spans
+ *             multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ *             case would be a user calling **sendmsg**\ () repeatedly with
+ *             1-byte long message segments. Obviously, this is bad for
+ *             performance, but it is still valid. If the eBPF program needs
+ *             *bytes* bytes to validate a header, this helper can be used to
+ *             prevent it from being called again until *bytes* bytes have
+ *             been accumulated.
+ *     Return
+ *             0
+ *
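+ *             A sketch combining this helper with **bpf_msg_apply_bytes**\ ()
+ *             above (hypothetical, selftests' **bpf_helpers.h** assumed):
+ *             wait until an 8-byte header has accumulated, then let a single
+ *             verdict cover the next megabyte:
+ *
+ *             ::
+ *
+ *                     SEC("sk_msg")
+ *                     int msg_policy(struct sk_msg_md *msg)
+ *                     {
+ *                             bpf_msg_cork_bytes(msg, 8);
+ *                             bpf_msg_apply_bytes(msg, 1024 * 1024);
+ *                             return SK_PASS;
+ *                     }
+ *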
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ *     Description
+ *             For socket policies, pull in non-linear data from user space
+ *             for *msg* and set pointers *msg*\ **->data** and *msg*\
+ *             **->data_end** to *start* and *end* byte offsets into *msg*,
+ *             respectively.
+ *
+ *             If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ *             *msg* it can only parse data that the (**data**, **data_end**)
+ *             pointers have already consumed. For **sendmsg**\ () hooks this
+ *             is likely the first scatterlist element. But for calls relying
+ *             on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ *             be the range (**0**, **0**) because the data is shared with
+ *             user space and by default the objective is to avoid allowing
+ *             user space to modify data while (or after) eBPF verdict is
+ *             being decided. This helper can be used to pull in data and to
+ *             set the start and end pointer to given values. Data will be
+ *             copied if necessary (i.e. if data was not linear and if start
+ *             and end pointers do not point to the same chunk).
+ *
+ *             A call to this helper may change the underlying packet
+ *             buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
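+ *             Sketch (hypothetical, selftests' **bpf_helpers.h** assumed):
+ *             make the first 10 bytes of *msg* directly readable before
+ *             inspecting them:
+ *
+ *             ::
+ *
+ *                     SEC("sk_msg")
+ *                     int msg_parse(struct sk_msg_md *msg)
+ *                     {
+ *                             if (bpf_msg_pull_data(msg, 0, 10, 0))
+ *                                     return SK_DROP;
+ *                             if (msg->data + 10 > msg->data_end)
+ *                                     return SK_DROP;
+ *                             return SK_PASS;
+ *                     }
+ *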
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ *     Description
+ *             Bind the socket associated with *ctx* to the address pointed
+ *             to by *addr*, of length *addr_len*. This allows for making
+ *             outgoing connections from the desired IP address, which can be
+ *             useful for example when all processes inside a cgroup should
+ *             use one single IP address on a host that has multiple
+ *             addresses configured.
+ *
+ *             This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ *             domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ *             **AF_INET6**). Looking for a free port to bind to can be
+ *             expensive, therefore binding to a port is not permitted by the
+ *             helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ *             must be set to zero.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
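+ *             A hypothetical **cgroup/connect4** sketch forcing outgoing
+ *             IPv4 connections to originate from 192.0.2.1, with the port
+ *             left at zero as required (**bpf_helpers.h**, **bpf_endian.h**
+ *             from the selftests and the usual socket headers assumed):
+ *
+ *             ::
+ *
+ *                     SEC("cgroup/connect4")
+ *                     int set_src(struct bpf_sock_addr *ctx)
+ *                     {
+ *                             struct sockaddr_in sa = {
+ *                                     .sin_family = AF_INET,
+ *                                     .sin_addr.s_addr =
+ *                                             bpf_htonl(0xc0000201),
+ *                             };
+ *
+ *                             if (bpf_bind(ctx, (struct sockaddr *)&sa,
+ *                                          sizeof(sa)))
+ *                                     return 0;
+ *                             return 1;
+ *                     }
+ *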
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ *     Description
+ *             Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ *             only possible to shrink the packet as of this writing,
+ *             therefore *delta* must be a negative integer.
+ *
+ *             A call to this helper may change the underlying packet
+ *             buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
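+ *             Sketch (hypothetical, selftests' **bpf_helpers.h** assumed):
+ *             trim every frame longer than 64 bytes down to 64 bytes:
+ *
+ *             ::
+ *
+ *                     SEC("xdp")
+ *                     int trim(struct xdp_md *ctx)
+ *                     {
+ *                             int len = ctx->data_end - ctx->data;
+ *
+ *                             if (len > 64 &&
+ *                                 bpf_xdp_adjust_tail(ctx, 64 - len))
+ *                                     return XDP_ABORTED;
+ *                             return XDP_PASS;
+ *                     }
+ *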
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ *     Description
+ *             Retrieve the XFRM state (IP transform framework, see also
+ *             **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ *             The retrieved value is stored in the **struct bpf_xfrm_state**
+ *             pointed to by *xfrm_state* and of length *size*.
+ *
+ *             All values for *flags* are reserved for future usage, and must
+ *             be left at zero.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             the **CONFIG_XFRM** configuration option.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
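+ *             Sketch (hypothetical, selftests' **bpf_helpers.h** and
+ *             **linux/pkt_cls.h** assumed): a tc classifier marking packets
+ *             whose first security-path state has *reqid* 1:
+ *
+ *             ::
+ *
+ *                     SEC("classifier")
+ *                     int match_xfrm(struct __sk_buff *skb)
+ *                     {
+ *                             struct bpf_xfrm_state x = {};
+ *
+ *                             if (bpf_skb_get_xfrm_state(skb, 0, &x,
+ *                                                        sizeof(x), 0))
+ *                                     return TC_ACT_OK;
+ *                             if (x.reqid == 1)
+ *                                     skb->mark = 1;
+ *                             return TC_ACT_OK;
+ *                     }
+ *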
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ *     Description
+ *             Return a user or a kernel stack in the buffer provided by the
+ *             bpf program. To achieve this, the helper needs *regs*, which is
+ *             a pointer to the context on which the tracing program is
+ *             executed. To store the stacktrace, the bpf program provides
+ *             *buf* with a nonnegative *size*.
+ *
+ *             The last argument, *flags*, holds the number of stack frames to
+ *             skip (from 0 to 255), masked with
+ *             **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *             the following flags:
+ *
+ *             **BPF_F_USER_STACK**
+ *                     Collect a user space stack instead of a kernel stack.
+ *             **BPF_F_USER_BUILD_ID**
+ *                     Collect buildid+offset instead of ips for user stack,
+ *                     only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *             **bpf_get_stack**\ () can collect up to
+ *             **PERF_MAX_STACK_DEPTH** kernel and user frames combined,
+ *             provided the buffer is sufficiently large. Note that
+ *             this limit can be controlled with the **sysctl** program, and
+ *             that it should be manually increased in order to profile long
+ *             user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *             ::
+ *
+ *                     # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *     Return
+ *             A non-negative value equal to or less than *size* on success,
+ *             or a negative error in case of failure.
+ *
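+ *             Sketch (hypothetical, selftests' **bpf_helpers.h** assumed):
+ *             collect the current user stack from a kprobe; a real program
+ *             would forward *ips* and the returned length through a map or
+ *             **bpf_perf_event_output**\ ():
+ *
+ *             ::
+ *
+ *                     SEC("kprobe/urandom_read")
+ *                     int get_ustack(struct pt_regs *ctx)
+ *                     {
+ *                             __u64 ips[32];
+ *
+ *                             bpf_get_stack(ctx, ips, sizeof(ips),
+ *                                           BPF_F_USER_STACK);
+ *                             return 0;
+ *                     }
+ *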
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ *     Description
+ *             This helper is similar to **bpf_skb_load_bytes**\ () in that
+ *             it provides an easy way to load *len* bytes from *offset*
+ *             from the packet associated to *skb*, into the buffer pointed
+ *             by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ *             a fifth argument *start_header* exists in order to select a
+ *             base offset to start from. *start_header* can be one of:
+ *
+ *             **BPF_HDR_START_MAC**
+ *                     Base offset to load data from is *skb*'s mac header.
+ *             **BPF_HDR_START_NET**
+ *                     Base offset to load data from is *skb*'s network header.
+ *
+ *             In general, "direct packet access" is the preferred method to
+ *             access packet data. However, this helper is particularly useful
+ *             in socket filters where *skb*\ **->data** does not always point
+ *             to the start of the mac header and where "direct packet access"
+ *             is not available.
+ *
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
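+ *             Sketch (hypothetical, selftests' **bpf_helpers.h** and
+ *             **linux/ip.h** assumed): a socket filter keeping only TCP
+ *             packets by reading the IP header relative to the network
+ *             header:
+ *
+ *             ::
+ *
+ *                     SEC("socket")
+ *                     int keep_tcp(struct __sk_buff *skb)
+ *                     {
+ *                             struct iphdr iph;
+ *
+ *                             if (bpf_skb_load_bytes_relative(skb, 0, &iph,
+ *                                     sizeof(iph), BPF_HDR_START_NET))
+ *                                     return 0;
+ *                             return iph.protocol == IPPROTO_TCP ?
+ *                                    skb->len : 0;
+ *                     }
+ *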
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ *     Description
+ *             Do a FIB lookup in kernel tables using the parameters in
+ *             *params*. If the lookup is successful and the result shows the
+ *             packet is to be forwarded, the neighbor tables are searched
+ *             for the nexthop. If successful (i.e., the FIB lookup shows
+ *             forwarding and the nexthop is resolved), the nexthop address
+ *             is returned in **ipv4_dst**, **ipv6_dst** or **mpls_out**
+ *             based on family, **smac** is set to the MAC address of the
+ *             egress device, **dmac** is set to the nexthop MAC address,
+ *             and **rt_metric** is set to the metric from the route.
+ *
+ *             The *plen* argument is the size of the struct passed in, and
+ *             *flags* can combine one or more of the following values:
+ *
+ *             **BPF_FIB_LOOKUP_DIRECT**
+ *                     Do a direct table lookup instead of a full lookup
+ *                     using FIB rules.
+ *             **BPF_FIB_LOOKUP_OUTPUT**
+ *                     Do the lookup from an egress perspective (default is
+ *                     ingress).
+ *
+ *             *ctx* is either **struct xdp_md** for XDP programs or
+ *             **struct sk_buff** for tc cls_act programs.
+ *
+ *     Return
+ *             Egress device index on success, 0 if packet needs to continue
+ *             up the stack for further processing, or a negative error in
+ *             of failure.
+ *
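+ *             A condensed IPv4 XDP sketch of the intended usage
+ *             (hypothetical; selftests' **bpf_helpers.h**, **bpf_endian.h**
+ *             and the usual ether/ip headers assumed):
+ *
+ *             ::
+ *
+ *                     SEC("xdp")
+ *                     int xdp_fwd(struct xdp_md *ctx)
+ *                     {
+ *                             void *end = (void *)(long)ctx->data_end;
+ *                             void *data = (void *)(long)ctx->data;
+ *                             struct ethhdr *eth = data;
+ *                             struct iphdr *iph = data + sizeof(*eth);
+ *                             struct bpf_fib_lookup fib = {};
+ *                             int rc;
+ *
+ *                             if ((void *)(iph + 1) > end ||
+ *                                 eth->h_proto != bpf_htons(ETH_P_IP))
+ *                                     return XDP_PASS;
+ *
+ *                             fib.family      = AF_INET;
+ *                             fib.tos         = iph->tos;
+ *                             fib.l4_protocol = iph->protocol;
+ *                             fib.tot_len     = bpf_ntohs(iph->tot_len);
+ *                             fib.ipv4_src    = iph->saddr;
+ *                             fib.ipv4_dst    = iph->daddr;
+ *                             fib.ifindex     = ctx->ingress_ifindex;
+ *
+ *                             rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
+ *                             if (rc <= 0)
+ *                                     return XDP_PASS;
+ *
+ *                             __builtin_memcpy(eth->h_dest, fib.dmac, 6);
+ *                             __builtin_memcpy(eth->h_source, fib.smac, 6);
+ *                             return bpf_redirect(rc, 0);
+ *                     }
+ *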
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             Add an entry to, or update, a sockhash *map* referencing
+ *             sockets. *skops* is used as a new value for the entry
+ *             associated with *key*. *flags* is one of:
+ *
+ *             **BPF_NOEXIST**
+ *                     The entry for *key* must not exist in the map.
+ *             **BPF_EXIST**
+ *                     The entry for *key* must already exist in the map.
+ *             **BPF_ANY**
+ *                     No condition on the existence of the entry for *key*.
+ *
+ *             If the *map* has eBPF programs (parser and verdict), those will
+ *             be inherited by the socket being added. If the socket is
+ *             already attached to eBPF programs, this results in an error.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             socket level. If the message *msg* is allowed to pass (i.e. if
+ *             the verdict eBPF program returns **SK_PASS**), redirect it to
+ *             the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress path otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ *     Description
+ *             This helper is used in programs implementing policies at the
+ *             skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ *             if the verdict eBPF program returns **SK_PASS**), redirect it
+ *             to the socket referenced by *map* (of type
+ *             **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *             egress interfaces can be used for redirection. The
+ *             **BPF_F_INGRESS** value in *flags* is used to make the
+ *             distinction (ingress path is selected if the flag is present,
+ *             egress otherwise). This is the only flag supported for now.
+ *     Return
+ *             **SK_PASS** on success, or **SK_DROP** on error.
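+ *
+ *             A combined sketch of the three sockhash helpers above
+ *             (hypothetical; assumes a **BPF_MAP_TYPE_SOCKHASH** named
+ *             *sock_hash*, an illustrative key scheme, and the selftests'
+ *             **bpf_helpers.h**): the sockops program inserts established
+ *             sockets, and the sk_msg program redirects between them:
+ *
+ *             ::
+ *
+ *                     SEC("sockops")
+ *                     int add_sock(struct bpf_sock_ops *skops)
+ *                     {
+ *                             __u32 key = skops->local_port;
+ *
+ *                             if (skops->op ==
+ *                                 BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
+ *                                     bpf_sock_hash_update(skops, &sock_hash,
+ *                                                          &key, BPF_ANY);
+ *                             return 1;
+ *                     }
+ *
+ *                     SEC("sk_msg")
+ *                     int redir(struct sk_msg_md *msg)
+ *                     {
+ *                             __u32 key = 80;
+ *
+ *                             return bpf_msg_redirect_hash(msg, &sock_hash,
+ *                                                          &key,
+ *                                                          BPF_F_INGRESS);
+ *                     }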
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -841,7 +1969,14 @@ union bpf_attr {
        FN(msg_cork_bytes),             \
        FN(msg_pull_data),              \
        FN(bind),                       \
-       FN(xdp_adjust_tail),
+       FN(xdp_adjust_tail),            \
+       FN(skb_get_xfrm_state),         \
+       FN(get_stack),                  \
+       FN(skb_load_bytes_relative),    \
+       FN(fib_lookup),                 \
+       FN(sock_hash_update),           \
+       FN(msg_redirect_hash),          \
+       FN(sk_redirect_hash),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -875,15 +2010,19 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6             (1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK          0xffULL
 #define BPF_F_USER_STACK               (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP           (1ULL << 9)
 #define BPF_F_REUSE_STACKID            (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID            (1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
 #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
+#define BPF_F_SEQ_NUMBER               (1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
@@ -898,6 +2037,12 @@ enum bpf_adj_room_mode {
        BPF_ADJ_ROOM_NET,
 };
 
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+       BPF_HDR_START_MAC,
+       BPF_HDR_START_NET,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -946,6 +2091,19 @@ struct bpf_tunnel_key {
        __u32 tunnel_label;
 };
 
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+       __u32 reqid;
+       __u32 spi;      /* Stored in network byte order */
+       __u16 family;
+       union {
+               __u32 remote_ipv4;      /* Stored in network byte order */
+               __u32 remote_ipv6[4];   /* Stored in network byte order */
+       };
+};
+
 /* Generic BPF return codes which all BPF program types may support.
  * The values are binary compatible with their TC_ACT_* counter-part to
  * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1036,6 +2194,7 @@ struct bpf_prog_info {
        __aligned_u64 map_ids;
        char name[BPF_OBJ_NAME_LEN];
        __u32 ifindex;
+       __u32 gpl_compatible:1;
        __u64 netns_dev;
        __u64 netns_ino;
 } __attribute__((aligned(8)));
@@ -1051,6 +2210,15 @@ struct bpf_map_info {
        __u32 ifindex;
        __u64 netns_dev;
        __u64 netns_ino;
+       __u32 btf_id;
+       __u32 btf_key_id;
+       __u32 btf_value_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+       __aligned_u64 btf;
+       __u32 btf_size;
+       __u32 id;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -1231,4 +2399,55 @@ struct bpf_raw_tracepoint_args {
        __u64 args[0];
 };
 
+/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:  Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+
+struct bpf_fib_lookup {
+       /* input */
+       __u8    family;   /* network family, AF_INET, AF_INET6, AF_MPLS */
+
+       /* set if lookup is to consider L4 data - e.g., FIB rules */
+       __u8    l4_protocol;
+       __be16  sport;
+       __be16  dport;
+
+       /* total length of packet from network header - used for MTU check */
+       __u16   tot_len;
+       __u32   ifindex;  /* L3 device index for lookup */
+
+       union {
+               /* inputs to lookup */
+               __u8    tos;            /* AF_INET  */
+               __be32  flowlabel;      /* AF_INET6 */
+
+               /* output: metric of fib result */
+               __u32 rt_metric;
+       };
+
+       union {
+               __be32          mpls_in;
+               __be32          ipv4_src;
+               __u32           ipv6_src[4];  /* in6_addr; network order */
+       };
+
+       /* input to bpf_fib_lookup, *dst is destination address.
+        * output: bpf_fib_lookup sets to gateway address
+        */
+       union {
+               /* return for MPLS lookups */
+               __be32          mpls_out[4];  /* support up to 4 labels */
+               __be32          ipv4_dst;
+               __u32           ipv6_dst[4];  /* in6_addr; network order */
+       };
+
+       /* output */
+       __be16  h_vlan_proto;
+       __be16  h_vlan_TCI;
+       __u8    smac[6];     /* ETH_ALEN */
+       __u8    dmac[6];     /* ETH_ALEN */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index 74a30b1090dfcfe58c95d707671a1b09745d6cab..bcb56ee4701461a6b4a0134903a1eeef8e275ca6 100644 (file)
@@ -6,9 +6,7 @@
 #include <linux/types.h>
 
 #define BTF_MAGIC      0xeB9F
-#define BTF_MAGIC_SWAP 0x9FeB
 #define BTF_VERSION    1
-#define BTF_FLAGS_COMPR        0x01
 
 struct btf_header {
        __u16   magic;
@@ -43,7 +41,7 @@ struct btf_header {
 #define BTF_STR_OFFSET(ref)    ((ref) & BTF_MAX_NAME_OFFSET)
 
 struct btf_type {
-       __u32 name;
+       __u32 name_off;
        /* "info" bits arrangement
         * bits  0-15: vlen (e.g. # of struct's members)
         * bits 16-23: unused
@@ -105,7 +103,7 @@ struct btf_type {
  * info in "struct btf_type").
  */
 struct btf_enum {
-       __u32   name;
+       __u32   name_off;
        __s32   val;
 };
 
@@ -122,7 +120,7 @@ struct btf_array {
  * "struct btf_type").
  */
 struct btf_member {
-       __u32   name;
+       __u32   name_off;
        __u32   type;
        __u32   offset; /* offset in bits */
 };
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h
new file mode 100644 (file)
index 0000000..8415730
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ERSPAN Tunnel Metadata
+ *
+ * Copyright (c) 2018 VMware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * Userspace API for metadata mode ERSPAN tunnel
+ */
+#ifndef _UAPI_ERSPAN_H
+#define _UAPI_ERSPAN_H
+
+#include <linux/types.h>       /* For __beXX in userspace */
+#include <asm/byteorder.h>
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+       __be32 timestamp;
+       __be16 sgt;     /* security group tag */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8    hwid_upper:2,
+               ft:5,
+               p:1;
+       __u8    o:1,
+               gra:2,
+               dir:1,
+               hwid:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u8    p:1,
+               ft:5,
+               hwid_upper:2;
+       __u8    hwid:4,
+               dir:1,
+               gra:2,
+               o:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+struct erspan_metadata {
+       int version;
+       union {
+               __be32 index;           /* Version 1 (type II)*/
+               struct erspan_md2 md2;  /* Version 2 (type III) */
+       } u;
+};
+
+#endif /* _UAPI_ERSPAN_H */
index 6d9447700e18c983804e1fecc4a6854e138d10f6..68699f654118592527096dc26336f57da6a01cdc 100644 (file)
@@ -941,4 +941,43 @@ enum {
        IFLA_EVENT_BONDING_OPTIONS,     /* change in bonding options */
 };
 
+/* tun section */
+
+enum {
+       IFLA_TUN_UNSPEC,
+       IFLA_TUN_OWNER,
+       IFLA_TUN_GROUP,
+       IFLA_TUN_TYPE,
+       IFLA_TUN_PI,
+       IFLA_TUN_VNET_HDR,
+       IFLA_TUN_PERSIST,
+       IFLA_TUN_MULTI_QUEUE,
+       IFLA_TUN_NUM_QUEUES,
+       IFLA_TUN_NUM_DISABLED_QUEUES,
+       __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION         (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+
+enum {
+       IFLA_RMNET_UNSPEC,
+       IFLA_RMNET_MUX_ID,
+       IFLA_RMNET_FLAGS,
+       __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+       __u32   flags;
+       __u32   mask;
+};
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
index 6b89f87db200333922c0947ce481e4638f2e953d..b02c41e53d5616a3124e16dbec17250654a790b4 100644 (file)
@@ -396,6 +396,10 @@ struct kvm_run {
                char padding[256];
        };
 
+       /* 2048 is the size of the char array used to bound/pad the size
+        * of the union that holds sync regs.
+        */
+       #define SYNC_REGS_SIZE_BYTES 2048
        /*
         * shared registers between kvm and userspace.
         * kvm_valid_regs specifies the register classes set by the host
@@ -407,7 +411,7 @@ struct kvm_run {
        __u64 kvm_dirty_regs;
        union {
                struct kvm_sync_regs regs;
-               char padding[2048];
+               char padding[SYNC_REGS_SIZE_BYTES];
        } s;
 };
 
@@ -672,6 +676,13 @@ struct kvm_ioeventfd {
        __u8  pad[36];
 };
 
+#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
+                                              KVM_X86_DISABLE_EXITS_HTL | \
+                                              KVM_X86_DISABLE_EXITS_PAUSE)
+
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
        /* in */
@@ -936,6 +947,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
 #define KVM_CAP_GET_MSR_FEATURES 153
+#define KVM_CAP_HYPERV_EVENTFD 154
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1375,6 +1387,10 @@ struct kvm_enc_region {
 #define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
 #define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
 
+/* Available with KVM_CAP_HYPERV_EVENTFD */
+#define KVM_HYPERV_EVENTFD        _IOW(KVMIO,  0xbd, struct kvm_hyperv_eventfd)
+
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
        /* Guest initialization commands */
@@ -1515,4 +1531,14 @@ struct kvm_assigned_msix_entry {
 #define KVM_ARM_DEV_EL1_PTIMER         (1 << 1)
 #define KVM_ARM_DEV_PMU                        (1 << 2)
 
+struct kvm_hyperv_eventfd {
+       __u32 conn_id;
+       __s32 fd;
+       __u32 flags;
+       __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK                0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN    (1 << 0)
+
 #endif /* __LINUX_KVM_H */
index 912b85b52344b31aa405b546b5d1160a61a52df1..b8e288a1f7409012d50e464e7993b96d4c404610 100644 (file)
@@ -650,11 +650,23 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_COMM_EXEC             (1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT            (1 << 13)
 /*
- * Indicates that the content of PERF_SAMPLE_IP points to
- * the actual instruction that triggered the event. See also
- * perf_event_attr::precise_ip.
+ * These PERF_RECORD_MISC_* flags below are safely reused
+ * for the following events:
+ *
+ *   PERF_RECORD_MISC_EXACT_IP           - PERF_RECORD_SAMPLE of precise events
+ *   PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *
+ *
+ * PERF_RECORD_MISC_EXACT_IP:
+ *   Indicates that the content of PERF_SAMPLE_IP points to
+ *   the actual instruction that triggered the event. See also
+ *   perf_event_attr::precise_ip.
+ *
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
+ *   Indicates that thread was preempted in TASK_RUNNING state.
  */
 #define PERF_RECORD_MISC_EXACT_IP              (1 << 14)
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT    (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
index 07d61583fd02bd08b9748d974bca8b86a0706a3a..ed0a120d4f084fa0cfc10b5257ea9bce0c3d3a24 100644 (file)
@@ -242,6 +242,7 @@ typedef int __bitwise snd_pcm_format_t;
 #define        SNDRV_PCM_FORMAT_DSD_U16_BE     ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */
 #define        SNDRV_PCM_FORMAT_DSD_U32_BE     ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */
 #define        SNDRV_PCM_FORMAT_LAST           SNDRV_PCM_FORMAT_DSD_U32_BE
+#define        SNDRV_PCM_FORMAT_FIRST          SNDRV_PCM_FORMAT_S8
 
 #ifdef SNDRV_LITTLE_ENDIAN
 #define        SNDRV_PCM_FORMAT_S16            SNDRV_PCM_FORMAT_S16_LE
index e6d5f8d1477fbc75f722da2d9fe31b16554d9cf5..f3fab4af4260e92e1f5aadcf65560a045f710f0a 100644 (file)
@@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
 FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
 FEATURE_DISPLAY = libelf bpf
 
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
 
 check_feat := 1
index 76b36cc16e7fc803debd2985c9508d66804dc4f1..6a8a00097fd8f07956dca01fcb040fc92cb54774 100644 (file)
@@ -91,6 +91,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
        attr.btf_fd = create_attr->btf_fd;
        attr.btf_key_id = create_attr->btf_key_id;
        attr.btf_value_id = create_attr->btf_value_id;
+       attr.map_ifindex = create_attr->map_ifindex;
 
        return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
@@ -201,6 +202,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
        attr.log_size = 0;
        attr.log_level = 0;
        attr.kern_version = load_attr->kern_version;
+       attr.prog_ifindex = load_attr->prog_ifindex;
        memcpy(attr.prog_name, load_attr->name,
               min(name_len, BPF_OBJ_NAME_LEN - 1));
 
@@ -458,6 +460,16 @@ int bpf_map_get_fd_by_id(__u32 id)
        return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
 }
 
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+       union bpf_attr attr;
+
+       bzero(&attr, sizeof(attr));
+       attr.btf_id = id;
+
+       return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 {
        union bpf_attr attr;
index 553b11ad52b3052ae3d3276ba5cc17c1a3f2c53d..15bff7728cf1d0131316633d7e60ab27aa3100fe 100644 (file)
@@ -38,6 +38,7 @@ struct bpf_create_map_attr {
        __u32 btf_fd;
        __u32 btf_key_id;
        __u32 btf_value_id;
+       __u32 map_ifindex;
 };
 
 int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
@@ -64,6 +65,7 @@ struct bpf_load_program_attr {
        size_t insns_cnt;
        const char *license;
        __u32 kern_version;
+       __u32 prog_ifindex;
 };
 
 /* Recommend log buffer size */
@@ -98,6 +100,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
 int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
 int bpf_prog_get_fd_by_id(__u32 id);
 int bpf_map_get_fd_by_id(__u32 id);
+int bpf_btf_get_fd_by_id(__u32 id);
 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
                   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
index 58b6255abc7ad410b8302ce49aec7e157a188b9d..2bac710e3194d861ea7fa4b8196146059972e900 100644 (file)
@@ -281,7 +281,7 @@ int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
 
        for (i = 1; i <= btf->nr_types; i++) {
                const struct btf_type *t = btf->types[i];
-               const char *name = btf_name_by_offset(btf, t->name);
+               const char *name = btf_name_by_offset(btf, t->name_off);
 
                if (name && !strcmp(type_name, name))
                        return i;
index 6513e0b08795bec18bf9c21e6660e7320cd6e717..cbdf34a6fb9379fa241629a7cd34d3b48ba3ebb4 100644 (file)
@@ -31,6 +31,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
+#include <perf-sys.h>
 #include <asm/unistd.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -177,6 +178,7 @@ struct bpf_program {
        /* Index in elf obj file, for relocation use. */
        int idx;
        char *name;
+       int prog_ifindex;
        char *section_name;
        struct bpf_insn *insns;
        size_t insns_cnt, main_prog_cnt;
@@ -212,6 +214,7 @@ struct bpf_map {
        int fd;
        char *name;
        size_t offset;
+       int map_ifindex;
        struct bpf_map_def def;
        uint32_t btf_key_id;
        uint32_t btf_value_id;
@@ -1090,6 +1093,7 @@ bpf_object__create_maps(struct bpf_object *obj)
                int *pfd = &map->fd;
 
                create_attr.name = map->name;
+               create_attr.map_ifindex = map->map_ifindex;
                create_attr.map_type = def->type;
                create_attr.map_flags = def->map_flags;
                create_attr.key_size = def->key_size;
@@ -1272,7 +1276,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 static int
 load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
             const char *name, struct bpf_insn *insns, int insns_cnt,
-            char *license, u32 kern_version, int *pfd)
+            char *license, u32 kern_version, int *pfd, int prog_ifindex)
 {
        struct bpf_load_program_attr load_attr;
        char *log_buf;
@@ -1286,6 +1290,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
        load_attr.insns_cnt = insns_cnt;
        load_attr.license = license;
        load_attr.kern_version = kern_version;
+       load_attr.prog_ifindex = prog_ifindex;
 
        if (!load_attr.insns || !load_attr.insns_cnt)
                return -EINVAL;
@@ -1367,7 +1372,8 @@ bpf_program__load(struct bpf_program *prog,
                }
                err = load_program(prog->type, prog->expected_attach_type,
                                   prog->name, prog->insns, prog->insns_cnt,
-                                  license, kern_version, &fd);
+                                  license, kern_version, &fd,
+                                  prog->prog_ifindex);
                if (!err)
                        prog->instances.fds[0] = fd;
                goto out;
@@ -1398,7 +1404,8 @@ bpf_program__load(struct bpf_program *prog,
                err = load_program(prog->type, prog->expected_attach_type,
                                   prog->name, result.new_insn_ptr,
                                   result.new_insn_cnt,
-                                  license, kern_version, &fd);
+                                  license, kern_version, &fd,
+                                  prog->prog_ifindex);
 
                if (err) {
                        pr_warning("Loading the %dth instance of program '%s' failed\n",
@@ -1437,9 +1444,37 @@ bpf_object__load_progs(struct bpf_object *obj)
        return 0;
 }
 
-static int bpf_object__validate(struct bpf_object *obj)
+static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
+{
+       switch (type) {
+       case BPF_PROG_TYPE_SOCKET_FILTER:
+       case BPF_PROG_TYPE_SCHED_CLS:
+       case BPF_PROG_TYPE_SCHED_ACT:
+       case BPF_PROG_TYPE_XDP:
+       case BPF_PROG_TYPE_CGROUP_SKB:
+       case BPF_PROG_TYPE_CGROUP_SOCK:
+       case BPF_PROG_TYPE_LWT_IN:
+       case BPF_PROG_TYPE_LWT_OUT:
+       case BPF_PROG_TYPE_LWT_XMIT:
+       case BPF_PROG_TYPE_SOCK_OPS:
+       case BPF_PROG_TYPE_SK_SKB:
+       case BPF_PROG_TYPE_CGROUP_DEVICE:
+       case BPF_PROG_TYPE_SK_MSG:
+       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+               return false;
+       case BPF_PROG_TYPE_UNSPEC:
+       case BPF_PROG_TYPE_KPROBE:
+       case BPF_PROG_TYPE_TRACEPOINT:
+       case BPF_PROG_TYPE_PERF_EVENT:
+       case BPF_PROG_TYPE_RAW_TRACEPOINT:
+       default:
+               return true;
+       }
+}
+
+static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
 {
-       if (obj->kern_version == 0) {
+       if (needs_kver && obj->kern_version == 0) {
                pr_warning("%s doesn't provide kernel version\n",
                           obj->path);
                return -LIBBPF_ERRNO__KVERSION;
@@ -1448,7 +1483,8 @@ static int bpf_object__validate(struct bpf_object *obj)
 }
 
 static struct bpf_object *
-__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
+__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
+                  bool needs_kver)
 {
        struct bpf_object *obj;
        int err;
@@ -1466,7 +1502,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
        CHECK_ERR(bpf_object__check_endianness(obj), err, out);
        CHECK_ERR(bpf_object__elf_collect(obj), err, out);
        CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
-       CHECK_ERR(bpf_object__validate(obj), err, out);
+       CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
 
        bpf_object__elf_finish(obj);
        return obj;
@@ -1483,7 +1519,7 @@ struct bpf_object *bpf_object__open(const char *path)
 
        pr_debug("loading %s\n", path);
 
-       return __bpf_object__open(path, NULL, 0);
+       return __bpf_object__open(path, NULL, 0, true);
 }
 
 struct bpf_object *bpf_object__open_buffer(void *obj_buf,
@@ -1506,7 +1542,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
        pr_debug("loading object '%s' from buffer\n",
                 name);
 
-       return __bpf_object__open(name, obj_buf, obj_buf_sz);
+       return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -1961,8 +1997,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
-                                                enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+                                          enum bpf_attach_type type)
 {
        prog->expected_attach_type = type;
 }
@@ -2158,14 +2194,18 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        enum bpf_attach_type expected_attach_type;
        enum bpf_prog_type prog_type;
        struct bpf_object *obj;
+       struct bpf_map *map;
        int section_idx;
        int err;
 
        if (!attr)
                return -EINVAL;
+       if (!attr->file)
+               return -EINVAL;
 
-       obj = bpf_object__open(attr->file);
-       if (IS_ERR(obj))
+       obj = __bpf_object__open(attr->file, NULL, 0,
+                                bpf_prog_type__needs_kver(attr->prog_type));
+       if (IS_ERR_OR_NULL(obj))
                return -ENOENT;
 
        bpf_object__for_each_program(prog, obj) {
@@ -2174,6 +2214,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                 * section name.
                 */
                prog_type = attr->prog_type;
+               prog->prog_ifindex = attr->ifindex;
                expected_attach_type = attr->expected_attach_type;
                if (prog_type == BPF_PROG_TYPE_UNSPEC) {
                        section_idx = bpf_program__identify_section(prog);
@@ -2194,6 +2235,10 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                        first_prog = prog;
        }
 
+       bpf_map__for_each(map, obj) {
+               map->map_ifindex = attr->ifindex;
+       }
+
        if (!first_prog) {
                pr_warning("object file doesn't contain bpf program\n");
                bpf_object__close(obj);
@@ -2210,3 +2255,63 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        *prog_fd = bpf_program__fd(first_prog);
        return 0;
 }
+
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mem, unsigned long size,
+                          unsigned long page_size, void **buf, size_t *buf_len,
+                          bpf_perf_event_print_t fn, void *priv)
+{
+       volatile struct perf_event_mmap_page *header = mem;
+       __u64 data_tail = header->data_tail;
+       __u64 data_head = header->data_head;
+       void *base, *begin, *end;
+       int ret;
+
+       asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+       if (data_head == data_tail)
+               return LIBBPF_PERF_EVENT_CONT;
+
+       base = ((char *)header) + page_size;
+
+       begin = base + data_tail % size;
+       end = base + data_head % size;
+
+       while (begin != end) {
+               struct perf_event_header *ehdr;
+
+               ehdr = begin;
+               if (begin + ehdr->size > base + size) {
+                       long len = base + size - begin;
+
+                       if (*buf_len < ehdr->size) {
+                               free(*buf);
+                               *buf = malloc(ehdr->size);
+                               if (!*buf) {
+                                       ret = LIBBPF_PERF_EVENT_ERROR;
+                                       break;
+                               }
+                               *buf_len = ehdr->size;
+                       }
+
+                       memcpy(*buf, begin, len);
+                       memcpy(*buf + len, base, ehdr->size - len);
+                       ehdr = (void *)*buf;
+                       begin = base + ehdr->size - len;
+               } else if (begin + ehdr->size == base + size) {
+                       begin = base;
+               } else {
+                       begin += ehdr->size;
+               }
+
+               ret = fn(ehdr, priv);
+               if (ret != LIBBPF_PERF_EVENT_CONT)
+                       break;
+
+               data_tail += ehdr->size;
+       }
+
+       __sync_synchronize(); /* smp_mb() */
+       header->data_tail = data_tail;
+
+       return ret;
+}
index d6ac4fa6f472632566118cfee45ab2cc04ae53a2..cd3fd8d782c7bd8813d6b36f0378fc38ac3a370f 100644 (file)
@@ -52,8 +52,8 @@ enum libbpf_errno {
 int libbpf_strerror(int err, char *buf, size_t size);
 
 /*
- * In include/linux/compiler-gcc.h, __printf is defined. However
- * it should be better if libbpf.h doesn't depend on Linux header file.
+ * __printf is defined in include/linux/compiler-gcc.h. However,
+ * it would be better if libbpf.h didn't depend on Linux header files.
  * So instead of __printf, here we use gcc attribute directly.
  */
 typedef int (*libbpf_print_fn_t)(const char *, ...)
@@ -92,7 +92,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
                         bpf_object_clear_priv_t clear_priv);
 void *bpf_object__priv(struct bpf_object *prog);
 
-/* Accessors of bpf_program. */
+/* Accessors of bpf_program */
 struct bpf_program;
 struct bpf_program *bpf_program__next(struct bpf_program *prog,
                                      struct bpf_object *obj);
@@ -121,28 +121,28 @@ struct bpf_insn;
 
 /*
  * Libbpf allows callers to adjust BPF programs before being loaded
- * into kernel. One program in an object file can be transform into
- * multiple variants to be attached to different code.
+ * into kernel. One program in an object file can be transformed into
+ * multiple variants to be attached to different hooks.
  *
  * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
- * are APIs for this propose.
+ * form an API for this purpose.
  *
  * - bpf_program_prep_t:
- *   It defines 'preprocessor', which is a caller defined function
+ *   Defines a 'preprocessor', which is a caller defined function
  *   passed to libbpf through bpf_program__set_prep(), and will be
  *   called before program is loaded. The processor should adjust
- *   the program one time for each instances according to the number
+ *   the program one time for each instance according to the instance id
  *   passed to it.
  *
  * - bpf_program__set_prep:
- *   Attachs a preprocessor to a BPF program. The number of instances
- *   whould be created is also passed through this function.
+ *   Attaches a preprocessor to a BPF program. The number of instances
+ *   that should be created is also passed through this function.
  *
  * - bpf_program__nth_fd:
- *   After the program is loaded, get resuling fds from bpf program for
- *   each instances.
+ *   After the program is loaded, get resulting FD of a given instance
+ *   of the BPF program.
  *
- * If bpf_program__set_prep() is not used, the program whould be loaded
+ * If bpf_program__set_prep() is not used, the program would be loaded
  * without adjustment during bpf_object__load(). The program has only
  * one instance. In this case bpf_program__fd(prog) is equal to
  * bpf_program__nth_fd(prog, 0).
@@ -156,7 +156,7 @@ struct bpf_prog_prep_result {
        struct bpf_insn *new_insn_ptr;
        int new_insn_cnt;
 
-       /* If not NULL, result fd is set to it */
+       /* If not NULL, result FD is written to it. */
        int *pfd;
 };
 
@@ -169,8 +169,8 @@ struct bpf_prog_prep_result {
  *  - res:     Output parameter, result of transformation.
  *
  * Return value:
- *  - Zero: pre-processing success.
- *  - Non-zero: pre-processing, stop loading.
+ *  - Zero:    pre-processing success.
+ *  - Non-zero:        pre-processing error, stop loading.
  */
 typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
                                  struct bpf_insn *insns, int insns_cnt,
@@ -182,7 +182,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
 int bpf_program__nth_fd(struct bpf_program *prog, int n);
 
 /*
- * Adjust type of bpf program. Default is kprobe.
+ * Adjust type of BPF program. Default is kprobe.
  */
 int bpf_program__set_socket_filter(struct bpf_program *prog);
 int bpf_program__set_tracepoint(struct bpf_program *prog);
@@ -193,6 +193,8 @@ int bpf_program__set_sched_act(struct bpf_program *prog);
 int bpf_program__set_xdp(struct bpf_program *prog);
 int bpf_program__set_perf_event(struct bpf_program *prog);
 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+                                          enum bpf_attach_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
@@ -204,10 +206,10 @@ bool bpf_program__is_xdp(struct bpf_program *prog);
 bool bpf_program__is_perf_event(struct bpf_program *prog);
 
 /*
- * We don't need __attribute__((packed)) now since it is
- * unnecessary for 'bpf_map_def' because they are all aligned.
- * In addition, using it will trigger -Wpacked warning message,
- * and will be treated as an error due to -Werror.
+ * No need for __attribute__((packed)), all members of 'bpf_map_def'
+ * are all aligned.  In addition, using __attribute__((packed))
+ * would trigger a -Wpacked warning message, and lead to an error
+ * if -Werror is set.
  */
 struct bpf_map_def {
        unsigned int type;
@@ -218,8 +220,8 @@ struct bpf_map_def {
 };
 
 /*
- * There is another 'struct bpf_map' in include/linux/map.h. However,
- * it is not a uapi header so no need to consider name clash.
+ * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
+ * so no need to worry about a name clash.
  */
 struct bpf_map;
 struct bpf_map *
@@ -227,7 +229,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
 
 /*
  * Get bpf_map through the offset of corresponding struct bpf_map_def
- * in the bpf object file.
+ * in the BPF object file.
  */
 struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
@@ -257,6 +259,7 @@ struct bpf_prog_load_attr {
        const char *file;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
+       int ifindex;
 };
 
 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
@@ -265,4 +268,17 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
                  struct bpf_object **pobj, int *prog_fd);
 
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+
+enum bpf_perf_event_ret {
+       LIBBPF_PERF_EVENT_DONE  = 0,
+       LIBBPF_PERF_EVENT_ERROR = -1,
+       LIBBPF_PERF_EVENT_CONT  = -2,
+};
+
+typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event,
+                                                         void *priv);
+int bpf_perf_event_read_simple(void *mem, unsigned long size,
+                              unsigned long page_size,
+                              void **buf, size_t *buf_len,
+                              bpf_perf_event_print_t fn, void *priv);
 #endif
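A hypothetical consumer sketch for the bpf_perf_event_read_simple() API
declared above (names invented for illustration): drain one mmap'ed perf
ring, letting the helper reassemble records that wrap around the end of the
data area into the scratch buffer it manages:

	#include <stdio.h>
	#include <linux/perf_event.h>
	#include "libbpf.h"

	static enum bpf_perf_event_ret print_rec(void *event, void *priv)
	{
		struct perf_event_header *hdr = event;

		printf("record: type %u size %u\n", hdr->type, hdr->size);
		return LIBBPF_PERF_EVENT_CONT;
	}

	/* mem: base of the mmap'ed ring (one metadata page plus 2^n data
	 * pages); size: size of the data area in bytes.
	 */
	static void drain_ring(void *mem, size_t size, size_t page_size)
	{
		static void *scratch;
		static size_t scratch_len;

		bpf_perf_event_read_simple(mem, size, page_size, &scratch,
					   &scratch_len, print_rec, NULL);
	}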
index f6a1babcbac416d0893b72004ce1261d6373d989..cb7154eccbdc1e6a825060b137155ce4560df2a4 100644 (file)
@@ -433,7 +433,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
 
        if (ambiguous_option) {
                 fprintf(stderr,
-                        " Error: Ambiguous option: %s (could be --%s%s or --%s%s)",
+                        " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n",
                         arg,
                         (ambiguous_flags & OPT_UNSET) ?  "no-" : "",
                         ambiguous_option->long_name,
@@ -458,7 +458,7 @@ static void check_typos(const char *arg, const struct option *options)
                return;
 
        if (strstarts(arg, "no-")) {
-               fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
+               fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
                exit(129);
        }
 
@@ -466,7 +466,7 @@ static void check_typos(const char *arg, const struct option *options)
                if (!options->long_name)
                        continue;
                if (strstarts(options->long_name, arg)) {
-                       fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
+                       fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
                        exit(129);
                }
        }
index 8ae824dbfca3fea24889064aca1fd46d60f0cbf2..f76d9914686a238a976b7230b4c21b9a20092fbe 100644 (file)
@@ -31,8 +31,8 @@ INCLUDES := -I$(srctree)/tools/include \
            -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
            -I$(srctree)/tools/objtool/arch/$(ARCH)/include
 WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
-CFLAGS   += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
-LDFLAGS  += -lelf $(LIBSUBCMD)
+CFLAGS   += -Werror $(WARNINGS) $(HOSTCFLAGS) -g $(INCLUDES)
+LDFLAGS  += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS)
 
 # Allow old libelf to be used:
 elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
index b3e32b010ab194ed613034234c403c4067502776..c2c01f84df75f1f9b35a3c898686a82973026d88 100644 (file)
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
        return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+       return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+               (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+                X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
 #endif /* _ASM_X86_INSN_H */
index 5409f6f6c48d63128642454572f215717aac22f2..3a31b238f88564a94943ebd4622b887dc0815193 100644 (file)
@@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
        return next;
 }
 
+static struct instruction *next_insn_same_func(struct objtool_file *file,
+                                              struct instruction *insn)
+{
+       struct instruction *next = list_next_entry(insn, list);
+       struct symbol *func = insn->func;
+
+       if (!func)
+               return NULL;
+
+       if (&next->list != &file->insn_list && next->func == func)
+               return next;
+
+       /* Check if we're already in the subfunction: */
+       if (func == func->cfunc)
+               return NULL;
+
+       /* Move to the subfunction: */
+       return find_insn(file, func->cfunc->sec, func->cfunc->offset);
+}
+
+#define func_for_each_insn_all(file, func, insn)                       \
+       for (insn = find_insn(file, func->sec, func->offset);           \
+            insn;                                                      \
+            insn = next_insn_same_func(file, insn))
+
 #define func_for_each_insn(file, func, insn)                           \
        for (insn = find_insn(file, func->sec, func->offset);           \
             insn && &insn->list != &file->insn_list &&                 \
@@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
                        if (!strcmp(func->name, global_noreturns[i]))
                                return 1;
 
-       if (!func->sec)
+       if (!func->len)
                return 0;
 
-       func_for_each_insn(file, func, insn) {
+       insn = find_insn(file, func->sec, func->offset);
+       if (!insn->func)
+               return 0;
+
+       func_for_each_insn_all(file, func, insn) {
                empty = false;
 
                if (insn->type == INSN_RETURN)
@@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
         * case, the function's dead-end status depends on whether the target
         * of the sibling call returns.
         */
-       func_for_each_insn(file, func, insn) {
-               if (insn->sec != func->sec ||
-                   insn->offset >= func->offset + func->len)
-                       break;
-
+       func_for_each_insn_all(file, func, insn) {
                if (insn->type == INSN_JUMP_UNCONDITIONAL) {
                        struct instruction *dest = insn->jump_dest;
-                       struct symbol *dest_func;
 
                        if (!dest)
                                /* sibling call to another file */
                                return 0;
 
-                       if (dest->sec != func->sec ||
-                           dest->offset < func->offset ||
-                           dest->offset >= func->offset + func->len) {
-                               /* local sibling call */
-                               dest_func = find_symbol_by_offset(dest->sec,
-                                                                 dest->offset);
-                               if (!dest_func)
-                                       continue;
+                       if (dest->func && dest->func->pfunc != insn->func->pfunc) {
 
+                               /* local sibling call */
                                if (recursion == 5) {
-                                       WARN_FUNC("infinite recursion (objtool bug!)",
-                                                 dest->sec, dest->offset);
-                                       return -1;
+                                       /*
+                                        * Infinite recursion: two functions
+                                        * have sibling calls to each other.
+                                        * This is a very rare case.  It means
+                                        * they aren't dead ends.
+                                        */
+                                       return 0;
                                }
 
-                               return __dead_end_function(file, dest_func,
+                               return __dead_end_function(file, dest->func,
                                                           recursion + 1);
                        }
                }
@@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file)
                        if (!ignore_func(file, func))
                                continue;
 
-                       func_for_each_insn(file, func, insn)
+                       func_for_each_insn_all(file, func, insn)
                                insn->ignore = true;
                }
        }
@@ -782,30 +804,35 @@ static int add_special_section_alts(struct objtool_file *file)
        return ret;
 }
 
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-                           struct instruction *insn, struct rela *table,
-                           struct rela *next_table)
+static int add_switch_table(struct objtool_file *file, struct instruction *insn,
+                           struct rela *table, struct rela *next_table)
 {
        struct rela *rela = table;
        struct instruction *alt_insn;
        struct alternative *alt;
+       struct symbol *pfunc = insn->func->pfunc;
+       unsigned int prev_offset = 0;
 
        list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
                if (rela == next_table)
                        break;
 
-               if (rela->sym->sec != insn->sec ||
-                   rela->addend <= func->offset ||
-                   rela->addend >= func->offset + func->len)
+               /* Make sure the switch table entries are consecutive: */
+               if (prev_offset && rela->offset != prev_offset + 8)
                        break;
 
-               alt_insn = find_insn(file, insn->sec, rela->addend);
-               if (!alt_insn) {
-                       WARN("%s: can't find instruction at %s+0x%x",
-                            file->rodata->rela->name, insn->sec->name,
-                            rela->addend);
-                       return -1;
-               }
+               /* Detect function pointers from contiguous objects: */
+               if (rela->sym->sec == pfunc->sec &&
+                   rela->addend == pfunc->offset)
+                       break;
+
+               alt_insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (!alt_insn)
+                       break;
+
+               /* Make sure the jmp dest is in the function or subfunction: */
+               if (alt_insn->func->pfunc != pfunc)
+                       break;
 
                alt = malloc(sizeof(*alt));
                if (!alt) {
@@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
 
                alt->insn = alt_insn;
                list_add_tail(&alt->list, &insn->alts);
+               prev_offset = rela->offset;
+       }
+
+       if (!prev_offset) {
+               WARN_FUNC("can't find switch jump table",
+                         insn->sec, insn->offset);
+               return -1;
        }
 
        return 0;
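
The consecutiveness check above leans on the x86-64 jump-table layout: one
8-byte entry, and one relocation, per case. A hedged sketch of the kind of C
switch GCC typically lowers to such an anonymous .rodata table (illustrative,
not from this patch):

    /* Typically becomes "jmpq *table(,%rax,8)" plus an anonymous jump
     * table in .rodata with one 8-byte entry per case; those entries
     * are the consecutive relas the loop above walks. */
    int classify(int c)
    {
            switch (c) {
            case 0: return 10;
            case 1: return 11;
            case 2: return 12;
            case 3: return 13;
            case 4: return 14;
            case 5: return 15;
            default: return -1;
            }
    }
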
@@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file,
 {
        struct rela *text_rela, *rodata_rela;
        struct instruction *orig_insn = insn;
+       unsigned long table_offset;
 
-       text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-       if (text_rela && text_rela->sym == file->rodata->sym) {
-               /* case 1 */
-               rodata_rela = find_rela_by_dest(file->rodata,
-                                               text_rela->addend);
-               if (rodata_rela)
-                       return rodata_rela;
-
-               /* case 2 */
-               rodata_rela = find_rela_by_dest(file->rodata,
-                                               text_rela->addend + 4);
-               if (!rodata_rela)
-                       return NULL;
-
-               file->ignore_unreachables = true;
-               return rodata_rela;
-       }
-
-       /* case 3 */
        /*
         * Backward search using the @first_jump_src links; these help skip
         * much of the 'in between' code, which keeps us from getting
         * confused by it.
         */
-       for (insn = list_prev_entry(insn, list);
-
+       for (;
             &insn->list != &file->insn_list &&
             insn->sec == func->sec &&
             insn->offset >= func->offset;
 
             insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
 
-               if (insn->type == INSN_JUMP_DYNAMIC)
+               if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
                        break;
 
                /* allow small jumps within the range */
@@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file,
                if (!text_rela || text_rela->sym != file->rodata->sym)
                        continue;
 
+               table_offset = text_rela->addend;
+               if (text_rela->type == R_X86_64_PC32)
+                       table_offset += 4;
+
                /*
                 * Make sure the .rodata address isn't associated with a
                 * symbol.  gcc jump tables are anonymous data.
                 */
-               if (find_symbol_containing(file->rodata, text_rela->addend))
+               if (find_symbol_containing(file->rodata, table_offset))
                        continue;
 
-               rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
-               if (!rodata_rela)
-                       continue;
+               rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+               if (rodata_rela) {
+                       /*
+                        * Use of RIP-relative switch jumps is quite rare, and
+                        * indicates a rare GCC quirk/bug which can leave dead
+                        * code behind.
+                        */
+                       if (text_rela->type == R_X86_64_PC32)
+                               file->ignore_unreachables = true;
 
-               return rodata_rela;
+                       return rodata_rela;
+               }
        }
 
        return NULL;
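
The R_X86_64_PC32 adjustment above is worth spelling out: a RIP-relative
displacement is encoded relative to the end of its 4-byte field, so the
relocation addend lands 4 bytes short of the table. A rough, hand-written
illustration of the two reference flavors (not taken from the patch):

    /*
     * jmpq  *table(,%rax,8)     absolute reference: the text rela's addend
     *                           is the table's offset within .rodata
     *
     * leaq  table(%rip), %rdx   RIP-relative reference (R_X86_64_PC32):
     *                           the addend is the table offset minus 4,
     *                           hence "table_offset += 4" above
     */
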
@@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file,
        struct rela *rela, *prev_rela = NULL;
        int ret;
 
-       func_for_each_insn(file, func, insn) {
+       func_for_each_insn_all(file, func, insn) {
                if (!last)
                        last = insn;
 
@@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file,
                 * the beginning of another switch table in the same function.
                 */
                if (prev_jump) {
-                       ret = add_switch_table(file, func, prev_jump, prev_rela,
-                                              rela);
+                       ret = add_switch_table(file, prev_jump, prev_rela, rela);
                        if (ret)
                                return ret;
                }
@@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file,
        }
 
        if (prev_jump) {
-               ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+               ret = add_switch_table(file, prev_jump, prev_rela, NULL);
                if (ret)
                        return ret;
        }
@@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
        while (1) {
                next_insn = next_insn_same_sec(file, insn);
 
-
-               if (file->c_file && func && insn->func && func != insn->func) {
+               if (file->c_file && func && insn->func && func != insn->func->pfunc) {
                        WARN("%s() falls through to next function %s()",
                             func->name, insn->func->name);
                        return 1;
                }
 
-               if (insn->func)
-                       func = insn->func;
+               func = insn->func ? insn->func->pfunc : NULL;
 
                if (func && insn->ignore) {
                        WARN_FUNC("BUG: why am I validating an ignored function?",
@@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 
                                i = insn;
                                save_insn = NULL;
-                               func_for_each_insn_continue_reverse(file, func, i) {
+                               func_for_each_insn_continue_reverse(file, insn->func, i) {
                                        if (i->save) {
                                                save_insn = i;
                                                break;
@@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
                case INSN_JUMP_UNCONDITIONAL:
                        if (insn->jump_dest &&
                            (!func || !insn->jump_dest->func ||
-                            func == insn->jump_dest->func)) {
+                            insn->jump_dest->func->pfunc == func)) {
                                ret = validate_branch(file, insn->jump_dest,
                                                      state);
                                if (ret)
@@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file)
 
        for_each_sec(file, sec) {
                list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
+                       if (func->type != STT_FUNC || func->pfunc != func)
                                continue;
 
                        insn = find_insn(file, sec, func->offset);
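
For readers unfamiliar with the GCC 8 behavior all of these pfunc checks
accommodate: the compiler may split a function's unlikely path into a
separate child symbol in .text.unlikely. A hypothetical sketch of code prone
to such a split (names and attributes illustrative):

    extern void die(const char *msg) __attribute__((noreturn));

    int foo(int x)
    {
            if (__builtin_expect(x < 0, 0))
                    die("negative");        /* may be split out as foo.cold.N */
            return x * 2;
    }

The new "func->pfunc != func" test skips such child symbols, so each function
is validated exactly once, starting from its parent.
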
index c1c338661699788c8189becaab8465ed1bdcd775..4e60e105583ee803916589ca56df0e81e12b8fb3 100644 (file)
@@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
        return NULL;
 }
 
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+{
+       struct section *sec;
+       struct symbol *sym;
+
+       list_for_each_entry(sec, &elf->sections, list)
+               list_for_each_entry(sym, &sec->symbol_list, list)
+                       if (!strcmp(sym->name, name))
+                               return sym;
+
+       return NULL;
+}
+
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
 {
        struct symbol *sym;
@@ -203,10 +216,11 @@ static int read_sections(struct elf *elf)
 
 static int read_symbols(struct elf *elf)
 {
-       struct section *symtab;
-       struct symbol *sym;
+       struct section *symtab, *sec;
+       struct symbol *sym, *pfunc;
        struct list_head *entry, *tmp;
        int symbols_nr, i;
+       char *coldstr;
 
        symtab = find_section_by_name(elf, ".symtab");
        if (!symtab) {
@@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf)
                hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
        }
 
+       /* Create parent/child links for any cold subfunctions */
+       list_for_each_entry(sec, &elf->sections, list) {
+               list_for_each_entry(sym, &sec->symbol_list, list) {
+                       if (sym->type != STT_FUNC)
+                               continue;
+                       sym->pfunc = sym->cfunc = sym;
+                       coldstr = strstr(sym->name, ".cold.");
+                       if (coldstr) {
+                               coldstr[0] = '\0';
+                               pfunc = find_symbol_by_name(elf, sym->name);
+                               coldstr[0] = '.';
+
+                               if (!pfunc) {
+                                       WARN("%s(): can't find parent function",
+                                            sym->name);
+                                       goto err;
+                               }
+
+                               sym->pfunc = pfunc;
+                               pfunc->cfunc = sym;
+                       }
+               }
+       }
+
        return 0;
 
 err:
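
The name surgery in the block above is easy to miss: the first '.' of the
".cold." suffix is temporarily overwritten with a NUL so the parent's name
can be looked up, then restored. A minimal standalone sketch of that
derivation (the symbol name is hypothetical):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char name[] = "blk_mq_make_request.cold.42";   /* hypothetical */
            char *coldstr = strstr(name, ".cold.");

            if (coldstr) {
                    coldstr[0] = '\0';
                    printf("parent function: %s\n", name);  /* truncated at ".cold." */
                    coldstr[0] = '.';                       /* restore the full name */
            }
            printf("subfunction:     %s\n", name);
            return 0;
    }
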
index d86e2ff14466148d3b8ae46065274956a5b3c4f8..de5cd2ddded987bf524be46e446bd1e814422761 100644 (file)
@@ -61,6 +61,7 @@ struct symbol {
        unsigned char bind, type;
        unsigned long offset;
        unsigned int len;
+       struct symbol *pfunc, *cfunc;
 };
 
 struct rela {
@@ -86,6 +87,7 @@ struct elf {
 struct elf *elf_open(const char *name, int flags);
 struct section *find_section_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
 struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
index 5b4fff3adc4be4ca66a30a9e8b85b55e1a472f4a..32f4a898e3f2f323fcd66021728192d8961b89f9 100644 (file)
@@ -334,6 +334,11 @@ annotate.*::
 
                99.93 │      mov    %eax,%eax
 
+       annotate.offset_level::
+               Default is '1', meaning only jump targets will have offsets shown right
+               beside the instruction. When set to '2', 'call' instructions will also
+               have their offsets shown; '3' or higher will show offsets for all
+               instructions.
+
 hist.*::
        hist.percentage::
                This option controls how the overhead of filtered entries is calculated -
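
A hedged usage sketch for the new annotate.offset_level key documented above
(assuming a perf built from this tree; the key can also be set by editing
~/.perfconfig directly):

    $ perf config annotate.offset_level=2
    $ perf annotate --stdio
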
index b0211410969b39473dcc43158e195a005de16075..f8d2167cf3e7a2221b8688aff0f0fa55968db369 100644 (file)
@@ -28,29 +28,46 @@ OPTIONS
 <command>...::
        Any command you can specify in a shell.
 
+-i::
+--input=<file>::
+       Input file name.
+
 -f::
 --force::
        Don't do ownership validation
 
 -t::
---type=::
+--type=<type>::
        Select the memory operation type: load or store (default: load,store)
 
 -D::
---dump-raw-samples=::
+--dump-raw-samples::
        Dump the raw decoded samples on the screen in a format that is easy to parse, with
        one sample per line.
 
 -x::
---field-separator::
+--field-separator=<separator>::
        Specify the field separator used when dumping raw samples (-D option). By default,
        the separator is the space character.
 
 -C::
---cpu-list::
-       Restrict dump of raw samples to those provided via this option. Note that the same
-       option can be passed in record mode. It will be interpreted the same way as perf
-       record.
+--cpu=<cpu>::
+       Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+       comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+       Default is to monitor all CPUs.
+
+-U::
+--hide-unresolved::
+       Only display entries resolved to a symbol.
+
+-p::
+--phys-data::
+       Record/Report sample physical addresses
+
+RECORD OPTIONS
+--------------
+-e::
+--event <event>::
+       Event selector. Use 'perf mem record -e list' to list available events.
 
 -K::
 --all-kernel::
@@ -60,12 +77,15 @@ OPTIONS
 --all-user::
        Configure all used events to run in user space.
 
---ldload::
+-v::
+--verbose::
+       Be more verbose (show counter open errors, etc)
+
+--ldlat <n>::
        Specify desired latency for loads event.
 
--p::
---phys-data::
-       Record/Report sample physical addresses
+In addition, all 'perf report' options are valid for report, and all
+'perf record' options are valid for record.
 
 SEE ALSO
 --------
index bb33601a823b4e26f13f3fa218ebef2548af2a67..63f938b887dd135d31cc0e8928f650e23bbd3c0b 100644 (file)
@@ -104,8 +104,8 @@ OPTIONS for 'perf sched timehist'
     kallsyms pathname
 
 -g::
---no-call-graph::
-       Do not display call chains if present.
+--call-graph::
+       Display call chains if present (default on).
 
 --max-stack::
        Maximum number of functions to display in backtrace, default 5.
index 36ec0257f8d3ce1007de2fed447d72565c70c25d..afdafe2110a17adea848871e033a85f85972b4a1 100644 (file)
@@ -228,14 +228,15 @@ OPTIONS
        For sample events it's possible to display misc field with -F +misc option,
        following letters are displayed for each bit:
 
-         PERF_RECORD_MISC_KERNEL        K
-         PERF_RECORD_MISC_USER          U
-         PERF_RECORD_MISC_HYPERVISOR    H
-         PERF_RECORD_MISC_GUEST_KERNEL  G
-         PERF_RECORD_MISC_GUEST_USER    g
-         PERF_RECORD_MISC_MMAP_DATA*    M
-         PERF_RECORD_MISC_COMM_EXEC     E
-         PERF_RECORD_MISC_SWITCH_OUT    S
+         PERF_RECORD_MISC_KERNEL               K
+         PERF_RECORD_MISC_USER                 U
+         PERF_RECORD_MISC_HYPERVISOR           H
+         PERF_RECORD_MISC_GUEST_KERNEL         G
+         PERF_RECORD_MISC_GUEST_USER           g
+         PERF_RECORD_MISC_MMAP_DATA*           M
+         PERF_RECORD_MISC_COMM_EXEC            E
+         PERF_RECORD_MISC_SWITCH_OUT           S
+         PERF_RECORD_MISC_SWITCH_OUT_PREEMPT   Sp
 
          $ perf script -F +misc ...
           sched-messaging  1414 K     28690.636582:       4590 cycles ...
index f15b306be1834e33da6a58c1b5dd4e8201dc82cd..e6c3b4e555c257f033fd7bf2ef2635173664dac5 100644 (file)
@@ -153,7 +153,7 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
 
 -I msecs::
 --interval-print msecs::
-Print count deltas every N milliseconds (minimum: 10ms)
+Print count deltas every N milliseconds (minimum: 1ms)
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
        example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
index c7abd83a8e19d7adf42cc825443b69583edbf5b5..ae7dc46e8f8a3f4e74b53dcf1040a4e0b69a96b9 100644 (file)
@@ -68,7 +68,7 @@ ifeq ($(NO_PERF_REGS),0)
 endif
 
 ifneq ($(NO_SYSCALL_TABLE),1)
-  CFLAGS += -DHAVE_SYSCALL_TABLE
+  CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
 endif
 
 # So far there's only x86 and arm libdw unwind support merged in perf.
@@ -847,7 +847,7 @@ ifndef NO_JVMTI
   ifeq ($(feature-jvmti), 1)
     $(call detected_var,JDIR)
   else
-    $(warning No openjdk development package found, please install JDK package)
+    $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel)
     NO_JVMTI := 1
   endif
 endif
diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h
new file mode 100644 (file)
index 0000000..90ec4c8
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+#endif
+
+extern struct test arch_tests[];
+
+#endif
index b30eff9bcc83fd14026100f8626e913fd8f954e9..883c57ff0c084fb00a5b4dffa6275840f97dc814 100644 (file)
@@ -1,2 +1,4 @@
 libperf-y += regs_load.o
 libperf-y += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c
new file mode 100644 (file)
index 0000000..5b1543c
--- /dev/null
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+       {
+               .desc = "DWARF unwind",
+               .func = test__dwarf_unwind,
+       },
+#endif
+       {
+               .func = NULL,
+       },
+};
index fa639e3e52acb37b13dd63920eb963f509c9431b..1ce6bdbda561588fdc1ff4a6c8e99cf6ce7ebeeb 100644 (file)
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <stdbool.h>
index 5c655ad4621e7a8befc7e35aba12720062e3f561..2f595cd73da662be982a71f130f045f734c29fce 100644 (file)
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <api/fs/fs.h>
index 5256741be5496f2e6f42c1026b94ec93653f33c7..1a12e64f51279293dd1f515b84444116d794afbc 100644 (file)
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef INCLUDE__PERF_CS_ETM_H__
index ac4dffc807b89f81d29bb70517471ff827755984..e047571e60800e34aeb4fc8f141289132ef22387 100644 (file)
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <string.h>
index 6cb48e4cffd9a1602525510a5ea4129acd69958a..3afe8256eff275ef94c277dfae9a8c865615d681 100644 (file)
@@ -87,6 +87,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
        struct perf_evsel *pos;
        int diagnose = 0;
 
+       *err = 0;
        if (evlist->nr_entries == 0)
                return NULL;
 
index a4c30f1c70bec19d2175a35477e3ee3ab7bc77d3..163b92f339980e4d75208bf682607d6104c4f072 100644 (file)
@@ -146,21 +146,3 @@ char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
                zfree(&buf);
        return buf;
 }
-
-/*
- * Compare the cpuid string returned by get_cpuid() function
- * with the name generated by the jevents file read from
- * pmu-events/arch/s390/mapfile.csv.
- *
- * Parameter mapcpuid is the cpuid as stored in the
- * pmu-events/arch/s390/mapfile.csv. This is just the type number.
- * Parameter cpuid is the cpuid returned by function get_cpuid().
- */
-int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
-{
-       char *cp = strchr(cpuid, ',');
-
-       if (cp == NULL)
-               return -1;
-       return strncmp(cp + 1, mapcpuid, strlen(mapcpuid));
-}
index d74eaa7aa927d5a3abf5b5cdfe054e0381572f1b..1a38e78117ce6f410cc0521e09fedeaa31cf8519 100644 (file)
@@ -21,7 +21,7 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 $(header): $(sys)/syscall_64.tbl $(systbl)
        @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
         (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
-        || echo "Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true
+        || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true
        $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
 
 clean::
index 5bd1ba8c02829e8f571e7d6453b6ea76165d8445..44f5aba78210e9892bedb25590a8983c1e5ae2d0 100644 (file)
@@ -1,21 +1,43 @@
 // SPDX-License-Identifier: GPL-2.0
 static struct ins x86__instructions[] = {
+       { .name = "adc",        .ops = &mov_ops,  },
+       { .name = "adcb",       .ops = &mov_ops,  },
+       { .name = "adcl",       .ops = &mov_ops,  },
        { .name = "add",        .ops = &mov_ops,  },
        { .name = "addl",       .ops = &mov_ops,  },
        { .name = "addq",       .ops = &mov_ops,  },
+       { .name = "addsd",      .ops = &mov_ops,  },
        { .name = "addw",       .ops = &mov_ops,  },
        { .name = "and",        .ops = &mov_ops,  },
+       { .name = "andb",       .ops = &mov_ops,  },
+       { .name = "andl",       .ops = &mov_ops,  },
+       { .name = "andpd",      .ops = &mov_ops,  },
+       { .name = "andps",      .ops = &mov_ops,  },
+       { .name = "andq",       .ops = &mov_ops,  },
+       { .name = "andw",       .ops = &mov_ops,  },
+       { .name = "bsr",        .ops = &mov_ops,  },
+       { .name = "bt",         .ops = &mov_ops,  },
+       { .name = "btr",        .ops = &mov_ops,  },
        { .name = "bts",        .ops = &mov_ops,  },
+       { .name = "btsq",       .ops = &mov_ops,  },
        { .name = "call",       .ops = &call_ops, },
        { .name = "callq",      .ops = &call_ops, },
+       { .name = "cmovbe",     .ops = &mov_ops,  },
+       { .name = "cmove",      .ops = &mov_ops,  },
+       { .name = "cmovae",     .ops = &mov_ops,  },
        { .name = "cmp",        .ops = &mov_ops,  },
        { .name = "cmpb",       .ops = &mov_ops,  },
        { .name = "cmpl",       .ops = &mov_ops,  },
        { .name = "cmpq",       .ops = &mov_ops,  },
        { .name = "cmpw",       .ops = &mov_ops,  },
        { .name = "cmpxch",     .ops = &mov_ops,  },
+       { .name = "cmpxchg",    .ops = &mov_ops,  },
+       { .name = "cs",         .ops = &mov_ops,  },
        { .name = "dec",        .ops = &dec_ops,  },
        { .name = "decl",       .ops = &dec_ops,  },
+       { .name = "divsd",      .ops = &mov_ops,  },
+       { .name = "divss",      .ops = &mov_ops,  },
+       { .name = "gs",         .ops = &mov_ops,  },
        { .name = "imul",       .ops = &mov_ops,  },
        { .name = "inc",        .ops = &dec_ops,  },
        { .name = "incl",       .ops = &dec_ops,  },
@@ -57,25 +79,68 @@ static struct ins x86__instructions[] = {
        { .name = "lea",        .ops = &mov_ops,  },
        { .name = "lock",       .ops = &lock_ops, },
        { .name = "mov",        .ops = &mov_ops,  },
+       { .name = "movapd",     .ops = &mov_ops,  },
+       { .name = "movaps",     .ops = &mov_ops,  },
        { .name = "movb",       .ops = &mov_ops,  },
        { .name = "movdqa",     .ops = &mov_ops,  },
+       { .name = "movdqu",     .ops = &mov_ops,  },
        { .name = "movl",       .ops = &mov_ops,  },
        { .name = "movq",       .ops = &mov_ops,  },
+       { .name = "movsd",      .ops = &mov_ops,  },
        { .name = "movslq",     .ops = &mov_ops,  },
+       { .name = "movss",      .ops = &mov_ops,  },
+       { .name = "movupd",     .ops = &mov_ops,  },
+       { .name = "movups",     .ops = &mov_ops,  },
+       { .name = "movw",       .ops = &mov_ops,  },
        { .name = "movzbl",     .ops = &mov_ops,  },
        { .name = "movzwl",     .ops = &mov_ops,  },
+       { .name = "mulsd",      .ops = &mov_ops,  },
+       { .name = "mulss",      .ops = &mov_ops,  },
        { .name = "nop",        .ops = &nop_ops,  },
        { .name = "nopl",       .ops = &nop_ops,  },
        { .name = "nopw",       .ops = &nop_ops,  },
        { .name = "or",         .ops = &mov_ops,  },
+       { .name = "orb",        .ops = &mov_ops,  },
        { .name = "orl",        .ops = &mov_ops,  },
+       { .name = "orps",       .ops = &mov_ops,  },
+       { .name = "orq",        .ops = &mov_ops,  },
+       { .name = "pand",       .ops = &mov_ops,  },
+       { .name = "paddq",      .ops = &mov_ops,  },
+       { .name = "pcmpeqb",    .ops = &mov_ops,  },
+       { .name = "por",        .ops = &mov_ops,  },
+       { .name = "rclb",       .ops = &mov_ops,  },
+       { .name = "rcll",       .ops = &mov_ops,  },
+       { .name = "retq",       .ops = &ret_ops,  },
+       { .name = "sbb",        .ops = &mov_ops,  },
+       { .name = "sbbl",       .ops = &mov_ops,  },
+       { .name = "sete",       .ops = &mov_ops,  },
+       { .name = "sub",        .ops = &mov_ops,  },
+       { .name = "subl",       .ops = &mov_ops,  },
+       { .name = "subq",       .ops = &mov_ops,  },
+       { .name = "subsd",      .ops = &mov_ops,  },
+       { .name = "subw",       .ops = &mov_ops,  },
        { .name = "test",       .ops = &mov_ops,  },
        { .name = "testb",      .ops = &mov_ops,  },
        { .name = "testl",      .ops = &mov_ops,  },
+       { .name = "ucomisd",    .ops = &mov_ops,  },
+       { .name = "ucomiss",    .ops = &mov_ops,  },
+       { .name = "vaddsd",     .ops = &mov_ops,  },
+       { .name = "vandpd",     .ops = &mov_ops,  },
+       { .name = "vmovdqa",    .ops = &mov_ops,  },
+       { .name = "vmovq",      .ops = &mov_ops,  },
+       { .name = "vmovsd",     .ops = &mov_ops,  },
+       { .name = "vmulsd",     .ops = &mov_ops,  },
+       { .name = "vorpd",      .ops = &mov_ops,  },
+       { .name = "vsubsd",     .ops = &mov_ops,  },
+       { .name = "vucomisd",   .ops = &mov_ops,  },
        { .name = "xadd",       .ops = &mov_ops,  },
        { .name = "xbeginl",    .ops = &jump_ops, },
        { .name = "xbeginq",    .ops = &jump_ops, },
-       { .name = "retq",       .ops = &ret_ops,  },
+       { .name = "xchg",       .ops = &mov_ops,  },
+       { .name = "xor",        .ops = &mov_ops, },
+       { .name = "xorb",       .ops = &mov_ops, },
+       { .name = "xorpd",      .ops = &mov_ops, },
+       { .name = "xorps",      .ops = &mov_ops, },
 };
 
 static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
index 5aef183e2f85c5f6c45e44d4e9a68c9ea62c0d74..4dfe42666d0ce6e20214e70f0c2a6a3884106290 100644 (file)
 # The format is:
 # <number> <abi> <name> <entry point>
 #
+# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
+#
 # The abi is "common", "64" or "x32" for this file.
 #
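
For background on the rename in the third column below: with the x86 syscall
wrapper rework in this kernel generation, SYSCALL_DEFINEn() emits an entry
stub that takes struct pt_regs and unpacks the argument registers itself. A
rough, hand-written sketch of the generated shape (not the real macro
output):

    /* roughly what SYSCALL_DEFINE3(read, ...) expands to on x86-64 */
    asmlinkage long __x64_sys_read(const struct pt_regs *regs)
    {
            /* arguments per the x86-64 syscall ABI: rdi, rsi, rdx */
            return __se_sys_read(regs->di, regs->si, regs->dx);
    }
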
-0      common  read                    sys_read
-1      common  write                   sys_write
-2      common  open                    sys_open
-3      common  close                   sys_close
-4      common  stat                    sys_newstat
-5      common  fstat                   sys_newfstat
-6      common  lstat                   sys_newlstat
-7      common  poll                    sys_poll
-8      common  lseek                   sys_lseek
-9      common  mmap                    sys_mmap
-10     common  mprotect                sys_mprotect
-11     common  munmap                  sys_munmap
-12     common  brk                     sys_brk
-13     64      rt_sigaction            sys_rt_sigaction
-14     common  rt_sigprocmask          sys_rt_sigprocmask
-15     64      rt_sigreturn            sys_rt_sigreturn/ptregs
-16     64      ioctl                   sys_ioctl
-17     common  pread64                 sys_pread64
-18     common  pwrite64                sys_pwrite64
-19     64      readv                   sys_readv
-20     64      writev                  sys_writev
-21     common  access                  sys_access
-22     common  pipe                    sys_pipe
-23     common  select                  sys_select
-24     common  sched_yield             sys_sched_yield
-25     common  mremap                  sys_mremap
-26     common  msync                   sys_msync
-27     common  mincore                 sys_mincore
-28     common  madvise                 sys_madvise
-29     common  shmget                  sys_shmget
-30     common  shmat                   sys_shmat
-31     common  shmctl                  sys_shmctl
-32     common  dup                     sys_dup
-33     common  dup2                    sys_dup2
-34     common  pause                   sys_pause
-35     common  nanosleep               sys_nanosleep
-36     common  getitimer               sys_getitimer
-37     common  alarm                   sys_alarm
-38     common  setitimer               sys_setitimer
-39     common  getpid                  sys_getpid
-40     common  sendfile                sys_sendfile64
-41     common  socket                  sys_socket
-42     common  connect                 sys_connect
-43     common  accept                  sys_accept
-44     common  sendto                  sys_sendto
-45     64      recvfrom                sys_recvfrom
-46     64      sendmsg                 sys_sendmsg
-47     64      recvmsg                 sys_recvmsg
-48     common  shutdown                sys_shutdown
-49     common  bind                    sys_bind
-50     common  listen                  sys_listen
-51     common  getsockname             sys_getsockname
-52     common  getpeername             sys_getpeername
-53     common  socketpair              sys_socketpair
-54     64      setsockopt              sys_setsockopt
-55     64      getsockopt              sys_getsockopt
-56     common  clone                   sys_clone/ptregs
-57     common  fork                    sys_fork/ptregs
-58     common  vfork                   sys_vfork/ptregs
-59     64      execve                  sys_execve/ptregs
-60     common  exit                    sys_exit
-61     common  wait4                   sys_wait4
-62     common  kill                    sys_kill
-63     common  uname                   sys_newuname
-64     common  semget                  sys_semget
-65     common  semop                   sys_semop
-66     common  semctl                  sys_semctl
-67     common  shmdt                   sys_shmdt
-68     common  msgget                  sys_msgget
-69     common  msgsnd                  sys_msgsnd
-70     common  msgrcv                  sys_msgrcv
-71     common  msgctl                  sys_msgctl
-72     common  fcntl                   sys_fcntl
-73     common  flock                   sys_flock
-74     common  fsync                   sys_fsync
-75     common  fdatasync               sys_fdatasync
-76     common  truncate                sys_truncate
-77     common  ftruncate               sys_ftruncate
-78     common  getdents                sys_getdents
-79     common  getcwd                  sys_getcwd
-80     common  chdir                   sys_chdir
-81     common  fchdir                  sys_fchdir
-82     common  rename                  sys_rename
-83     common  mkdir                   sys_mkdir
-84     common  rmdir                   sys_rmdir
-85     common  creat                   sys_creat
-86     common  link                    sys_link
-87     common  unlink                  sys_unlink
-88     common  symlink                 sys_symlink
-89     common  readlink                sys_readlink
-90     common  chmod                   sys_chmod
-91     common  fchmod                  sys_fchmod
-92     common  chown                   sys_chown
-93     common  fchown                  sys_fchown
-94     common  lchown                  sys_lchown
-95     common  umask                   sys_umask
-96     common  gettimeofday            sys_gettimeofday
-97     common  getrlimit               sys_getrlimit
-98     common  getrusage               sys_getrusage
-99     common  sysinfo                 sys_sysinfo
-100    common  times                   sys_times
-101    64      ptrace                  sys_ptrace
-102    common  getuid                  sys_getuid
-103    common  syslog                  sys_syslog
-104    common  getgid                  sys_getgid
-105    common  setuid                  sys_setuid
-106    common  setgid                  sys_setgid
-107    common  geteuid                 sys_geteuid
-108    common  getegid                 sys_getegid
-109    common  setpgid                 sys_setpgid
-110    common  getppid                 sys_getppid
-111    common  getpgrp                 sys_getpgrp
-112    common  setsid                  sys_setsid
-113    common  setreuid                sys_setreuid
-114    common  setregid                sys_setregid
-115    common  getgroups               sys_getgroups
-116    common  setgroups               sys_setgroups
-117    common  setresuid               sys_setresuid
-118    common  getresuid               sys_getresuid
-119    common  setresgid               sys_setresgid
-120    common  getresgid               sys_getresgid
-121    common  getpgid                 sys_getpgid
-122    common  setfsuid                sys_setfsuid
-123    common  setfsgid                sys_setfsgid
-124    common  getsid                  sys_getsid
-125    common  capget                  sys_capget
-126    common  capset                  sys_capset
-127    64      rt_sigpending           sys_rt_sigpending
-128    64      rt_sigtimedwait         sys_rt_sigtimedwait
-129    64      rt_sigqueueinfo         sys_rt_sigqueueinfo
-130    common  rt_sigsuspend           sys_rt_sigsuspend
-131    64      sigaltstack             sys_sigaltstack
-132    common  utime                   sys_utime
-133    common  mknod                   sys_mknod
+0      common  read                    __x64_sys_read
+1      common  write                   __x64_sys_write
+2      common  open                    __x64_sys_open
+3      common  close                   __x64_sys_close
+4      common  stat                    __x64_sys_newstat
+5      common  fstat                   __x64_sys_newfstat
+6      common  lstat                   __x64_sys_newlstat
+7      common  poll                    __x64_sys_poll
+8      common  lseek                   __x64_sys_lseek
+9      common  mmap                    __x64_sys_mmap
+10     common  mprotect                __x64_sys_mprotect
+11     common  munmap                  __x64_sys_munmap
+12     common  brk                     __x64_sys_brk
+13     64      rt_sigaction            __x64_sys_rt_sigaction
+14     common  rt_sigprocmask          __x64_sys_rt_sigprocmask
+15     64      rt_sigreturn            __x64_sys_rt_sigreturn/ptregs
+16     64      ioctl                   __x64_sys_ioctl
+17     common  pread64                 __x64_sys_pread64
+18     common  pwrite64                __x64_sys_pwrite64
+19     64      readv                   __x64_sys_readv
+20     64      writev                  __x64_sys_writev
+21     common  access                  __x64_sys_access
+22     common  pipe                    __x64_sys_pipe
+23     common  select                  __x64_sys_select
+24     common  sched_yield             __x64_sys_sched_yield
+25     common  mremap                  __x64_sys_mremap
+26     common  msync                   __x64_sys_msync
+27     common  mincore                 __x64_sys_mincore
+28     common  madvise                 __x64_sys_madvise
+29     common  shmget                  __x64_sys_shmget
+30     common  shmat                   __x64_sys_shmat
+31     common  shmctl                  __x64_sys_shmctl
+32     common  dup                     __x64_sys_dup
+33     common  dup2                    __x64_sys_dup2
+34     common  pause                   __x64_sys_pause
+35     common  nanosleep               __x64_sys_nanosleep
+36     common  getitimer               __x64_sys_getitimer
+37     common  alarm                   __x64_sys_alarm
+38     common  setitimer               __x64_sys_setitimer
+39     common  getpid                  __x64_sys_getpid
+40     common  sendfile                __x64_sys_sendfile64
+41     common  socket                  __x64_sys_socket
+42     common  connect                 __x64_sys_connect
+43     common  accept                  __x64_sys_accept
+44     common  sendto                  __x64_sys_sendto
+45     64      recvfrom                __x64_sys_recvfrom
+46     64      sendmsg                 __x64_sys_sendmsg
+47     64      recvmsg                 __x64_sys_recvmsg
+48     common  shutdown                __x64_sys_shutdown
+49     common  bind                    __x64_sys_bind
+50     common  listen                  __x64_sys_listen
+51     common  getsockname             __x64_sys_getsockname
+52     common  getpeername             __x64_sys_getpeername
+53     common  socketpair              __x64_sys_socketpair
+54     64      setsockopt              __x64_sys_setsockopt
+55     64      getsockopt              __x64_sys_getsockopt
+56     common  clone                   __x64_sys_clone/ptregs
+57     common  fork                    __x64_sys_fork/ptregs
+58     common  vfork                   __x64_sys_vfork/ptregs
+59     64      execve                  __x64_sys_execve/ptregs
+60     common  exit                    __x64_sys_exit
+61     common  wait4                   __x64_sys_wait4
+62     common  kill                    __x64_sys_kill
+63     common  uname                   __x64_sys_newuname
+64     common  semget                  __x64_sys_semget
+65     common  semop                   __x64_sys_semop
+66     common  semctl                  __x64_sys_semctl
+67     common  shmdt                   __x64_sys_shmdt
+68     common  msgget                  __x64_sys_msgget
+69     common  msgsnd                  __x64_sys_msgsnd
+70     common  msgrcv                  __x64_sys_msgrcv
+71     common  msgctl                  __x64_sys_msgctl
+72     common  fcntl                   __x64_sys_fcntl
+73     common  flock                   __x64_sys_flock
+74     common  fsync                   __x64_sys_fsync
+75     common  fdatasync               __x64_sys_fdatasync
+76     common  truncate                __x64_sys_truncate
+77     common  ftruncate               __x64_sys_ftruncate
+78     common  getdents                __x64_sys_getdents
+79     common  getcwd                  __x64_sys_getcwd
+80     common  chdir                   __x64_sys_chdir
+81     common  fchdir                  __x64_sys_fchdir
+82     common  rename                  __x64_sys_rename
+83     common  mkdir                   __x64_sys_mkdir
+84     common  rmdir                   __x64_sys_rmdir
+85     common  creat                   __x64_sys_creat
+86     common  link                    __x64_sys_link
+87     common  unlink                  __x64_sys_unlink
+88     common  symlink                 __x64_sys_symlink
+89     common  readlink                __x64_sys_readlink
+90     common  chmod                   __x64_sys_chmod
+91     common  fchmod                  __x64_sys_fchmod
+92     common  chown                   __x64_sys_chown
+93     common  fchown                  __x64_sys_fchown
+94     common  lchown                  __x64_sys_lchown
+95     common  umask                   __x64_sys_umask
+96     common  gettimeofday            __x64_sys_gettimeofday
+97     common  getrlimit               __x64_sys_getrlimit
+98     common  getrusage               __x64_sys_getrusage
+99     common  sysinfo                 __x64_sys_sysinfo
+100    common  times                   __x64_sys_times
+101    64      ptrace                  __x64_sys_ptrace
+102    common  getuid                  __x64_sys_getuid
+103    common  syslog                  __x64_sys_syslog
+104    common  getgid                  __x64_sys_getgid
+105    common  setuid                  __x64_sys_setuid
+106    common  setgid                  __x64_sys_setgid
+107    common  geteuid                 __x64_sys_geteuid
+108    common  getegid                 __x64_sys_getegid
+109    common  setpgid                 __x64_sys_setpgid
+110    common  getppid                 __x64_sys_getppid
+111    common  getpgrp                 __x64_sys_getpgrp
+112    common  setsid                  __x64_sys_setsid
+113    common  setreuid                __x64_sys_setreuid
+114    common  setregid                __x64_sys_setregid
+115    common  getgroups               __x64_sys_getgroups
+116    common  setgroups               __x64_sys_setgroups
+117    common  setresuid               __x64_sys_setresuid
+118    common  getresuid               __x64_sys_getresuid
+119    common  setresgid               __x64_sys_setresgid
+120    common  getresgid               __x64_sys_getresgid
+121    common  getpgid                 __x64_sys_getpgid
+122    common  setfsuid                __x64_sys_setfsuid
+123    common  setfsgid                __x64_sys_setfsgid
+124    common  getsid                  __x64_sys_getsid
+125    common  capget                  __x64_sys_capget
+126    common  capset                  __x64_sys_capset
+127    64      rt_sigpending           __x64_sys_rt_sigpending
+128    64      rt_sigtimedwait         __x64_sys_rt_sigtimedwait
+129    64      rt_sigqueueinfo         __x64_sys_rt_sigqueueinfo
+130    common  rt_sigsuspend           __x64_sys_rt_sigsuspend
+131    64      sigaltstack             __x64_sys_sigaltstack
+132    common  utime                   __x64_sys_utime
+133    common  mknod                   __x64_sys_mknod
 134    64      uselib
-135    common  personality             sys_personality
-136    common  ustat                   sys_ustat
-137    common  statfs                  sys_statfs
-138    common  fstatfs                 sys_fstatfs
-139    common  sysfs                   sys_sysfs
-140    common  getpriority             sys_getpriority
-141    common  setpriority             sys_setpriority
-142    common  sched_setparam          sys_sched_setparam
-143    common  sched_getparam          sys_sched_getparam
-144    common  sched_setscheduler      sys_sched_setscheduler
-145    common  sched_getscheduler      sys_sched_getscheduler
-146    common  sched_get_priority_max  sys_sched_get_priority_max
-147    common  sched_get_priority_min  sys_sched_get_priority_min
-148    common  sched_rr_get_interval   sys_sched_rr_get_interval
-149    common  mlock                   sys_mlock
-150    common  munlock                 sys_munlock
-151    common  mlockall                sys_mlockall
-152    common  munlockall              sys_munlockall
-153    common  vhangup                 sys_vhangup
-154    common  modify_ldt              sys_modify_ldt
-155    common  pivot_root              sys_pivot_root
-156    64      _sysctl                 sys_sysctl
-157    common  prctl                   sys_prctl
-158    common  arch_prctl              sys_arch_prctl
-159    common  adjtimex                sys_adjtimex
-160    common  setrlimit               sys_setrlimit
-161    common  chroot                  sys_chroot
-162    common  sync                    sys_sync
-163    common  acct                    sys_acct
-164    common  settimeofday            sys_settimeofday
-165    common  mount                   sys_mount
-166    common  umount2                 sys_umount
-167    common  swapon                  sys_swapon
-168    common  swapoff                 sys_swapoff
-169    common  reboot                  sys_reboot
-170    common  sethostname             sys_sethostname
-171    common  setdomainname           sys_setdomainname
-172    common  iopl                    sys_iopl/ptregs
-173    common  ioperm                  sys_ioperm
+135    common  personality             __x64_sys_personality
+136    common  ustat                   __x64_sys_ustat
+137    common  statfs                  __x64_sys_statfs
+138    common  fstatfs                 __x64_sys_fstatfs
+139    common  sysfs                   __x64_sys_sysfs
+140    common  getpriority             __x64_sys_getpriority
+141    common  setpriority             __x64_sys_setpriority
+142    common  sched_setparam          __x64_sys_sched_setparam
+143    common  sched_getparam          __x64_sys_sched_getparam
+144    common  sched_setscheduler      __x64_sys_sched_setscheduler
+145    common  sched_getscheduler      __x64_sys_sched_getscheduler
+146    common  sched_get_priority_max  __x64_sys_sched_get_priority_max
+147    common  sched_get_priority_min  __x64_sys_sched_get_priority_min
+148    common  sched_rr_get_interval   __x64_sys_sched_rr_get_interval
+149    common  mlock                   __x64_sys_mlock
+150    common  munlock                 __x64_sys_munlock
+151    common  mlockall                __x64_sys_mlockall
+152    common  munlockall              __x64_sys_munlockall
+153    common  vhangup                 __x64_sys_vhangup
+154    common  modify_ldt              __x64_sys_modify_ldt
+155    common  pivot_root              __x64_sys_pivot_root
+156    64      _sysctl                 __x64_sys_sysctl
+157    common  prctl                   __x64_sys_prctl
+158    common  arch_prctl              __x64_sys_arch_prctl
+159    common  adjtimex                __x64_sys_adjtimex
+160    common  setrlimit               __x64_sys_setrlimit
+161    common  chroot                  __x64_sys_chroot
+162    common  sync                    __x64_sys_sync
+163    common  acct                    __x64_sys_acct
+164    common  settimeofday            __x64_sys_settimeofday
+165    common  mount                   __x64_sys_mount
+166    common  umount2                 __x64_sys_umount
+167    common  swapon                  __x64_sys_swapon
+168    common  swapoff                 __x64_sys_swapoff
+169    common  reboot                  __x64_sys_reboot
+170    common  sethostname             __x64_sys_sethostname
+171    common  setdomainname           __x64_sys_setdomainname
+172    common  iopl                    __x64_sys_iopl/ptregs
+173    common  ioperm                  __x64_sys_ioperm
 174    64      create_module
-175    common  init_module             sys_init_module
-176    common  delete_module           sys_delete_module
+175    common  init_module             __x64_sys_init_module
+176    common  delete_module           __x64_sys_delete_module
 177    64      get_kernel_syms
 178    64      query_module
-179    common  quotactl                sys_quotactl
+179    common  quotactl                __x64_sys_quotactl
 180    64      nfsservctl
 181    common  getpmsg
 182    common  putpmsg
 183    common  afs_syscall
 184    common  tuxcall
 185    common  security
-186    common  gettid                  sys_gettid
-187    common  readahead               sys_readahead
-188    common  setxattr                sys_setxattr
-189    common  lsetxattr               sys_lsetxattr
-190    common  fsetxattr               sys_fsetxattr
-191    common  getxattr                sys_getxattr
-192    common  lgetxattr               sys_lgetxattr
-193    common  fgetxattr               sys_fgetxattr
-194    common  listxattr               sys_listxattr
-195    common  llistxattr              sys_llistxattr
-196    common  flistxattr              sys_flistxattr
-197    common  removexattr             sys_removexattr
-198    common  lremovexattr            sys_lremovexattr
-199    common  fremovexattr            sys_fremovexattr
-200    common  tkill                   sys_tkill
-201    common  time                    sys_time
-202    common  futex                   sys_futex
-203    common  sched_setaffinity       sys_sched_setaffinity
-204    common  sched_getaffinity       sys_sched_getaffinity
+186    common  gettid                  __x64_sys_gettid
+187    common  readahead               __x64_sys_readahead
+188    common  setxattr                __x64_sys_setxattr
+189    common  lsetxattr               __x64_sys_lsetxattr
+190    common  fsetxattr               __x64_sys_fsetxattr
+191    common  getxattr                __x64_sys_getxattr
+192    common  lgetxattr               __x64_sys_lgetxattr
+193    common  fgetxattr               __x64_sys_fgetxattr
+194    common  listxattr               __x64_sys_listxattr
+195    common  llistxattr              __x64_sys_llistxattr
+196    common  flistxattr              __x64_sys_flistxattr
+197    common  removexattr             __x64_sys_removexattr
+198    common  lremovexattr            __x64_sys_lremovexattr
+199    common  fremovexattr            __x64_sys_fremovexattr
+200    common  tkill                   __x64_sys_tkill
+201    common  time                    __x64_sys_time
+202    common  futex                   __x64_sys_futex
+203    common  sched_setaffinity       __x64_sys_sched_setaffinity
+204    common  sched_getaffinity       __x64_sys_sched_getaffinity
 205    64      set_thread_area
-206    64      io_setup                sys_io_setup
-207    common  io_destroy              sys_io_destroy
-208    common  io_getevents            sys_io_getevents
-209    64      io_submit               sys_io_submit
-210    common  io_cancel               sys_io_cancel
+206    64      io_setup                __x64_sys_io_setup
+207    common  io_destroy              __x64_sys_io_destroy
+208    common  io_getevents            __x64_sys_io_getevents
+209    64      io_submit               __x64_sys_io_submit
+210    common  io_cancel               __x64_sys_io_cancel
 211    64      get_thread_area
-212    common  lookup_dcookie          sys_lookup_dcookie
-213    common  epoll_create            sys_epoll_create
+212    common  lookup_dcookie          __x64_sys_lookup_dcookie
+213    common  epoll_create            __x64_sys_epoll_create
 214    64      epoll_ctl_old
 215    64      epoll_wait_old
-216    common  remap_file_pages        sys_remap_file_pages
-217    common  getdents64              sys_getdents64
-218    common  set_tid_address         sys_set_tid_address
-219    common  restart_syscall         sys_restart_syscall
-220    common  semtimedop              sys_semtimedop
-221    common  fadvise64               sys_fadvise64
-222    64      timer_create            sys_timer_create
-223    common  timer_settime           sys_timer_settime
-224    common  timer_gettime           sys_timer_gettime
-225    common  timer_getoverrun        sys_timer_getoverrun
-226    common  timer_delete            sys_timer_delete
-227    common  clock_settime           sys_clock_settime
-228    common  clock_gettime           sys_clock_gettime
-229    common  clock_getres            sys_clock_getres
-230    common  clock_nanosleep         sys_clock_nanosleep
-231    common  exit_group              sys_exit_group
-232    common  epoll_wait              sys_epoll_wait
-233    common  epoll_ctl               sys_epoll_ctl
-234    common  tgkill                  sys_tgkill
-235    common  utimes                  sys_utimes
+216    common  remap_file_pages        __x64_sys_remap_file_pages
+217    common  getdents64              __x64_sys_getdents64
+218    common  set_tid_address         __x64_sys_set_tid_address
+219    common  restart_syscall         __x64_sys_restart_syscall
+220    common  semtimedop              __x64_sys_semtimedop
+221    common  fadvise64               __x64_sys_fadvise64
+222    64      timer_create            __x64_sys_timer_create
+223    common  timer_settime           __x64_sys_timer_settime
+224    common  timer_gettime           __x64_sys_timer_gettime
+225    common  timer_getoverrun        __x64_sys_timer_getoverrun
+226    common  timer_delete            __x64_sys_timer_delete
+227    common  clock_settime           __x64_sys_clock_settime
+228    common  clock_gettime           __x64_sys_clock_gettime
+229    common  clock_getres            __x64_sys_clock_getres
+230    common  clock_nanosleep         __x64_sys_clock_nanosleep
+231    common  exit_group              __x64_sys_exit_group
+232    common  epoll_wait              __x64_sys_epoll_wait
+233    common  epoll_ctl               __x64_sys_epoll_ctl
+234    common  tgkill                  __x64_sys_tgkill
+235    common  utimes                  __x64_sys_utimes
 236    64      vserver
-237    common  mbind                   sys_mbind
-238    common  set_mempolicy           sys_set_mempolicy
-239    common  get_mempolicy           sys_get_mempolicy
-240    common  mq_open                 sys_mq_open
-241    common  mq_unlink               sys_mq_unlink
-242    common  mq_timedsend            sys_mq_timedsend
-243    common  mq_timedreceive         sys_mq_timedreceive
-244    64      mq_notify               sys_mq_notify
-245    common  mq_getsetattr           sys_mq_getsetattr
-246    64      kexec_load              sys_kexec_load
-247    64      waitid                  sys_waitid
-248    common  add_key                 sys_add_key
-249    common  request_key             sys_request_key
-250    common  keyctl                  sys_keyctl
-251    common  ioprio_set              sys_ioprio_set
-252    common  ioprio_get              sys_ioprio_get
-253    common  inotify_init            sys_inotify_init
-254    common  inotify_add_watch       sys_inotify_add_watch
-255    common  inotify_rm_watch        sys_inotify_rm_watch
-256    common  migrate_pages           sys_migrate_pages
-257    common  openat                  sys_openat
-258    common  mkdirat                 sys_mkdirat
-259    common  mknodat                 sys_mknodat
-260    common  fchownat                sys_fchownat
-261    common  futimesat               sys_futimesat
-262    common  newfstatat              sys_newfstatat
-263    common  unlinkat                sys_unlinkat
-264    common  renameat                sys_renameat
-265    common  linkat                  sys_linkat
-266    common  symlinkat               sys_symlinkat
-267    common  readlinkat              sys_readlinkat
-268    common  fchmodat                sys_fchmodat
-269    common  faccessat               sys_faccessat
-270    common  pselect6                sys_pselect6
-271    common  ppoll                   sys_ppoll
-272    common  unshare                 sys_unshare
-273    64      set_robust_list         sys_set_robust_list
-274    64      get_robust_list         sys_get_robust_list
-275    common  splice                  sys_splice
-276    common  tee                     sys_tee
-277    common  sync_file_range         sys_sync_file_range
-278    64      vmsplice                sys_vmsplice
-279    64      move_pages              sys_move_pages
-280    common  utimensat               sys_utimensat
-281    common  epoll_pwait             sys_epoll_pwait
-282    common  signalfd                sys_signalfd
-283    common  timerfd_create          sys_timerfd_create
-284    common  eventfd                 sys_eventfd
-285    common  fallocate               sys_fallocate
-286    common  timerfd_settime         sys_timerfd_settime
-287    common  timerfd_gettime         sys_timerfd_gettime
-288    common  accept4                 sys_accept4
-289    common  signalfd4               sys_signalfd4
-290    common  eventfd2                sys_eventfd2
-291    common  epoll_create1           sys_epoll_create1
-292    common  dup3                    sys_dup3
-293    common  pipe2                   sys_pipe2
-294    common  inotify_init1           sys_inotify_init1
-295    64      preadv                  sys_preadv
-296    64      pwritev                 sys_pwritev
-297    64      rt_tgsigqueueinfo       sys_rt_tgsigqueueinfo
-298    common  perf_event_open         sys_perf_event_open
-299    64      recvmmsg                sys_recvmmsg
-300    common  fanotify_init           sys_fanotify_init
-301    common  fanotify_mark           sys_fanotify_mark
-302    common  prlimit64               sys_prlimit64
-303    common  name_to_handle_at       sys_name_to_handle_at
-304    common  open_by_handle_at       sys_open_by_handle_at
-305    common  clock_adjtime           sys_clock_adjtime
-306    common  syncfs                  sys_syncfs
-307    64      sendmmsg                sys_sendmmsg
-308    common  setns                   sys_setns
-309    common  getcpu                  sys_getcpu
-310    64      process_vm_readv        sys_process_vm_readv
-311    64      process_vm_writev       sys_process_vm_writev
-312    common  kcmp                    sys_kcmp
-313    common  finit_module            sys_finit_module
-314    common  sched_setattr           sys_sched_setattr
-315    common  sched_getattr           sys_sched_getattr
-316    common  renameat2               sys_renameat2
-317    common  seccomp                 sys_seccomp
-318    common  getrandom               sys_getrandom
-319    common  memfd_create            sys_memfd_create
-320    common  kexec_file_load         sys_kexec_file_load
-321    common  bpf                     sys_bpf
-322    64      execveat                sys_execveat/ptregs
-323    common  userfaultfd             sys_userfaultfd
-324    common  membarrier              sys_membarrier
-325    common  mlock2                  sys_mlock2
-326    common  copy_file_range         sys_copy_file_range
-327    64      preadv2                 sys_preadv2
-328    64      pwritev2                sys_pwritev2
-329    common  pkey_mprotect           sys_pkey_mprotect
-330    common  pkey_alloc              sys_pkey_alloc
-331    common  pkey_free               sys_pkey_free
-332    common  statx                   sys_statx
+237    common  mbind                   __x64_sys_mbind
+238    common  set_mempolicy           __x64_sys_set_mempolicy
+239    common  get_mempolicy           __x64_sys_get_mempolicy
+240    common  mq_open                 __x64_sys_mq_open
+241    common  mq_unlink               __x64_sys_mq_unlink
+242    common  mq_timedsend            __x64_sys_mq_timedsend
+243    common  mq_timedreceive         __x64_sys_mq_timedreceive
+244    64      mq_notify               __x64_sys_mq_notify
+245    common  mq_getsetattr           __x64_sys_mq_getsetattr
+246    64      kexec_load              __x64_sys_kexec_load
+247    64      waitid                  __x64_sys_waitid
+248    common  add_key                 __x64_sys_add_key
+249    common  request_key             __x64_sys_request_key
+250    common  keyctl                  __x64_sys_keyctl
+251    common  ioprio_set              __x64_sys_ioprio_set
+252    common  ioprio_get              __x64_sys_ioprio_get
+253    common  inotify_init            __x64_sys_inotify_init
+254    common  inotify_add_watch       __x64_sys_inotify_add_watch
+255    common  inotify_rm_watch        __x64_sys_inotify_rm_watch
+256    common  migrate_pages           __x64_sys_migrate_pages
+257    common  openat                  __x64_sys_openat
+258    common  mkdirat                 __x64_sys_mkdirat
+259    common  mknodat                 __x64_sys_mknodat
+260    common  fchownat                __x64_sys_fchownat
+261    common  futimesat               __x64_sys_futimesat
+262    common  newfstatat              __x64_sys_newfstatat
+263    common  unlinkat                __x64_sys_unlinkat
+264    common  renameat                __x64_sys_renameat
+265    common  linkat                  __x64_sys_linkat
+266    common  symlinkat               __x64_sys_symlinkat
+267    common  readlinkat              __x64_sys_readlinkat
+268    common  fchmodat                __x64_sys_fchmodat
+269    common  faccessat               __x64_sys_faccessat
+270    common  pselect6                __x64_sys_pselect6
+271    common  ppoll                   __x64_sys_ppoll
+272    common  unshare                 __x64_sys_unshare
+273    64      set_robust_list         __x64_sys_set_robust_list
+274    64      get_robust_list         __x64_sys_get_robust_list
+275    common  splice                  __x64_sys_splice
+276    common  tee                     __x64_sys_tee
+277    common  sync_file_range         __x64_sys_sync_file_range
+278    64      vmsplice                __x64_sys_vmsplice
+279    64      move_pages              __x64_sys_move_pages
+280    common  utimensat               __x64_sys_utimensat
+281    common  epoll_pwait             __x64_sys_epoll_pwait
+282    common  signalfd                __x64_sys_signalfd
+283    common  timerfd_create          __x64_sys_timerfd_create
+284    common  eventfd                 __x64_sys_eventfd
+285    common  fallocate               __x64_sys_fallocate
+286    common  timerfd_settime         __x64_sys_timerfd_settime
+287    common  timerfd_gettime         __x64_sys_timerfd_gettime
+288    common  accept4                 __x64_sys_accept4
+289    common  signalfd4               __x64_sys_signalfd4
+290    common  eventfd2                __x64_sys_eventfd2
+291    common  epoll_create1           __x64_sys_epoll_create1
+292    common  dup3                    __x64_sys_dup3
+293    common  pipe2                   __x64_sys_pipe2
+294    common  inotify_init1           __x64_sys_inotify_init1
+295    64      preadv                  __x64_sys_preadv
+296    64      pwritev                 __x64_sys_pwritev
+297    64      rt_tgsigqueueinfo       __x64_sys_rt_tgsigqueueinfo
+298    common  perf_event_open         __x64_sys_perf_event_open
+299    64      recvmmsg                __x64_sys_recvmmsg
+300    common  fanotify_init           __x64_sys_fanotify_init
+301    common  fanotify_mark           __x64_sys_fanotify_mark
+302    common  prlimit64               __x64_sys_prlimit64
+303    common  name_to_handle_at       __x64_sys_name_to_handle_at
+304    common  open_by_handle_at       __x64_sys_open_by_handle_at
+305    common  clock_adjtime           __x64_sys_clock_adjtime
+306    common  syncfs                  __x64_sys_syncfs
+307    64      sendmmsg                __x64_sys_sendmmsg
+308    common  setns                   __x64_sys_setns
+309    common  getcpu                  __x64_sys_getcpu
+310    64      process_vm_readv        __x64_sys_process_vm_readv
+311    64      process_vm_writev       __x64_sys_process_vm_writev
+312    common  kcmp                    __x64_sys_kcmp
+313    common  finit_module            __x64_sys_finit_module
+314    common  sched_setattr           __x64_sys_sched_setattr
+315    common  sched_getattr           __x64_sys_sched_getattr
+316    common  renameat2               __x64_sys_renameat2
+317    common  seccomp                 __x64_sys_seccomp
+318    common  getrandom               __x64_sys_getrandom
+319    common  memfd_create            __x64_sys_memfd_create
+320    common  kexec_file_load         __x64_sys_kexec_file_load
+321    common  bpf                     __x64_sys_bpf
+322    64      execveat                __x64_sys_execveat/ptregs
+323    common  userfaultfd             __x64_sys_userfaultfd
+324    common  membarrier              __x64_sys_membarrier
+325    common  mlock2                  __x64_sys_mlock2
+326    common  copy_file_range         __x64_sys_copy_file_range
+327    64      preadv2                 __x64_sys_preadv2
+328    64      pwritev2                __x64_sys_pwritev2
+329    common  pkey_mprotect           __x64_sys_pkey_mprotect
+330    common  pkey_alloc              __x64_sys_pkey_alloc
+331    common  pkey_free               __x64_sys_pkey_free
+332    common  statx                   __x64_sys_statx
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation.
+# for native 64-bit operation. The __x32_compat_sys stubs are created
+# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
+# is defined.
 #
-512    x32     rt_sigaction            compat_sys_rt_sigaction
+512    x32     rt_sigaction            __x32_compat_sys_rt_sigaction
 513    x32     rt_sigreturn            sys32_x32_rt_sigreturn
-514    x32     ioctl                   compat_sys_ioctl
-515    x32     readv                   compat_sys_readv
-516    x32     writev                  compat_sys_writev
-517    x32     recvfrom                compat_sys_recvfrom
-518    x32     sendmsg                 compat_sys_sendmsg
-519    x32     recvmsg                 compat_sys_recvmsg
-520    x32     execve                  compat_sys_execve/ptregs
-521    x32     ptrace                  compat_sys_ptrace
-522    x32     rt_sigpending           compat_sys_rt_sigpending
-523    x32     rt_sigtimedwait         compat_sys_rt_sigtimedwait
-524    x32     rt_sigqueueinfo         compat_sys_rt_sigqueueinfo
-525    x32     sigaltstack             compat_sys_sigaltstack
-526    x32     timer_create            compat_sys_timer_create
-527    x32     mq_notify               compat_sys_mq_notify
-528    x32     kexec_load              compat_sys_kexec_load
-529    x32     waitid                  compat_sys_waitid
-530    x32     set_robust_list         compat_sys_set_robust_list
-531    x32     get_robust_list         compat_sys_get_robust_list
-532    x32     vmsplice                compat_sys_vmsplice
-533    x32     move_pages              compat_sys_move_pages
-534    x32     preadv                  compat_sys_preadv64
-535    x32     pwritev                 compat_sys_pwritev64
-536    x32     rt_tgsigqueueinfo       compat_sys_rt_tgsigqueueinfo
-537    x32     recvmmsg                compat_sys_recvmmsg
-538    x32     sendmmsg                compat_sys_sendmmsg
-539    x32     process_vm_readv        compat_sys_process_vm_readv
-540    x32     process_vm_writev       compat_sys_process_vm_writev
-541    x32     setsockopt              compat_sys_setsockopt
-542    x32     getsockopt              compat_sys_getsockopt
-543    x32     io_setup                compat_sys_io_setup
-544    x32     io_submit               compat_sys_io_submit
-545    x32     execveat                compat_sys_execveat/ptregs
-546    x32     preadv2                 compat_sys_preadv64v2
-547    x32     pwritev2                compat_sys_pwritev64v2
+514    x32     ioctl                   __x32_compat_sys_ioctl
+515    x32     readv                   __x32_compat_sys_readv
+516    x32     writev                  __x32_compat_sys_writev
+517    x32     recvfrom                __x32_compat_sys_recvfrom
+518    x32     sendmsg                 __x32_compat_sys_sendmsg
+519    x32     recvmsg                 __x32_compat_sys_recvmsg
+520    x32     execve                  __x32_compat_sys_execve/ptregs
+521    x32     ptrace                  __x32_compat_sys_ptrace
+522    x32     rt_sigpending           __x32_compat_sys_rt_sigpending
+523    x32     rt_sigtimedwait         __x32_compat_sys_rt_sigtimedwait
+524    x32     rt_sigqueueinfo         __x32_compat_sys_rt_sigqueueinfo
+525    x32     sigaltstack             __x32_compat_sys_sigaltstack
+526    x32     timer_create            __x32_compat_sys_timer_create
+527    x32     mq_notify               __x32_compat_sys_mq_notify
+528    x32     kexec_load              __x32_compat_sys_kexec_load
+529    x32     waitid                  __x32_compat_sys_waitid
+530    x32     set_robust_list         __x32_compat_sys_set_robust_list
+531    x32     get_robust_list         __x32_compat_sys_get_robust_list
+532    x32     vmsplice                __x32_compat_sys_vmsplice
+533    x32     move_pages              __x32_compat_sys_move_pages
+534    x32     preadv                  __x32_compat_sys_preadv64
+535    x32     pwritev                 __x32_compat_sys_pwritev64
+536    x32     rt_tgsigqueueinfo       __x32_compat_sys_rt_tgsigqueueinfo
+537    x32     recvmmsg                __x32_compat_sys_recvmmsg
+538    x32     sendmmsg                __x32_compat_sys_sendmmsg
+539    x32     process_vm_readv        __x32_compat_sys_process_vm_readv
+540    x32     process_vm_writev       __x32_compat_sys_process_vm_writev
+541    x32     setsockopt              __x32_compat_sys_setsockopt
+542    x32     getsockopt              __x32_compat_sys_getsockopt
+543    x32     io_setup                __x32_compat_sys_io_setup
+544    x32     io_submit               __x32_compat_sys_io_submit
+545    x32     execveat                __x32_compat_sys_execveat/ptregs
+546    x32     preadv2                 __x32_compat_sys_preadv64v2
+547    x32     pwritev2                __x32_compat_sys_pwritev64v2
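
The renamed entry points above come from the x86 switch to pt_regs-based
syscall calling: SYSCALL_DEFINEn() now emits per-ABI stubs such as
__x64_sys_*() (and __x32_compat_sys_*() for the x32 ABI) that unpack the
arguments from the saved register frame themselves, instead of the old
generic sys_*() symbols taking C arguments. A simplified sketch of what one
such stub boils down to (the real macros also route through a
sign-extension helper, omitted here):

    asmlinkage long __x64_sys_getrandom(const struct pt_regs *regs)
    {
            /* x86-64 passes syscall arguments in di, si, dx, r10, r8, r9 */
            return __do_sys_getrandom((char __user *)regs->di,
                                      (size_t)regs->si,
                                      (unsigned int)regs->dx);
    }
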
index 944070e98a2cd9dc4903e3be02278b1a7f8f3500..63eb49082774c94dfbabe7a18db73bdc2403fb6a 100644 (file)
@@ -175,7 +175,7 @@ static const struct option options[] = {
        OPT_UINTEGER('s', "nr_secs"     , &p0.nr_secs,          "max number of seconds to run (default: 5 secs)"),
        OPT_UINTEGER('u', "usleep"      , &p0.sleep_usecs,      "usecs to sleep per loop iteration"),
 
-       OPT_BOOLEAN('R', "data_reads"   , &p0.data_reads,       "access the data via writes (can be mixed with -W)"),
+       OPT_BOOLEAN('R', "data_reads"   , &p0.data_reads,       "access the data via reads (can be mixed with -W)"),
        OPT_BOOLEAN('W', "data_writes"  , &p0.data_writes,      "access the data via writes (can be mixed with -R)"),
        OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,  "access the data backwards as well"),
        OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
index 4aca13f23b9d2f4005846d89c68833705576d8d1..1c41b4eaf73cd407ebe0e86e673d7c82dfb9d501 100644 (file)
@@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv)
 #ifdef HAVE_LIBELF_SUPPORT
                "probe",
 #endif
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
                "trace",
 #endif
        NULL };
index 506564651cda983c4dd996f0acba0f8c1e66f9a1..57393e94d1561f8e9a7611f787f30147f25ca6c8 100644 (file)
@@ -83,7 +83,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        };
 
        argc = parse_options(argc, argv, options, record_mem_usage,
-                            PARSE_OPT_STOP_AT_NON_OPTION);
+                            PARSE_OPT_KEEP_UNKNOWN);
 
        rec_argc = argc + 9; /* max number of arguments */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -436,7 +436,7 @@ int cmd_mem(int argc, const char **argv)
        }
 
        argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
-                                       mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+                                       mem_usage, PARSE_OPT_KEEP_UNKNOWN);
 
        if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
                usage_with_options(mem_usage, mem_options);
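
Switching these call sites from PARSE_OPT_STOP_AT_NON_OPTION to
PARSE_OPT_KEEP_UNKNOWN means 'perf mem' no longer aborts at the first switch
it does not recognize; unknown options stay in argv and can be forwarded to
the 'perf record' command line that __cmd_record() assembles. A minimal
sketch of that forwarding pattern, assuming the surrounding file's option
table and usage strings (not the literal builtin-mem.c source):

    static int build_record_argv(int argc, const char **argv,
                                 const char **rec_argv, int max)
    {
            int i = 0, j;

            argc = parse_options(argc, argv, options, record_mem_usage,
                                 PARSE_OPT_KEEP_UNKNOWN); /* unknowns survive */

            rec_argv[i++] = "record";
            /* ... fixed recording switches elided ... */
            for (j = 0; j < argc && i < max; j++)
                    rec_argv[i++] = argv[j]; /* incl. record-only options */
            return i;
    }
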
index 313c424233938cc118f59e6e55950a5e0de22faa..e0a9845b6cbc57db5e13426d536a18b771202717 100644 (file)
@@ -657,8 +657,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
                        break;
                case PERF_RECORD_SWITCH:
                case PERF_RECORD_SWITCH_CPU_WIDE:
-                       if (has(SWITCH_OUT))
+                       if (has(SWITCH_OUT)) {
                                ret += fprintf(fp, "S");
+                               if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT)
+                                       ret += fprintf(fp, "p");
+                       }
                default:
                        break;
                }
@@ -2801,11 +2804,11 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
        for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
                          lang_dirent->d_name);
-#ifdef NO_LIBPERL
+#ifndef HAVE_LIBPERL_SUPPORT
                if (strstr(lang_path, "perl"))
                        continue;
 #endif
-#ifdef NO_LIBPYTHON
+#ifndef HAVE_LIBPYTHON_SUPPORT
                if (strstr(lang_path, "python"))
                        continue;
 #endif
index f5c45485590886442c85b349ee448c9189fe5889..f17dc601b0f39aaff4f5c92a27b93b0075df5741 100644 (file)
@@ -172,6 +172,7 @@ static bool                 interval_count;
 static const char              *output_name;
 static int                     output_fd;
 static int                     print_free_counters_hint;
+static int                     print_mixed_hw_group_error;
 
 struct perf_stat {
        bool                     record;
@@ -1126,6 +1127,30 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static bool is_mixed_hw_group(struct perf_evsel *counter)
+{
+       struct perf_evlist *evlist = counter->evlist;
+       u32 pmu_type = counter->attr.type;
+       struct perf_evsel *pos;
+
+       if (counter->nr_members < 2)
+               return false;
+
+       evlist__for_each_entry(evlist, pos) {
+               /* software events can be part of any hardware group */
+               if (pos->attr.type == PERF_TYPE_SOFTWARE)
+                       continue;
+               if (pmu_type == PERF_TYPE_SOFTWARE) {
+                       pmu_type = pos->attr.type;
+                       continue;
+               }
+               if (pmu_type != pos->attr.type)
+                       return true;
+       }
+
+       return false;
+}
+
 static void printout(int id, int nr, struct perf_evsel *counter, double uval,
                     char *prefix, u64 run, u64 ena, double noise,
                     struct runtime_stat *st)
@@ -1178,8 +1203,11 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
                        csv_sep);
 
-               if (counter->supported)
+               if (counter->supported) {
                        print_free_counters_hint = 1;
+                       if (is_mixed_hw_group(counter))
+                               print_mixed_hw_group_error = 1;
+               }
 
                fprintf(stat_config.output, "%-*s%s",
                        csv_output ? 0 : unit_width,
@@ -1256,7 +1284,8 @@ static void uniquify_event_name(struct perf_evsel *counter)
        char *new_name;
        char *config;
 
-       if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+       if (counter->uniquified_name ||
+           !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
                                           strlen(counter->pmu_name)))
                return;
 
@@ -1274,6 +1303,8 @@ static void uniquify_event_name(struct perf_evsel *counter)
                        counter->name = new_name;
                }
        }
+
+       counter->uniquified_name = true;
 }
 
 static void collect_all_aliases(struct perf_evsel *counter,
@@ -1757,6 +1788,11 @@ static void print_footer(void)
 "      echo 0 > /proc/sys/kernel/nmi_watchdog\n"
 "      perf stat ...\n"
 "      echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+       if (print_mixed_hw_group_error)
+               fprintf(output,
+                       "The events in group usually have to be from "
+                       "the same PMU. Try reorganizing the group.\n");
 }
 
 static void print_counters(struct timespec *ts, int argc, const char **argv)
@@ -1943,7 +1979,8 @@ static const struct option stat_options[] = {
        OPT_STRING(0, "post", &post_cmd, "command",
                        "command to run after to the measured command"),
        OPT_UINTEGER('I', "interval-print", &stat_config.interval,
-                   "print counts at regular interval in ms (>= 10)"),
+                   "print counts at regular interval in ms "
+                   "(overhead is possible for values <= 100ms)"),
        OPT_INTEGER(0, "interval-count", &stat_config.times,
                    "print counts for fixed number of times"),
        OPT_UINTEGER(0, "timeout", &stat_config.timeout,
@@ -2923,17 +2960,6 @@ int cmd_stat(int argc, const char **argv)
                }
        }
 
-       if (interval && interval < 100) {
-               if (interval < 10) {
-                       pr_err("print interval must be >= 10ms\n");
-                       parse_options_usage(stat_usage, stat_options, "I", 1);
-                       goto out;
-               } else
-                       pr_warning("print interval < 100ms. "
-                                  "The overhead percentage could be high in some cases. "
-                                  "Please proceed with caution.\n");
-       }
-
        if (stat_config.times && interval)
                interval_count = true;
        else if (stat_config.times && !interval) {
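
The is_mixed_hw_group() helper added above is what arms the new footer hint:
it fires when one event group spans more than one hardware PMU type, while
pure software events are exempt because they may be scheduled inside any
hardware group. A standalone restatement of the heuristic (a sketch, with
u32 and PERF_TYPE_SOFTWARE as in <linux/perf_event.h>, not the perf source):

    /* true when a group mixes two different hardware PMU types;
     * PERF_TYPE_SOFTWARE members never count against the group */
    static bool mixes_pmu_types(const u32 *types, int nr_members)
    {
            u32 seen = PERF_TYPE_SOFTWARE;
            int i;

            for (i = 0; i < nr_members; i++) {
                    if (types[i] == PERF_TYPE_SOFTWARE)
                            continue;
                    if (seen == PERF_TYPE_SOFTWARE)
                            seen = types[i];
                    else if (seen != types[i])
                            return true;
            }
            return false;
    }
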
index 2abe3910d6b6413e99246a49a1cfd41a5cc30703..50df168be326d84cba4e5cfbc26ea8a119d392cf 100644 (file)
@@ -60,7 +60,10 @@ static void library_status(void)
        STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
        STATUS(HAVE_GLIBC_SUPPORT, glibc);
        STATUS(HAVE_GTK2_SUPPORT, gtk2);
+#ifndef HAVE_SYSCALL_TABLE_SUPPORT
        STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
+#endif
+       STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table);
        STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
        STATUS(HAVE_LIBELF_SUPPORT, libelf);
        STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
index 1659029d03fccc03db57b0e94ec342571675b8e3..20a08cb323329be20348fbf513091d40780ac12a 100644 (file)
@@ -73,7 +73,7 @@ static struct cmd_struct commands[] = {
        { "lock",       cmd_lock,       0 },
        { "kvm",        cmd_kvm,        0 },
        { "test",       cmd_test,       0 },
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
        { "trace",      cmd_trace,      0 },
 #endif
        { "inject",     cmd_inject,     0 },
@@ -491,7 +491,7 @@ int main(int argc, const char **argv)
                argv[0] = cmd;
        }
        if (strstarts(cmd, "trace")) {
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
                setup_path();
                argv[0] = "trace";
                return cmd_trace(argc, argv);
index ca7682748a4b8d34d0dd2b87eaea1e980264291d..78bcf7f8e2066e470e5868c976aaa2a1b305d207 100644 (file)
@@ -1,6 +1,6 @@
 Family-model,Version,Filename,EventType
-209[78],1,cf_z10,core
-281[78],1,cf_z196,core
-282[78],1,cf_zec12,core
-296[45],1,cf_z13,core
-3906,3,cf_z14,core
+^IBM.209[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z10,core
+^IBM.281[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z196,core
+^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
+^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
+^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
index 93656f2fd53a025a23ad88183bfc6da9e96023b1..7e3cce3bcf3b8c69bb0553c4caf700b81bb2e1bd 100644 (file)
@@ -29,7 +29,6 @@ GenuineIntel-6-4D,v13,silvermont,core
 GenuineIntel-6-4C,v13,silvermont,core
 GenuineIntel-6-2A,v15,sandybridge,core
 GenuineIntel-6-2C,v2,westmereep-dp,core
-GenuineIntel-6-2C,v2,westmereep-dp,core
 GenuineIntel-6-25,v2,westmereep-sp,core
 GenuineIntel-6-2F,v2,westmereex,core
 GenuineIntel-6-55,v1,skylakex,core
index f906b793196fc7594a96f795617bdbb73ae42639..8a33ca4f9e1f7feed87159d755ba3b4797a987b6 100644 (file)
@@ -35,3 +35,6 @@ inherit=0
 # sampling disabled
 sample_freq=0
 sample_period=0
+freq=0
+write_backward=0
+sample_id_all=0
index e4123c1b0e8839054188c08d2df3c45052c85b49..1ca5106df5f15a58a20500294c038db954304c3a 100644 (file)
@@ -31,7 +31,7 @@ struct bpf_map_def SEC("maps") flip_table = {
        .max_entries = 1,
 };
 
-SEC("func=SyS_epoll_pwait")
+SEC("func=do_epoll_wait")
 int bpf_func__SyS_epoll_pwait(void *ctx)
 {
        int ind =0;
index 3626924740d8092107a196eb944e18651727c9e3..ff3ec8337f0a2eeb8062ccf6064f677fdd4078a3 100644 (file)
@@ -9,7 +9,6 @@
 #define SEC(NAME) __attribute__((section(NAME), used))
 
 #include <uapi/linux/fs.h>
-#include <uapi/asm/ptrace.h>
 
 SEC("func=vfs_llseek")
 int bpf_func__vfs_llseek(void *ctx)
index 625f5a6772af7c8790d5e60265225688dd642a13..cac8f8889bc3b540355963ee793fb0e4aadbfd12 100644 (file)
@@ -118,6 +118,7 @@ static struct test generic_tests[] = {
        {
                .desc = "Breakpoint accounting",
                .func = test__bp_accounting,
+               .is_supported = test__bp_signal_is_supported,
        },
        {
                .desc = "Number of exit events of a simple workload",
index bb8e6bcb0d967a22291df3896fc90a43bd81bc78..0919b0793e5b681d876630ac5dc7834bed9269c6 100644 (file)
@@ -75,7 +75,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
                snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
                evsels[i] = perf_evsel__newtp("syscalls", name);
                if (IS_ERR(evsels[i])) {
-                       pr_debug("perf_evsel__new\n");
+                       pr_debug("perf_evsel__new(%s)\n", name);
                        goto out_delete_evlist;
                }
 
index 1ecc1f0ff84a19e771a7027419b91df728f693f5..ee86473643be59d117184e645f6233df858a5cea 100755 (executable)
@@ -16,15 +16,13 @@ nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
 trace_libc_inet_pton_backtrace() {
        idx=0
        expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
-       expected[1]=".*inet_pton[[:space:]]\($libc\)$"
+       expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$"
        case "$(uname -m)" in
        s390x)
-               eventattr='call-graph=dwarf'
+               eventattr='call-graph=dwarf,max-stack=4'
                expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
-               expected[3]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$"
+               expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$"
                expected[4]="main[[:space:]]\(.*/bin/ping.*\)$"
-               expected[5]="__libc_start_main[[:space:]]\($libc\)$"
-               expected[6]="_start[[:space:]]\(.*/bin/ping.*\)$"
                ;;
        *)
                eventattr='max-stack=3'
index 417e3ecfe9d730fc1c02e2c2e24fe36e23fbf83a..9f68077b241b9c59475cb5d8403e0002f06769a3 100644 (file)
@@ -54,6 +54,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
        P_MMAP_FLAG(EXECUTABLE);
        P_MMAP_FLAG(FILE);
        P_MMAP_FLAG(FIXED);
+#ifdef MAP_FIXED_NOREPLACE
+       P_MMAP_FLAG(FIXED_NOREPLACE);
+#endif
        P_MMAP_FLAG(GROWSDOWN);
        P_MMAP_FLAG(HUGETLB);
        P_MMAP_FLAG(LOCKED);
index 12c099a87f8baefa1036f7cebc0a8496a89f3fd7..3781d74088a744042ff6dc2d8588154a01110e62 100644 (file)
@@ -692,6 +692,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
                "J             Toggle showing number of jump sources on targets\n"
                "n             Search next string\n"
                "o             Toggle disassembler output/simplified view\n"
+               "O             Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
                "s             Toggle source code view\n"
                "t             Circulate percent, total period, samples view\n"
                "/             Search string\n"
@@ -719,6 +720,10 @@ static int annotate_browser__run(struct annotate_browser *browser,
                        notes->options->use_offset = !notes->options->use_offset;
                        annotation__update_column_widths(notes);
                        continue;
+               case 'O':
+                       if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+                               notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+                       continue;
                case 'j':
                        notes->options->jump_arrows = !notes->options->jump_arrows;
                        continue;
index 0eec06c105c6f4a721a4d29897e73c50d217bab4..e5f247247daa870fdca3497991a886b993938a17 100644 (file)
@@ -2714,7 +2714,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
        "h/?/F1        Show this window\n"                              \
        "UP/DOWN/PGUP\n"                                                \
        "PGDN/SPACE    Navigate\n"                                      \
-       "q/ESC/CTRL+C  Exit browser\n\n"                                \
+       "q/ESC/CTRL+C  Exit browser or go back to previous screen\n\n"  \
        "For multiple event sessions:\n\n"                              \
        "TAB/UNTAB     Switch events\n\n"                               \
        "For symbolic views (--sort has sym):\n\n"                      \
index fbad8dfbb186f52818c0237d490f71c78ab749fe..5d74a30fe00f1f96873c5123e65631c1f8c51d5a 100644 (file)
@@ -46,6 +46,7 @@
 struct annotation_options annotation__default_options = {
        .use_offset     = true,
        .jump_arrows    = true,
+       .offset_level   = ANNOTATION__OFFSET_JUMP_TARGETS,
 };
 
 const char     *disassembler_style;
@@ -1262,6 +1263,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
                                max_percent = sample->percent;
                }
 
+               if (al->samples_nr > nr_percent)
+                       nr_percent = al->samples_nr;
+
                if (max_percent < min_pcnt)
                        return -1;
 
@@ -2512,7 +2516,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
                if (!notes->options->use_offset) {
                        printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
                } else {
-                       if (al->jump_sources) {
+                       if (al->jump_sources &&
+                           notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
                                if (notes->options->show_nr_jumps) {
                                        int prev;
                                        printed = scnprintf(bf, sizeof(bf), "%*d ",
@@ -2523,9 +2528,14 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
                                        obj__printf(obj, bf);
                                        obj__set_color(obj, prev);
                                }
-
+print_addr:
                                printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
                                                    notes->widths.target, addr);
+                       } else if (ins__is_call(&disasm_line(al)->ins) &&
+                                  notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+                               goto print_addr;
+                       } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+                               goto print_addr;
                        } else {
                                printed = scnprintf(bf, sizeof(bf), "%-*s  ",
                                                    notes->widths.addr, " ");
@@ -2642,10 +2652,11 @@ int __annotation__scnprintf_samples_period(struct annotation *notes,
  */
 static struct annotation_config {
        const char *name;
-       bool *value;
+       void *value;
 } annotation__configs[] = {
        ANNOTATION__CFG(hide_src_code),
        ANNOTATION__CFG(jump_arrows),
+       ANNOTATION__CFG(offset_level),
        ANNOTATION__CFG(show_linenr),
        ANNOTATION__CFG(show_nr_jumps),
        ANNOTATION__CFG(show_nr_samples),
@@ -2677,8 +2688,16 @@ static int annotation__config(const char *var, const char *value,
 
        if (cfg == NULL)
                pr_debug("%s variable unknown, ignoring...", var);
-       else
-               *cfg->value = perf_config_bool(name, value);
+       else if (strcmp(var, "annotate.offset_level") == 0) {
+               perf_config_int(cfg->value, name, value);
+
+               if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
+                       *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
+               else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
+                       *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+       } else {
+               *(bool *)cfg->value = perf_config_bool(name, value);
+       }
        return 0;
 }
 
index db8d09bea07eb5e28a6820e192ce976e29734c1b..f28a9e43421d9c364519db7dd9267a23ea89d8e9 100644 (file)
@@ -70,8 +70,17 @@ struct annotation_options {
             show_nr_jumps,
             show_nr_samples,
             show_total_period;
+       u8   offset_level;
 };
 
+enum {
+       ANNOTATION__OFFSET_JUMP_TARGETS = 1,
+       ANNOTATION__OFFSET_CALL,
+       ANNOTATION__MAX_OFFSET_LEVEL,
+};
+
+#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS
+
 extern struct annotation_options annotation__default_options;
 
 struct annotation;
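
These levels back the 'O' key added to the annotate browser in the earlier
hunk: level 1 prints offsets only on jump targets, level 2 adds call
instructions, level 3 prints them everywhere, and the key wraps around. A
one-function sketch of that cycling, matching the bounds defined here:

    static u8 bump_offset_level(u8 level)
    {
            /* 1 (jump targets) -> 2 (+calls) -> 3 (all) -> back to 1 */
            if (++level > ANNOTATION__MAX_OFFSET_LEVEL)
                    level = ANNOTATION__MIN_OFFSET_LEVEL;
            return level;
    }
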
index 640af88331b4ecd7a4509d1120b5bef5b6990ff3..c8b98fa2299736a2f76decf2d3f5458d41974131 100644 (file)
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * SPDX-License-Identifier: GPL-2.0
- *
  * Copyright(C) 2015-2018 Linaro Limited.
  *
  * Author: Tor Jeremiassen <tor@ti.com>
index 1b0d422373be5e8dce853cd15a312b22355147ce..bf16dc9ee507d98bbb8a762f4f4221bbe7733548 100644 (file)
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * SPDX-License-Identifier: GPL-2.0
- *
  * Copyright(C) 2015-2018 Linaro Limited.
  *
  * Author: Tor Jeremiassen <tor@ti.com>
@@ -240,6 +239,7 @@ static void cs_etm__free(struct perf_session *session)
        for (i = 0; i < aux->num_cpu; i++)
                zfree(&aux->metadata[i]);
 
+       thread__zput(aux->unknown_thread);
        zfree(&aux->metadata);
        zfree(&aux);
 }
@@ -613,8 +613,8 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
        return buff->len;
 }
 
-static void  cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
-                                    struct auxtrace_queue *queue)
+static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
+                                   struct auxtrace_queue *queue)
 {
        struct cs_etm_queue *etmq = queue->priv;
 
@@ -1358,6 +1358,23 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
        etm->auxtrace.free = cs_etm__free;
        session->auxtrace = &etm->auxtrace;
 
+       etm->unknown_thread = thread__new(999999999, 999999999);
+       if (!etm->unknown_thread)
+               goto err_free_queues;
+
+       /*
+        * Initialize list node so that at thread__zput() we can avoid
+        * segmentation fault at list_del_init().
+        */
+       INIT_LIST_HEAD(&etm->unknown_thread->node);
+
+       err = thread__set_comm(etm->unknown_thread, "unknown", 0);
+       if (err)
+               goto err_delete_thread;
+
+       if (thread__init_map_groups(etm->unknown_thread, etm->machine))
+               goto err_delete_thread;
+
        if (dump_trace) {
                cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
                return 0;
@@ -1372,16 +1389,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
        err = cs_etm__synth_events(etm, session);
        if (err)
-               goto err_free_queues;
+               goto err_delete_thread;
 
        err = auxtrace_queues__process_index(&etm->queues, session);
        if (err)
-               goto err_free_queues;
+               goto err_delete_thread;
 
        etm->data_queued = etm->queues.populated;
 
        return 0;
 
+err_delete_thread:
+       thread__zput(etm->unknown_thread);
 err_free_queues:
        auxtrace_queues__free(&etm->queues);
        session->auxtrace = NULL;
index 5864d5dca616e9df9d3b256e0928033c7d835c3e..37f8d48179caef841128d0d7f7a8c635bb395769 100644 (file)
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
index f0a6cbd033cc7196afa30bb161bc545e95742bb2..98ff3a6a3d507f9ba41079b19423dbf49aa013b3 100644 (file)
@@ -1421,7 +1421,9 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
 size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
 {
        bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
-       const char *in_out = out ? "OUT" : "IN ";
+       const char *in_out = !out ? "IN         " :
+               !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
+                                   "OUT        " : "OUT preempt";
 
        if (event->header.type == PERF_RECORD_SWITCH)
                return fprintf(fp, " %s\n", in_out);
index 1ac8d9236efddaadb0de61a78bc6fbe4cc869a2c..4cd2cf93f7263e97307b69ec65d0e5c752be51fa 100644 (file)
@@ -930,8 +930,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
         * than leader in case leader 'leads' the sampling.
         */
        if ((leader != evsel) && leader->sample_read) {
-               attr->sample_freq   = 0;
-               attr->sample_period = 0;
+               attr->freq           = 0;
+               attr->sample_freq    = 0;
+               attr->sample_period  = 0;
+               attr->write_backward = 0;
+               attr->sample_id_all  = 0;
        }
 
        if (opts->no_samples)
@@ -1922,7 +1925,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                goto fallback_missing_features;
        } else if (!perf_missing_features.group_read &&
                    evsel->attr.inherit &&
-                  (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
+                  (evsel->attr.read_format & PERF_FORMAT_GROUP) &&
+                  perf_evsel__is_group_leader(evsel)) {
                perf_missing_features.group_read = true;
                pr_debug2("switching off group read\n");
                goto fallback_missing_features;
@@ -2754,8 +2758,14 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
                   (paranoid = perf_event_paranoid()) > 1) {
                const char *name = perf_evsel__name(evsel);
                char *new_name;
+               const char *sep = ":";
 
-               if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0)
+               /* Is the separator already present in the name? */
+               if (strchr(name, '/') ||
+                   strchr(name, ':'))
+                       sep = "";
+
+               if (asprintf(&new_name, "%s%su", name, sep) < 0)
                        return false;
 
                if (evsel->name)
@@ -2870,8 +2880,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 #if defined(__i386__) || defined(__x86_64__)
                if (evsel->attr.type == PERF_TYPE_HARDWARE)
                        return scnprintf(msg, size, "%s",
-       "No hardware sampling interrupt available.\n"
-       "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
+       "No hardware sampling interrupt available.\n");
 #endif
                break;
        case EBUSY:
@@ -2894,8 +2903,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 
        return scnprintf(msg, size,
        "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
-       "/bin/dmesg may provide additional information.\n"
-       "No CONFIG_PERF_EVENTS=y kernel support configured?",
+       "/bin/dmesg | grep -i perf may provide additional information.\n",
                         err, str_error_r(err, sbuf, sizeof(sbuf)),
                         perf_evsel__name(evsel));
 }
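
The separator check added to perf_evsel__fallback() above keeps the retried
user-space-only event name from growing a doubled delimiter. Illustrative
renames (event names are examples, not taken from this diff):

    /*   "cycles"       -> "cycles:u"       no separator yet, ":" inserted
     *   "cycles:p"     -> "cycles:pu"      ":" already present, bare "u"
     *   "cpu/cycles/"  -> "cpu/cycles/u"   "/" already ends the spec    */
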
index d3ee3af618ef5226ec199d0c7c2df80f7726a485..92ec009a292d3efd4cf4738d51533e7e4048e22d 100644 (file)
@@ -115,6 +115,7 @@ struct perf_evsel {
        unsigned int            sample_size;
        int                     id_pos;
        int                     is_pos;
+       bool                    uniquified_name;
        bool                    snapshot;
        bool                    supported;
        bool                    needs_swap;
index ff17920a5ebc62a04fcaeca722da2e1951b82afa..c3cef36d417683581ed7405ac842854057f8ff7b 100755 (executable)
@@ -38,7 +38,7 @@ do
 done
 echo "#endif /* HAVE_LIBELF_SUPPORT */"
 
-echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)"
+echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)"
 sed -n -e 's/^perf-\([^        ]*\)[   ].* audit*/\1/p' command-list.txt |
 sort |
 while read cmd
index 121df1683c36ac770aa91febb346477742eef444..a8bff2178fbc0cbe5c8c56e6548f7027c09ca56f 100644 (file)
@@ -1320,7 +1320,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
 
        dir = opendir(path);
        if (!dir) {
-               pr_warning("failed: can't open node sysfs data\n");
+               pr_debug2("%s: could't read %s, does this arch have topology information?\n",
+                         __func__, path);
                return -1;
        }
 
index 2eca8478e24f0d8b5bf60658f4190bb6213579d0..32d50492505d4b5018dc3fe3794e284e519f8346 100644 (file)
@@ -1019,13 +1019,6 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
        return ret;
 }
 
-static void map_groups__fixup_end(struct map_groups *mg)
-{
-       int i;
-       for (i = 0; i < MAP__NR_TYPES; ++i)
-               __map_groups__fixup_end(mg, i);
-}
-
 static char *get_kernel_version(const char *root_dir)
 {
        char version[PATH_MAX];
@@ -1233,6 +1226,7 @@ int machine__create_kernel_maps(struct machine *machine)
 {
        struct dso *kernel = machine__get_kernel(machine);
        const char *name = NULL;
+       struct map *map;
        u64 addr = 0;
        int ret;
 
@@ -1259,13 +1253,25 @@ int machine__create_kernel_maps(struct machine *machine)
                        machine__destroy_kernel_maps(machine);
                        return -1;
                }
-               machine__set_kernel_mmap(machine, addr, 0);
+
+               /* we have a real start address now, so re-order the kmaps */
+               map = machine__kernel_map(machine);
+
+               map__get(map);
+               map_groups__remove(&machine->kmaps, map);
+
+               /* assume it's the last in the kmaps */
+               machine__set_kernel_mmap(machine, addr, ~0ULL);
+
+               map_groups__insert(&machine->kmaps, map);
+               map__put(map);
        }
 
-       /*
-        * Now that we have all the maps created, just set the ->end of them:
-        */
-       map_groups__fixup_end(&machine->kmaps);
+       /* update end address of the kernel map using adjacent module address */
+       map = map__next(machine__kernel_map(machine));
+       if (map)
+               machine__set_kernel_mmap(machine, addr, map->start);
+
        return 0;
 }
 
index 2fb0272146d8db3c4d2397f0497d85620df28310..b8b8a9558d325c1b904d312e2c96f82699fd82a8 100644 (file)
@@ -1715,7 +1715,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
                struct perf_evsel *last;
 
                if (list_empty(&parse_state.list)) {
-                       WARN_ONCE(true, "WARNING: event parser found nothing");
+                       WARN_ONCE(true, "WARNING: event parser found nothing\n");
                        return -1;
                }
 
index 064bdcb7bd78dcce2303bd24563bddcf42dc86d2..d2fb597c9a8c78d8e8fd8a9890e67f8b8f4432d7 100644 (file)
@@ -539,9 +539,10 @@ static bool pmu_is_uncore(const char *name)
 
 /*
  *  PMU CORE devices have different name other than cpu in sysfs on some
- *  platforms. looking for possible sysfs files to identify as core device.
+ *  platforms.
+ *  Looking for possible sysfs files to identify the arm core device.
  */
-static int is_pmu_core(const char *name)
+static int is_arm_pmu_core(const char *name)
 {
        struct stat st;
        char path[PATH_MAX];
@@ -550,18 +551,18 @@ static int is_pmu_core(const char *name)
        if (!sysfs)
                return 0;
 
-       /* Look for cpu sysfs (x86 and others) */
-       scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs);
-       if ((stat(path, &st) == 0) &&
-                       (strncmp(name, "cpu", strlen("cpu")) == 0))
-               return 1;
-
        /* Look for cpu sysfs (specific to arm) */
        scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
                                sysfs, name);
        if (stat(path, &st) == 0)
                return 1;
 
+       /* Look for cpu sysfs (specific to s390) */
+       scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s",
+                 sysfs, name);
+       if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5))
+               return 1;
+
        return 0;
 }
 
@@ -580,7 +581,7 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
  * cpuid string generated on this platform.
  * Otherwise return non-zero.
  */
-int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
 {
        regex_t re;
        regmatch_t pmatch[1];
@@ -662,6 +663,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
        struct pmu_events_map *map;
        struct pmu_event *pe;
        const char *name = pmu->name;
+       const char *pname;
 
        map = perf_pmu__find_map(pmu);
        if (!map)
@@ -680,11 +682,9 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
                        break;
                }
 
-               if (!is_pmu_core(name)) {
-                       /* check for uncore devices */
-                       if (pe->pmu == NULL)
-                               continue;
-                       if (strncmp(pe->pmu, name, strlen(pe->pmu)))
+               if (!is_arm_pmu_core(name)) {
+                       pname = pe->pmu ? pe->pmu : "cpu";
+                       if (strncmp(pname, name, strlen(pname)))
                                continue;
                }
 
index 62b2dd2253eb5d8b694ac53b76fb4738ca7c7766..1466814ebada5d30d3eb2dad96a6345c10eee6af 100644 (file)
@@ -2091,16 +2091,14 @@ static bool symbol__read_kptr_restrict(void)
 
 int symbol__annotation_init(void)
 {
+       if (symbol_conf.init_annotation)
+               return 0;
+
        if (symbol_conf.initialized) {
                pr_err("Annotation needs to be init before symbol__init()\n");
                return -1;
        }
 
-       if (symbol_conf.init_annotation) {
-               pr_warning("Annotation being initialized multiple times\n");
-               return 0;
-       }
-
        symbol_conf.priv_size += sizeof(struct annotation);
        symbol_conf.init_annotation = true;
        return 0;
index 895122d638dd84394eadc603221b241707488221..0ee7f568d60cced8ab428c3f73cae9d1e7b67620 100644 (file)
@@ -17,7 +17,7 @@
 #include <stdlib.h>
 #include <linux/compiler.h>
 
-#ifdef HAVE_SYSCALL_TABLE
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
 #include <string.h>
 #include "string2.h"
 #include "util.h"
@@ -139,7 +139,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
        return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
 }
 
-#else /* HAVE_SYSCALL_TABLE */
+#else /* HAVE_SYSCALL_TABLE_SUPPORT */
 
 #include <libaudit.h>
 
@@ -176,4 +176,4 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
 {
        return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
 }
-#endif /* HAVE_SYSCALL_TABLE */
+#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
index 0ac9077f62a2ce2e9a9c4e398583623633af82e7..b1e5c3a2b8e36cada0370ec30db338cb0fc45081 100644 (file)
@@ -98,7 +98,7 @@ static void register_python_scripting(struct scripting_ops *scripting_ops)
        }
 }
 
-#ifdef NO_LIBPYTHON
+#ifndef HAVE_LIBPYTHON_SUPPORT
 void setup_python_scripting(void)
 {
        register_python_scripting(&python_scripting_unsupported_ops);
@@ -161,7 +161,7 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops)
        }
 }
 
-#ifdef NO_LIBPERL
+#ifndef HAVE_LIBPERL_SUPPORT
 void setup_perl_scripting(void)
 {
        register_perl_scripting(&perl_scripting_unsupported_ops);
index 2cccbba644187fd7815e2a3fb38f91308cf04de4..f304be71c278ca0c7bc1850f3daf37b41ca19f43 100644 (file)
@@ -56,6 +56,7 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 # to compile vs uClibc, that can be done here as well.
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
+LD = $(CC)
 HOSTCC = gcc
 
 # check if compiler option is supported
index cb166be4918d7713f801e95204559c0abd9e8d62..4ea385be528fc80080bb4b24ed9639896ba7d0c4 100644 (file)
@@ -138,6 +138,7 @@ static u32 handle[] = {
 };
 
 static unsigned long dimm_fail_cmd_flags[NUM_DCR];
+static int dimm_fail_cmd_code[NUM_DCR];
 
 struct nfit_test_fw {
        enum intel_fw_update_state state;
@@ -892,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
        if (i >= ARRAY_SIZE(handle))
                return -ENXIO;
 
-       if ((1 << func) & dimm_fail_cmd_flags[i])
+       if ((1 << func) & dimm_fail_cmd_flags[i]) {
+               if (dimm_fail_cmd_code[i])
+                       return dimm_fail_cmd_code[i];
                return -EIO;
+       }
 
        return i;
 }
@@ -1162,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state)
 
 static void put_dimms(void *data)
 {
-       struct device **dimm_dev = data;
+       struct nfit_test *t = data;
        int i;
 
-       for (i = 0; i < NUM_DCR; i++)
-               if (dimm_dev[i])
-                       device_unregister(dimm_dev[i]);
+       for (i = 0; i < t->num_dcr; i++)
+               if (t->dimm_dev[i])
+                       device_unregister(t->dimm_dev[i]);
 }
 
 static struct class *nfit_test_dimm;
@@ -1176,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev)
 {
        int dimm;
 
-       if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1
-                       || dimm >= NUM_DCR || dimm < 0)
+       if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1)
                return -ENXIO;
        return dimm;
 }
 
-
 static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
                char *buf)
 {
@@ -1191,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
        if (dimm < 0)
                return dimm;
 
-       return sprintf(buf, "%#x", handle[dimm]);
+       return sprintf(buf, "%#x\n", handle[dimm]);
 }
 DEVICE_ATTR_RO(handle);
 
@@ -1225,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(fail_cmd);
 
+static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       int dimm = dimm_name_to_id(dev);
+
+       if (dimm < 0)
+               return dimm;
+
+       return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]);
+}
+
+static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr,
+               const char *buf, size_t size)
+{
+       int dimm = dimm_name_to_id(dev);
+       unsigned long val;
+       ssize_t rc;
+
+       if (dimm < 0)
+               return dimm;
+
+       rc = kstrtol(buf, 0, &val);
+       if (rc)
+               return rc;
+
+       dimm_fail_cmd_code[dimm] = val;
+       return size;
+}
+static DEVICE_ATTR_RW(fail_cmd_code);
+
 static struct attribute *nfit_test_dimm_attributes[] = {
        &dev_attr_fail_cmd.attr,
+       &dev_attr_fail_cmd_code.attr,
        &dev_attr_handle.attr,
        NULL,
 };
@@ -1240,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
        NULL,
 };
 
+static int nfit_test_dimm_init(struct nfit_test *t)
+{
+       int i;
+
+       if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t))
+               return -ENOMEM;
+       for (i = 0; i < t->num_dcr; i++) {
+               t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+                               &t->pdev.dev, 0, NULL,
+                               nfit_test_dimm_attribute_groups,
+                               "test_dimm%d", i + t->dcr_idx);
+               if (!t->dimm_dev[i])
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
 static void smart_init(struct nfit_test *t)
 {
        int i;
@@ -1335,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
        if (!t->_fit)
                return -ENOMEM;
 
-       if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev))
+       if (nfit_test_dimm_init(t))
                return -ENOMEM;
-       for (i = 0; i < NUM_DCR; i++) {
-               t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
-                               &t->pdev.dev, 0, NULL,
-                               nfit_test_dimm_attribute_groups,
-                               "test_dimm%d", i);
-               if (!t->dimm_dev[i])
-                       return -ENOMEM;
-       }
-
        smart_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
@@ -1377,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t)
        if (!t->spa_set[1])
                return -ENOMEM;
 
+       if (nfit_test_dimm_init(t))
+               return -ENOMEM;
        smart_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
@@ -2222,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t)
        set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
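
As an aside, the new fail_cmd_code attribute is exercised from user space through sysfs. Below is a hedged sketch of a writer; the sysfs directory is an assumption based on the "test_dimm%d" naming used by nfit_test_dimm_init() above, and the interplay with the existing fail_cmd mask decides which commands actually report the stored code.

    /* hypothetical user-space helper; the class directory name is assumed */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static int set_fail_cmd_code(int dimm, const char *code)
    {
            char path[128];
            ssize_t n;
            int fd;

            snprintf(path, sizeof(path),
                     "/sys/class/nfit_test_dimm/test_dimm%d/fail_cmd_code", dimm);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            /* the store side parses with base 0, so "0x4" and "4" both work */
            n = write(fd, code, strlen(code));
            close(fd);
            return n == (ssize_t)strlen(code) ? 0 : -1;
    }
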
index fa7ee369b3c919c534e610a621c1bdd54bf1e986..db66f8a0d4bed71aa581cdbf82c9acf422daef05 100644 (file)
@@ -17,7 +17,7 @@ ifeq ($(BUILD), 32)
        LDFLAGS += -m32
 endif
 
-targets: mapshift $(TARGETS)
+targets: generated/map-shift.h $(TARGETS)
 
 main:  $(OFILES)
 
@@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
        sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
-.PHONY: mapshift
-
-mapshift:
+generated/map-shift.h:
        @if ! grep -qws $(SHIFT) generated/map-shift.h; then            \
                echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >          \
                                generated/map-shift.h;                  \
index 59245b3d587c35c039c77e673bdd706319368167..7bf405638b0beef5d7ca578645e4918c02c06560 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/radix-tree.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
+#include <pthread.h>
 
 #include "test.h"
 
@@ -624,6 +625,67 @@ static void multiorder_account(void)
        item_kill_tree(&tree);
 }
 
+bool stop_iteration = false;
+
+static void *creator_func(void *ptr)
+{
+       /* 'order' is set up to ensure we have sibling entries */
+       unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
+       struct radix_tree_root *tree = ptr;
+       int i;
+
+       for (i = 0; i < 10000; i++) {
+               item_insert_order(tree, 0, order);
+               item_delete_rcu(tree, 0);
+       }
+
+       stop_iteration = true;
+       return NULL;
+}
+
+static void *iterator_func(void *ptr)
+{
+       struct radix_tree_root *tree = ptr;
+       struct radix_tree_iter iter;
+       struct item *item;
+       void **slot;
+
+       while (!stop_iteration) {
+               rcu_read_lock();
+               radix_tree_for_each_slot(slot, tree, &iter, 0) {
+                       item = radix_tree_deref_slot(slot);
+
+                       if (!item)
+                               continue;
+                       if (radix_tree_deref_retry(item)) {
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
+                       }
+
+                       item_sanity(item, iter.index);
+               }
+               rcu_read_unlock();
+       }
+       return NULL;
+}
+
+static void multiorder_iteration_race(void)
+{
+       const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+       pthread_t worker_thread[num_threads];
+       RADIX_TREE(tree, GFP_KERNEL);
+       int i;
+
+       pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+       for (i = 1; i < num_threads; i++)
+               pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+
+       for (i = 0; i < num_threads; i++)
+               pthread_join(worker_thread[i], NULL);
+
+       item_kill_tree(&tree);
+}
+
 void multiorder_checks(void)
 {
        int i;
@@ -644,6 +706,7 @@ void multiorder_checks(void)
        multiorder_join();
        multiorder_split();
        multiorder_account();
+       multiorder_iteration_race();
 
        radix_tree_cpu_dead(0);
 }
index 5978ab1f403d97f135ae7f026a07caab71113212..def6015570b22a46274cd51b6f2135c6b6157090 100644 (file)
@@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index)
        return 0;
 }
 
+static void item_free_rcu(struct rcu_head *head)
+{
+       struct item *item = container_of(head, struct item, rcu_head);
+
+       free(item);
+}
+
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+{
+       struct item *item = radix_tree_delete(root, index);
+
+       if (item) {
+               item_sanity(item, index);
+               call_rcu(&item->rcu_head, item_free_rcu);
+               return 1;
+       }
+       return 0;
+}
+
 void item_check_present(struct radix_tree_root *root, unsigned long index)
 {
        struct item *item;
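
The item_free_rcu()/call_rcu() pairing above is the standard RCU deferred-free idiom. For readers unfamiliar with it, a minimal standalone sketch (using the same shimmed kernel headers as this test suite; container_of() comes from them) looks like:

    #include <stdlib.h>
    #include <linux/rcupdate.h>

    struct obj {
            struct rcu_head rcu;
            int payload;
    };

    static void obj_free_rcu(struct rcu_head *head)
    {
            /* container_of() recovers the enclosing object from its rcu_head */
            free(container_of(head, struct obj, rcu));
    }

    static void obj_retire(struct obj *o)
    {
            /* readers inside rcu_read_lock() may still hold a pointer to 'o';
             * defer the actual free until a grace period has elapsed */
            call_rcu(&o->rcu, obj_free_rcu);
    }
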
index d9c031dbeb1a9634451608a29592a74002958dc3..31f1d9b6f506ef884c978b568aa9dc5d89fd8312 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/rcupdate.h>
 
 struct item {
+       struct rcu_head rcu_head;
        unsigned long index;
        unsigned int order;
 };
@@ -12,9 +13,11 @@ struct item {
 struct item *item_create(unsigned long index, unsigned int order);
 int __item_insert(struct radix_tree_root *root, struct item *item);
 int item_insert(struct radix_tree_root *root, unsigned long index);
+void item_sanity(struct item *item, unsigned long index);
 int item_insert_order(struct radix_tree_root *root, unsigned long index,
                        unsigned order);
 int item_delete(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
 struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
 
 void item_check_present(struct radix_tree_root *root, unsigned long index);
index 9cf83f895d98873ec81044ce4c4c0ecb89ac5ec3..adc8e5474b66ccda0b0d656c044bf5adc19f724f 100644 (file)
@@ -12,3 +12,8 @@ test_tcpbpf_user
 test_verifier_log
 feature
 test_libbpf_open
+test_sock
+test_sock_addr
+urandom_read
+test_btf
+test_sockmap
index 0b72cc7596f182a553a9c0fe70b69cf812569c5b..1eb0fa2aba92fd5598c4b2e555abf173dd4e23cd 100644 (file)
@@ -10,7 +10,7 @@ ifneq ($(wildcard $(GENHDR)),)
   GENFLAGS := -DHAVE_GENHDR
 endif
 
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf -lrt -lpthread
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
@@ -19,12 +19,12 @@ all: $(TEST_CUSTOM_PROGS)
 $(TEST_CUSTOM_PROGS): urandom_read
 
 urandom_read: urandom_read.c
-       $(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+       $(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
 
 # Order corresponds to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
-       test_sock test_sock_addr test_btf
+       test_sock test_btf test_sockmap
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
        test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
@@ -32,7 +32,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
        test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
        sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
        sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
-       test_btf_haskv.o test_btf_nokv.o
+       test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
+       test_get_stack_rawtp.o test_sockhash_kern.o
 
 # Order corresponds to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -40,10 +41,11 @@ TEST_PROGS := test_kmod.sh \
        test_xdp_redirect.sh \
        test_xdp_meta.sh \
        test_offload.py \
-       test_sock_addr.sh
+       test_sock_addr.sh \
+       test_tunnel.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
 
 include ../lib.mk
 
@@ -56,6 +58,8 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
 
 .PHONY: force
 
@@ -86,9 +90,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
-BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help |& grep dwarfris)
-BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help |& grep BTF)
-BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version |& grep LLVM)
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version 2>&1 | grep LLVM)
 
 ifneq ($(BTF_LLC_PROBE),)
 ifneq ($(BTF_PAHOLE_PROBE),)
index 9271576bdc8f26410a1df1a5ab679dd5cab768d8..8f143dfb370032c9309069db7e6ee21815367875 100644 (file)
@@ -75,9 +75,14 @@ static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
        (void *) BPF_FUNC_sock_ops_cb_flags_set;
 static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
        (void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
+       (void *) BPF_FUNC_sk_redirect_hash;
 static int (*bpf_sock_map_update)(void *map, void *key, void *value,
                                  unsigned long long flags) =
        (void *) BPF_FUNC_sock_map_update;
+static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
+                                  unsigned long long flags) =
+       (void *) BPF_FUNC_sock_hash_update;
 static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
                                        void *buf, unsigned int buf_size) =
        (void *) BPF_FUNC_perf_event_read_value;
@@ -88,6 +93,9 @@ static int (*bpf_override_return)(void *ctx, unsigned long rc) =
        (void *) BPF_FUNC_override_return;
 static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
        (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_redirect_hash)(void *ctx,
+                                   void *map, void *key, int flags) =
+       (void *) BPF_FUNC_msg_redirect_hash;
 static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
        (void *) BPF_FUNC_msg_apply_bytes;
 static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
@@ -98,7 +106,14 @@ static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
        (void *) BPF_FUNC_bind;
 static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
        (void *) BPF_FUNC_xdp_adjust_tail;
-
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+                                    int size, int flags) =
+       (void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+       (void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+                            int plen, __u32 flags) =
+       (void *) BPF_FUNC_fib_lookup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
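
These stubs are what the BPF C programs in this suite link against. As one illustration of the new hash-based helpers, a minimal sk_msg program might look like the following; the map name, sizes, and key scheme are purely illustrative:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") sock_hash = {
            .type = BPF_MAP_TYPE_SOCKHASH,
            .key_size = sizeof(__u32),
            .value_size = sizeof(__u32),
            .max_entries = 16,
    };

    SEC("sk_msg")
    int msg_redirect(struct sk_msg_md *msg)
    {
            __u32 key = 0;  /* illustrative key scheme */

            /* steer the message to the socket stored under 'key' */
            return bpf_msg_redirect_hash(msg, &sock_hash, &key, BPF_F_INGRESS);
    }

    char _license[] SEC("license") = "GPL";
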
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644 (file)
index 0000000..59bf3e1
--- /dev/null
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+       return (((uint64_t)(uint32_t)rand()) |
+               ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m)                      \
+static inline uint64_t bpf_rand_u##x(int shift)        \
+{                                              \
+       return bpf_rand_mask((m)) << shift;     \
+}
+
+bpf_rand_ux( 8,               0xffULL)
+bpf_rand_ux(16,             0xffffULL)
+bpf_rand_ux(24,           0xffffffULL)
+bpf_rand_ux(32,         0xffffffffULL)
+bpf_rand_ux(40,       0xffffffffffULL)
+bpf_rand_ux(48,     0xffffffffffffULL)
+bpf_rand_ux(56,   0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+       srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+       switch (rand() % 39) {
+       case  0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
+       case  1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
+       case  2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+       case  3: return 0x8000000000000000ULL | bpf_rand_u32(0);
+       case  4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
+       case  5: return 0x0000000100000000ULL | bpf_rand_u24(0);
+       case  6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
+       case  7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+       case  8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+       case  9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+       case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+       case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+       case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+       case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+       case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+       case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+       case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+       case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+       case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+       case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+       case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+       case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+       case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+       case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+       case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+       case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+       case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+       case 27: return 0x0000800000000000ULL;
+       case 28: return 0x8000000000000000ULL;
+       case 29: return 0x0000000000000000ULL;
+       case 30: return 0xffffffffffffffffULL;
+       case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+       case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+       case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+       case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+       case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+       case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+       case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+       default: return bpf_rand_u64(0);
+       }
+}
+
+#endif /* __BPF_RAND__ */
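
A quick sketch of how bpf_rand.h is meant to be consumed by a test harness (purely illustrative):

    #include <stdio.h>
    #include "bpf_rand.h"

    int main(void)
    {
            int i;

            bpf_semi_rand_init();   /* seeds with time(NULL) */
            for (i = 0; i < 4; i++)
                    /* values are biased toward interesting bit patterns:
                     * sign bits, byte boundaries, all-zeros, all-ones, ... */
                    printf("%#llx\n", (unsigned long long)bpf_semi_rand_get());
            return 0;
    }
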
index 7b39b1f712a1a461ddfdf07caee2238b57845c8e..c8bceae7ec02fb8002677a74ca88cff16ce8e07f 100644 (file)
 
 #include "bpf_rlimit.h"
 
+static uint32_t pass_cnt;
+static uint32_t error_cnt;
+static uint32_t skip_cnt;
+
+#define CHECK(condition, format...) ({                                 \
+       int __ret = !!(condition);                                      \
+       if (__ret) {                                                    \
+               fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);     \
+               fprintf(stderr, format);                                \
+       }                                                               \
+       __ret;                                                          \
+})
+
+static int count_result(int err)
+{
+       if (err)
+               error_cnt++;
+       else
+               pass_cnt++;
+
+       fprintf(stderr, "\n");
+       return err;
+}
+
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define __printf(a, b) __attribute__((format(printf, a, b)))
 
@@ -894,17 +918,13 @@ static void *btf_raw_create(const struct btf_header *hdr,
        void *raw_btf;
 
        type_sec_size = get_type_sec_size(raw_types);
-       if (type_sec_size < 0) {
-               fprintf(stderr, "Cannot get nr_raw_types\n");
+       if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
                return NULL;
-       }
 
        size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
        raw_btf = malloc(size_needed);
-       if (!raw_btf) {
-               fprintf(stderr, "Cannot allocate memory for raw_btf\n");
+       if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
                return NULL;
-       }
 
        /* Copy header */
        memcpy(raw_btf, hdr, sizeof(*hdr));
@@ -915,8 +935,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
        for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
                if (raw_types[i] == NAME_TBD) {
                        next_str = get_next_str(next_str, end_str);
-                       if (!next_str) {
-                               fprintf(stderr, "Error in getting next_str\n");
+                       if (CHECK(!next_str, "Error in getting next_str")) {
                                free(raw_btf);
                                return NULL;
                        }
@@ -973,9 +992,8 @@ static int do_test_raw(unsigned int test_num)
        free(raw_btf);
 
        err = ((btf_fd == -1) != test->btf_load_err);
-       if (err)
-               fprintf(stderr, "btf_load_err:%d btf_fd:%d\n",
-                       test->btf_load_err, btf_fd);
+       CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+             btf_fd, test->btf_load_err);
 
        if (err || btf_fd == -1)
                goto done;
@@ -992,16 +1010,15 @@ static int do_test_raw(unsigned int test_num)
        map_fd = bpf_create_map_xattr(&create_attr);
 
        err = ((map_fd == -1) != test->map_create_err);
-       if (err)
-               fprintf(stderr, "map_create_err:%d map_fd:%d\n",
-                       test->map_create_err, map_fd);
+       CHECK(err, "map_fd:%d test->map_create_err:%u",
+             map_fd, test->map_create_err);
 
 done:
        if (!err)
-               fprintf(stderr, "OK\n");
+               fprintf(stderr, "OK");
 
        if (*btf_log_buf && (err || args.always_log))
-               fprintf(stderr, "%s\n", btf_log_buf);
+               fprintf(stderr, "\n%s", btf_log_buf);
 
        if (btf_fd != -1)
                close(btf_fd);
@@ -1017,10 +1034,10 @@ static int test_raw(void)
        int err = 0;
 
        if (args.raw_test_num)
-               return do_test_raw(args.raw_test_num);
+               return count_result(do_test_raw(args.raw_test_num));
 
        for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
-               err |= do_test_raw(i);
+               err |= count_result(do_test_raw(i));
 
        return err;
 }
@@ -1030,9 +1047,13 @@ struct btf_get_info_test {
        const char *str_sec;
        __u32 raw_types[MAX_NR_RAW_TYPES];
        __u32 str_sec_size;
-       int info_size_delta;
+       int btf_size_delta;
+       int (*special_test)(unsigned int test_num);
 };
 
+static int test_big_btf_info(unsigned int test_num);
+static int test_btf_id(unsigned int test_num);
+
 const struct btf_get_info_test get_info_tests[] = {
 {
        .descr = "== raw_btf_size+1",
@@ -1043,7 +1064,7 @@ const struct btf_get_info_test get_info_tests[] = {
        },
        .str_sec = "",
        .str_sec_size = sizeof(""),
-       .info_size_delta = 1,
+       .btf_size_delta = 1,
 },
 {
        .descr = "== raw_btf_size-3",
@@ -1054,20 +1075,274 @@ const struct btf_get_info_test get_info_tests[] = {
        },
        .str_sec = "",
        .str_sec_size = sizeof(""),
-       .info_size_delta = -3,
+       .btf_size_delta = -3,
+},
+{
+       .descr = "Large bpf_btf_info",
+       .raw_types = {
+               /* int */                               /* [1] */
+               BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+               BTF_END_RAW,
+       },
+       .str_sec = "",
+       .str_sec_size = sizeof(""),
+       .special_test = test_big_btf_info,
+},
+{
+       .descr = "BTF ID",
+       .raw_types = {
+               /* int */                               /* [1] */
+               BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+               /* unsigned int */                      /* [2] */
+               BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+               BTF_END_RAW,
+       },
+       .str_sec = "",
+       .str_sec_size = sizeof(""),
+       .special_test = test_btf_id,
 },
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+       return (__u64)(unsigned long)ptr;
+}
+
+static int test_big_btf_info(unsigned int test_num)
+{
+       const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+       uint8_t *raw_btf = NULL, *user_btf = NULL;
+       unsigned int raw_btf_size;
+       struct {
+               struct bpf_btf_info info;
+               uint64_t garbage;
+       } info_garbage;
+       struct bpf_btf_info *info;
+       int btf_fd = -1, err;
+       uint32_t info_len;
+
+       raw_btf = btf_raw_create(&hdr_tmpl,
+                                test->raw_types,
+                                test->str_sec,
+                                test->str_sec_size,
+                                &raw_btf_size);
+
+       if (!raw_btf)
+               return -1;
+
+       *btf_log_buf = '\0';
+
+       user_btf = malloc(raw_btf_size);
+       if (CHECK(!user_btf, "!user_btf")) {
+               err = -1;
+               goto done;
+       }
+
+       btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+                             btf_log_buf, BTF_LOG_BUF_SIZE,
+                             args.always_log);
+       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+               err = -1;
+               goto done;
+       }
+
+       /*
+        * GET_INFO should error out if the userspace info
+        * has non-zero trailing bytes.
+        */
+       info = &info_garbage.info;
+       memset(info, 0, sizeof(*info));
+       info_garbage.garbage = 0xdeadbeef;
+       info_len = sizeof(info_garbage);
+       info->btf = ptr_to_u64(user_btf);
+       info->btf_size = raw_btf_size;
+
+       err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+       if (CHECK(!err, "!err")) {
+               err = -1;
+               goto done;
+       }
+
+       /*
+        * GET_INFO should succeed even if info_len is larger than
+        * what the kernel supports, as long as the trailing bytes
+        * are zero. The kernel-supported info length should also
+        * be returned to userspace.
+        */
+       info_garbage.garbage = 0;
+       err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+       if (CHECK(err || info_len != sizeof(*info),
+                 "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+                 err, errno, info_len, sizeof(*info))) {
+               err = -1;
+               goto done;
+       }
+
+       fprintf(stderr, "OK");
+
+done:
+       if (*btf_log_buf && (err || args.always_log))
+               fprintf(stderr, "\n%s", btf_log_buf);
+
+       free(raw_btf);
+       free(user_btf);
+
+       if (btf_fd != -1)
+               close(btf_fd);
+
+       return err;
+}
+
+static int test_btf_id(unsigned int test_num)
+{
+       const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+       struct bpf_create_map_attr create_attr = {};
+       uint8_t *raw_btf = NULL, *user_btf[2] = {};
+       int btf_fd[2] = {-1, -1}, map_fd = -1;
+       struct bpf_map_info map_info = {};
+       struct bpf_btf_info info[2] = {};
+       unsigned int raw_btf_size;
+       uint32_t info_len;
+       int err, i, ret;
+
+       raw_btf = btf_raw_create(&hdr_tmpl,
+                                test->raw_types,
+                                test->str_sec,
+                                test->str_sec_size,
+                                &raw_btf_size);
+
+       if (!raw_btf)
+               return -1;
+
+       *btf_log_buf = '\0';
+
+       for (i = 0; i < 2; i++) {
+               user_btf[i] = malloc(raw_btf_size);
+               if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
+                       err = -1;
+                       goto done;
+               }
+               info[i].btf = ptr_to_u64(user_btf[i]);
+               info[i].btf_size = raw_btf_size;
+       }
+
+       btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
+                                btf_log_buf, BTF_LOG_BUF_SIZE,
+                                args.always_log);
+       if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+               err = -1;
+               goto done;
+       }
+
+       /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
+       info_len = sizeof(info[0]);
+       err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+       if (CHECK(err, "errno:%d", errno)) {
+               err = -1;
+               goto done;
+       }
+
+       btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
+       if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+               err = -1;
+               goto done;
+       }
+
+       ret = 0;
+       err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+       if (CHECK(err || info[0].id != info[1].id ||
+                 info[0].btf_size != info[1].btf_size ||
+                 (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
+                 "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
+                 err, errno, info[0].id, info[1].id,
+                 info[0].btf_size, info[1].btf_size, ret)) {
+               err = -1;
+               goto done;
+       }
+
+       /* Test btf members in struct bpf_map_info */
+       create_attr.name = "test_btf_id";
+       create_attr.map_type = BPF_MAP_TYPE_ARRAY;
+       create_attr.key_size = sizeof(int);
+       create_attr.value_size = sizeof(unsigned int);
+       create_attr.max_entries = 4;
+       create_attr.btf_fd = btf_fd[0];
+       create_attr.btf_key_id = 1;
+       create_attr.btf_value_id = 2;
+
+       map_fd = bpf_create_map_xattr(&create_attr);
+       if (CHECK(map_fd == -1, "errno:%d", errno)) {
+               err = -1;
+               goto done;
+       }
+
+       info_len = sizeof(map_info);
+       err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+       if (CHECK(err || map_info.btf_id != info[0].id ||
+                 map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
+                 "err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
+                 err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
+                 map_info.btf_value_id)) {
+               err = -1;
+               goto done;
+       }
+
+       for (i = 0; i < 2; i++) {
+               close(btf_fd[i]);
+               btf_fd[i] = -1;
+       }
+
+       /* The BTF ID must still be live here: the map holds a reference */
+       btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+       if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+               err = -1;
+               goto done;
+       }
+       close(btf_fd[0]);
+       btf_fd[0] = -1;
+
+       /* The map holds the last ref to BTF and its btf_id */
+       close(map_fd);
+       map_fd = -1;
+       btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+       if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+               err = -1;
+               goto done;
+       }
+
+       fprintf(stderr, "OK");
+
+done:
+       if (*btf_log_buf && (err || args.always_log))
+               fprintf(stderr, "\n%s", btf_log_buf);
+
+       free(raw_btf);
+       if (map_fd != -1)
+               close(map_fd);
+       for (i = 0; i < 2; i++) {
+               free(user_btf[i]);
+               if (btf_fd[i] != -1)
+                       close(btf_fd[i]);
+       }
+
+       return err;
+}
+
 static int do_test_get_info(unsigned int test_num)
 {
        const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
        unsigned int raw_btf_size, user_btf_size, expected_nbytes;
        uint8_t *raw_btf = NULL, *user_btf = NULL;
-       int btf_fd = -1, err;
+       struct bpf_btf_info info = {};
+       int btf_fd = -1, err, ret;
+       uint32_t info_len;
 
-       fprintf(stderr, "BTF GET_INFO_BY_ID test[%u] (%s): ",
+       fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
                test_num, test->descr);
 
+       if (test->special_test)
+               return test->special_test(test_num);
+
        raw_btf = btf_raw_create(&hdr_tmpl,
                                 test->raw_types,
                                 test->str_sec,
@@ -1080,8 +1355,7 @@ static int do_test_get_info(unsigned int test_num)
        *btf_log_buf = '\0';
 
        user_btf = malloc(raw_btf_size);
-       if (!user_btf) {
-               fprintf(stderr, "Cannot allocate memory for user_btf\n");
+       if (CHECK(!user_btf, "!user_btf")) {
                err = -1;
                goto done;
        }
@@ -1089,45 +1363,48 @@ static int do_test_get_info(unsigned int test_num)
        btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
                              btf_log_buf, BTF_LOG_BUF_SIZE,
                              args.always_log);
-       if (btf_fd == -1) {
-               fprintf(stderr, "bpf_load_btf:%s(%d)\n",
-                       strerror(errno), errno);
+       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
 
-       user_btf_size = (int)raw_btf_size + test->info_size_delta;
+       user_btf_size = (int)raw_btf_size + test->btf_size_delta;
        expected_nbytes = min(raw_btf_size, user_btf_size);
        if (raw_btf_size > expected_nbytes)
                memset(user_btf + expected_nbytes, 0xff,
                       raw_btf_size - expected_nbytes);
 
-       err = bpf_obj_get_info_by_fd(btf_fd, user_btf, &user_btf_size);
-       if (err || user_btf_size != raw_btf_size ||
-           memcmp(raw_btf, user_btf, expected_nbytes)) {
-               fprintf(stderr,
-                       "err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d\n",
-                       err, errno,
-                       raw_btf_size, user_btf_size, expected_nbytes,
-                       memcmp(raw_btf, user_btf, expected_nbytes));
+       info_len = sizeof(info);
+       info.btf = ptr_to_u64(user_btf);
+       info.btf_size = user_btf_size;
+
+       ret = 0;
+       err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+       if (CHECK(err || !info.id || info_len != sizeof(info) ||
+                 info.btf_size != raw_btf_size ||
+                 (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
+                 "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+                 err, errno, info.id, info_len, sizeof(info),
+                 raw_btf_size, info.btf_size, expected_nbytes, ret)) {
                err = -1;
                goto done;
        }
 
        while (expected_nbytes < raw_btf_size) {
                fprintf(stderr, "%u...", expected_nbytes);
-               if (user_btf[expected_nbytes++] != 0xff) {
-                       fprintf(stderr, "!= 0xff\n");
+               if (CHECK(user_btf[expected_nbytes++] != 0xff,
+                         "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
+                         user_btf[expected_nbytes - 1])) {
                        err = -1;
                        goto done;
                }
        }
 
-       fprintf(stderr, "OK\n");
+       fprintf(stderr, "OK");
 
 done:
        if (*btf_log_buf && (err || args.always_log))
-               fprintf(stderr, "%s\n", btf_log_buf);
+               fprintf(stderr, "\n%s", btf_log_buf);
 
        free(raw_btf);
        free(user_btf);
@@ -1144,10 +1421,10 @@ static int test_get_info(void)
        int err = 0;
 
        if (args.get_info_test_num)
-               return do_test_get_info(args.get_info_test_num);
+               return count_result(do_test_get_info(args.get_info_test_num));
 
        for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
-               err |= do_test_get_info(i);
+               err |= count_result(do_test_get_info(i));
 
        return err;
 }
@@ -1175,28 +1452,21 @@ static int file_has_btf_elf(const char *fn)
        Elf *elf;
        int ret;
 
-       if (elf_version(EV_CURRENT) == EV_NONE) {
-               fprintf(stderr, "Failed to init libelf\n");
+       if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
+                 "elf_version(EV_CURRENT) == EV_NONE"))
                return -1;
-       }
 
        elf_fd = open(fn, O_RDONLY);
-       if (elf_fd == -1) {
-               fprintf(stderr, "Cannot open file %s: %s(%d)\n",
-                       fn, strerror(errno), errno);
+       if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
                return -1;
-       }
 
        elf = elf_begin(elf_fd, ELF_C_READ, NULL);
-       if (!elf) {
-               fprintf(stderr, "Failed to read ELF from %s. %s\n", fn,
-                       elf_errmsg(elf_errno()));
+       if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
                ret = -1;
                goto done;
        }
 
-       if (!gelf_getehdr(elf, &ehdr)) {
-               fprintf(stderr, "Failed to get EHDR from %s\n", fn);
+       if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
                ret = -1;
                goto done;
        }
@@ -1205,9 +1475,8 @@ static int file_has_btf_elf(const char *fn)
                const char *sh_name;
                GElf_Shdr sh;
 
-               if (gelf_getshdr(scn, &sh) != &sh) {
-                       fprintf(stderr,
-                               "Failed to get section header from %s\n", fn);
+               if (CHECK(gelf_getshdr(scn, &sh) != &sh,
+                         "file:%s gelf_getshdr != &sh", fn)) {
                        ret = -1;
                        goto done;
                }
@@ -1243,53 +1512,44 @@ static int do_test_file(unsigned int test_num)
                return err;
 
        if (err == 0) {
-               fprintf(stderr, "SKIP. No ELF %s found\n", BTF_ELF_SEC);
+               fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+               skip_cnt++;
                return 0;
        }
 
        obj = bpf_object__open(test->file);
-       if (IS_ERR(obj))
+       if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
                return PTR_ERR(obj);
 
        err = bpf_object__btf_fd(obj);
-       if (err == -1) {
-               fprintf(stderr, "bpf_object__btf_fd: -1\n");
+       if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
                goto done;
-       }
 
        prog = bpf_program__next(NULL, obj);
-       if (!prog) {
-               fprintf(stderr, "Cannot find bpf_prog\n");
+       if (CHECK(!prog, "Cannot find bpf_prog")) {
                err = -1;
                goto done;
        }
 
        bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
        err = bpf_object__load(obj);
-       if (err < 0) {
-               fprintf(stderr, "bpf_object__load: %d\n", err);
+       if (CHECK(err < 0, "bpf_object__load: %d", err))
                goto done;
-       }
 
        map = bpf_object__find_map_by_name(obj, "btf_map");
-       if (!map) {
-               fprintf(stderr, "btf_map not found\n");
+       if (CHECK(!map, "btf_map not found")) {
                err = -1;
                goto done;
        }
 
        err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
                != test->btf_kv_notfound;
-       if (err) {
-               fprintf(stderr,
-                       "btf_kv_notfound:%u btf_key_id:%u btf_value_id:%u\n",
-                       test->btf_kv_notfound,
-                       bpf_map__btf_key_id(map),
-                       bpf_map__btf_value_id(map));
+       if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
+                 bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
+                 test->btf_kv_notfound))
                goto done;
-       }
 
-       fprintf(stderr, "OK\n");
+       fprintf(stderr, "OK");
 
 done:
        bpf_object__close(obj);
@@ -1302,10 +1562,10 @@ static int test_file(void)
        int err = 0;
 
        if (args.file_test_num)
-               return do_test_file(args.file_test_num);
+               return count_result(do_test_file(args.file_test_num));
 
        for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
-               err |= do_test_file(i);
+               err |= count_result(do_test_file(i));
 
        return err;
 }
@@ -1425,7 +1685,7 @@ static int test_pprint(void)
        unsigned int key;
        uint8_t *raw_btf;
        ssize_t nread;
-       int err;
+       int err, ret;
 
        fprintf(stderr, "%s......", test->descr);
        raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
@@ -1441,10 +1701,8 @@ static int test_pprint(void)
                              args.always_log);
        free(raw_btf);
 
-       if (btf_fd == -1) {
+       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
                err = -1;
-               fprintf(stderr, "bpf_load_btf: %s(%d)\n",
-                       strerror(errno), errno);
                goto done;
        }
 
@@ -1458,26 +1716,23 @@ static int test_pprint(void)
        create_attr.btf_value_id = test->value_id;
 
        map_fd = bpf_create_map_xattr(&create_attr);
-       if (map_fd == -1) {
+       if (CHECK(map_fd == -1, "errno:%d", errno)) {
                err = -1;
-               fprintf(stderr, "bpf_creat_map_btf: %s(%d)\n",
-                       strerror(errno), errno);
                goto done;
        }
 
-       if (snprintf(pin_path, sizeof(pin_path), "%s/%s",
-                    "/sys/fs/bpf", test->map_name) == sizeof(pin_path)) {
+       ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+                      "/sys/fs/bpf", test->map_name);
+
+       if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+                 "/sys/fs/bpf", test->map_name)) {
                err = -1;
-               fprintf(stderr, "pin_path is too long\n");
                goto done;
        }
 
        err = bpf_obj_pin(map_fd, pin_path);
-       if (err) {
-               fprintf(stderr, "Cannot pin to %s. %s(%d).\n", pin_path,
-                       strerror(errno), errno);
+       if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
                goto done;
-       }
 
        for (key = 0; key < test->max_entries; key++) {
                set_pprint_mapv(&mapv, key);
@@ -1485,10 +1740,8 @@ static int test_pprint(void)
        }
 
        pin_file = fopen(pin_path, "r");
-       if (!pin_file) {
+       if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
                err = -1;
-               fprintf(stderr, "fopen(%s): %s(%d)\n", pin_path,
-                       strerror(errno), errno);
                goto done;
        }
 
@@ -1497,9 +1750,8 @@ static int test_pprint(void)
               *line == '#')
                ;
 
-       if (nread <= 0) {
+       if (CHECK(nread <= 0, "Unexpected EOF")) {
                err = -1;
-               fprintf(stderr, "Unexpected EOF\n");
                goto done;
        }
 
@@ -1518,9 +1770,9 @@ static int test_pprint(void)
                                          mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
                                          pprint_enum_str[mapv.aenum]);
 
-               if (nexpected_line == sizeof(expected_line)) {
+               if (CHECK(nexpected_line == sizeof(expected_line),
+                         "expected_line is too long")) {
                        err = -1;
-                       fprintf(stderr, "expected_line is too long\n");
                        goto done;
                }
 
@@ -1535,15 +1787,15 @@ static int test_pprint(void)
                nread = getline(&line, &line_len, pin_file);
        } while (++key < test->max_entries && nread > 0);
 
-       if (key < test->max_entries) {
+       if (CHECK(key < test->max_entries,
+                 "Unexpected EOF. key:%u test->max_entries:%u",
+                 key, test->max_entries)) {
                err = -1;
-               fprintf(stderr, "Unexpected EOF\n");
                goto done;
        }
 
-       if (nread > 0) {
+       if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
                err = -1;
-               fprintf(stderr, "Unexpected extra pprint output: %s\n", line);
                goto done;
        }
 
@@ -1551,9 +1803,9 @@ static int test_pprint(void)
 
 done:
        if (!err)
-               fprintf(stderr, "OK\n");
+               fprintf(stderr, "OK");
        if (*btf_log_buf && (err || args.always_log))
-               fprintf(stderr, "%s\n", btf_log_buf);
+               fprintf(stderr, "\n%s", btf_log_buf);
        if (btf_fd != -1)
                close(btf_fd);
        if (map_fd != -1)
@@ -1634,6 +1886,12 @@ static int parse_args(int argc, char **argv)
        return 0;
 }
 
+static void print_summary(void)
+{
+       fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
+               pass_cnt - skip_cnt, skip_cnt, error_cnt);
+}
+
 int main(int argc, char **argv)
 {
        int err = 0;
@@ -1655,15 +1913,17 @@ int main(int argc, char **argv)
                err |= test_file();
 
        if (args.pprint_test)
-               err |= test_pprint();
+               err |= count_result(test_pprint());
 
        if (args.raw_test || args.get_info_test || args.file_test ||
            args.pprint_test)
-               return err;
+               goto done;
 
        err |= test_raw();
        err |= test_get_info();
        err |= test_file();
 
+done:
+       print_summary();
        return err;
 }
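
With count_result() threaded through each runner, a full test_btf run now ends with a one-line summary from print_summary(), for example (counts illustrative): PASS:25 SKIP:1 FAIL:0
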
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644 (file)
index 0000000..f6d9f23
--- /dev/null
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+       int pid;
+       int kern_stack_size;
+       int user_stack_size;
+       int user_stack_buildid_size;
+       __u64 kern_stack[MAX_STACK_RAWTP];
+       __u64 user_stack[MAX_STACK_RAWTP];
+       struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(__u32),
+       .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct stack_trace_t),
+       .max_entries = 1,
+};
+
+/* Allocate twice the per-cpu space that is actually needed. For the code below
+ *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ *   if (usize < 0)
+ *     return 0;
+ *   ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * if we had value_size = MAX_STACK_RAWTP * sizeof(__u64), the
+ * verifier would complain that the access "raw_data + usize"
+ * with size "max_len - usize" may be out of bounds:
+ * the maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", so the verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence rejects the program.
+ *
+ * Doubling the buffer size sidesteps this verifier limitation and
+ * avoids contorting the C code. This is an acceptable workaround
+ * since the map has only one entry.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+       .max_entries = 1,
+};
+
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+       int max_len, max_buildid_len, usize, ksize, total_size;
+       struct stack_trace_t *data;
+       void *raw_data;
+       __u32 key = 0;
+
+       data = bpf_map_lookup_elem(&stackdata_map, &key);
+       if (!data)
+               return 0;
+
+       max_len = MAX_STACK_RAWTP * sizeof(__u64);
+       max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+       data->pid = bpf_get_current_pid_tgid();
+       data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+                                             max_len, 0);
+       data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+                                           BPF_F_USER_STACK);
+       data->user_stack_buildid_size = bpf_get_stack(
+               ctx, data->user_stack_buildid, max_buildid_len,
+               BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+       bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+       /* write both kernel and user stacks to the same buffer */
+       raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+       if (!raw_data)
+               return 0;
+
+       usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+       if (usize < 0)
+               return 0;
+
+       ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+       if (ksize < 0)
+               return 0;
+
+       total_size = usize + ksize;
+       if (total_size > 0 && total_size <= max_len)
+               bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
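
On the user-space side, a program like this is typically loaded and attached roughly as follows. This is a hedged sketch with error handling trimmed; it assumes libbpf's bpf_prog_load() and bpf_raw_tracepoint_open() as present in this tree, and the include paths follow the selftest Makefile's -I flags:

    #include <linux/bpf.h>
    #include "bpf/bpf.h"
    #include "bpf/libbpf.h"

    static int load_and_attach(void)
    {
            const char *file = "./test_get_stack_rawtp.o";
            struct bpf_object *obj;
            int err, efd, prog_fd;

            err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
            if (err)
                    return err;

            /* attach to the raw tracepoint named in the SEC() annotation */
            efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
            return efd < 0 ? efd : 0;
    }
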
index eedda98d7bb1ee860a4f068c40e8602508a6a291..3ecf733330c109cc099cfba3ded4aabb9d4bf386 100644 (file)
@@ -38,8 +38,10 @@ typedef __u16 __sum16;
 #include "bpf_util.h"
 #include "bpf_endian.h"
 #include "bpf_rlimit.h"
+#include "trace_helpers.h"
 
 static int error_cnt, pass_cnt;
+static bool jit_enabled;
 
 #define MAGIC_BYTES 123
 
@@ -391,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr)
        return (__u64) (unsigned long) ptr;
 }
 
+static bool is_jit_enabled(void)
+{
+       const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+       bool enabled = false;
+       int sysctl_fd;
+
+       sysctl_fd = open(jit_sysctl, O_RDONLY);
+       if (sysctl_fd != -1) {
+               char tmpc;
+
+               if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+                       enabled = (tmpc != '0');
+               close(sysctl_fd);
+       }
+
+       return enabled;
+}
+
 static void test_bpf_obj_id(void)
 {
        const __u64 array_magic_value = 0xfaceb00c;
        const __u32 array_key = 0;
        const int nr_iters = 2;
        const char *file = "./test_obj_id.o";
-       const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
        const char *expected_prog_name = "test_obj_id";
        const char *expected_map_name = "test_map_id";
        const __u64 nsec_per_sec = 1000000000;
@@ -414,20 +433,11 @@ static void test_bpf_obj_id(void)
        char jited_insns[128], xlated_insns[128], zeros[128];
        __u32 i, next_id, info_len, nr_id_found, duration = 0;
        struct timespec real_time_ts, boot_time_ts;
-       int sysctl_fd, jit_enabled = 0, err = 0;
+       int err = 0;
        __u64 array_value;
        uid_t my_uid = getuid();
        time_t now, load_time;
 
-       sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
-       if (sysctl_fd != -1) {
-               char tmpc;
-
-               if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
-                       jit_enabled = (tmpc != '0');
-               close(sysctl_fd);
-       }
-
        err = bpf_prog_get_fd_by_id(0);
        CHECK(err >= 0 || errno != ENOENT,
              "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
@@ -896,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd)
        return 0;
 }
 
+static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
+{
+       __u32 key, next_key, *cur_key_p, *next_key_p;
+       char *val_buf1, *val_buf2;
+       int i, err = 0;
+
+       val_buf1 = malloc(stack_trace_len);
+       val_buf2 = malloc(stack_trace_len);
+       cur_key_p = NULL;
+       next_key_p = &key;
+       while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
+               err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
+               if (err)
+                       goto out;
+               err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
+               if (err)
+                       goto out;
+               for (i = 0; i < stack_trace_len; i++) {
+                       if (val_buf1[i] != val_buf2[i]) {
+                               err = -1;
+                               goto out;
+                       }
+               }
+               key = *next_key_p;
+               cur_key_p = &key;
+               next_key_p = &next_key;
+       }
+       if (errno != ENOENT)
+               err = -1;
+
+out:
+       free(val_buf1);
+       free(val_buf2);
+       return err;
+}
+
 static void test_stacktrace_map()
 {
-       int control_map_fd, stackid_hmap_fd, stackmap_fd;
+       int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
        const char *file = "./test_stacktrace_map.o";
-       int bytes, efd, err, pmu_fd, prog_fd;
+       int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
        struct perf_event_attr attr = {};
        __u32 key, val, duration = 0;
        struct bpf_object *obj;
@@ -956,6 +1002,10 @@ static void test_stacktrace_map()
        if (stackmap_fd < 0)
                goto disable_pmu;
 
+       stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+       if (stack_amap_fd < 0)
+               goto disable_pmu;
+
        /* give some time for bpf program run */
        sleep(1);
 
@@ -977,6 +1027,12 @@ static void test_stacktrace_map()
                  "err %d errno %d\n", err, errno))
                goto disable_pmu_noerr;
 
+       stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+       err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+       if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu_noerr;
+
        goto disable_pmu_noerr;
 disable_pmu:
        error_cnt++;
@@ -1070,9 +1126,9 @@ static int extract_build_id(char *build_id, size_t size)
 
 static void test_stacktrace_build_id(void)
 {
-       int control_map_fd, stackid_hmap_fd, stackmap_fd;
+       int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
        const char *file = "./test_stacktrace_build_id.o";
-       int bytes, efd, err, pmu_fd, prog_fd;
+       int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
        struct perf_event_attr attr = {};
        __u32 key, previous_key, val, duration = 0;
        struct bpf_object *obj;
@@ -1137,9 +1193,14 @@ static void test_stacktrace_build_id(void)
                  err, errno))
                goto disable_pmu;
 
+       stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+       if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
        assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
               == 0);
-       assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+       assert(system("./urandom_read") == 0);
        /* disable stack trace collection */
        key = 0;
        val = 1;
@@ -1188,8 +1249,15 @@ static void test_stacktrace_build_id(void)
                previous_key = key;
        } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
 
-       CHECK(build_id_matches < 1, "build id match",
-             "Didn't find expected build ID from the map");
+       if (CHECK(build_id_matches < 1, "build id match",
+                 "Didn't find expected build ID from the map\n"))
+               goto disable_pmu;
+
+       stack_trace_len = PERF_MAX_STACK_DEPTH
+               * sizeof(struct bpf_stack_build_id);
+       err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+       CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+             "err %d errno %d\n", err, errno);
 
 disable_pmu:
        ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
@@ -1204,8 +1272,280 @@ static void test_stacktrace_build_id(void)
        return;
 }
 
+static void test_stacktrace_build_id_nmi(void)
+{
+       int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+       const char *file = "./test_stacktrace_build_id.o";
+       int err, pmu_fd, prog_fd;
+       struct perf_event_attr attr = {
+               .sample_freq = 5000,
+               .freq = 1,
+               .type = PERF_TYPE_HARDWARE,
+               .config = PERF_COUNT_HW_CPU_CYCLES,
+       };
+       __u32 key, previous_key, val, duration = 0;
+       struct bpf_object *obj;
+       char buf[256];
+       int i, j;
+       struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+       int build_id_matches = 0;
+
+       err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+       if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+               return;
+
+       pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+                        0 /* cpu 0 */, -1 /* group id */,
+                        0 /* flags */);
+       if (CHECK(pmu_fd < 0, "perf_event_open",
+                 "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+                 pmu_fd, errno))
+               goto close_prog;
+
+       err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+       if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+                 err, errno))
+               goto close_pmu;
+
+       err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+       if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+                 err, errno))
+               goto disable_pmu;
+
+       /* find map fds */
+       control_map_fd = bpf_find_map(__func__, obj, "control_map");
+       if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+       if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+       if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+                 err, errno))
+               goto disable_pmu;
+
+       stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+       if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+              == 0);
+       assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+       /* disable stack trace collection */
+       key = 0;
+       val = 1;
+       bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+       /* for every element in stackid_hmap, we can find a corresponding one
+        * in stackmap, and vice versa.
+        */
+       err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+       if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+       if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       err = extract_build_id(buf, 256);
+
+       if (CHECK(err, "get build_id with readelf",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+       if (CHECK(err, "get_next_key from stackmap",
+                 "err %d, errno %d\n", err, errno))
+               goto disable_pmu;
+
+       do {
+               char build_id[64];
+
+               err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+               if (CHECK(err, "lookup_elem from stackmap",
+                         "err %d, errno %d\n", err, errno))
+                       goto disable_pmu;
+               for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+                       if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+                           id_offs[i].offset != 0) {
+                               for (j = 0; j < 20; ++j)
+                                       sprintf(build_id + 2 * j, "%02x",
+                                               id_offs[i].build_id[j] & 0xff);
+                               if (strstr(buf, build_id) != NULL)
+                                       build_id_matches = 1;
+                       }
+               previous_key = key;
+       } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+       if (CHECK(build_id_matches < 1, "build id match",
+                 "Didn't find expected build ID from the map\n"))
+               goto disable_pmu;
+
+       /*
+        * We intentionally skip compare_stack_ips(). This is because we
+        * only support one in_nmi() ips-to-build_id translation per cpu
+        * at any time, so stack_amap here will always fall back to
+        * BPF_STACK_BUILD_ID_IP.
+        */
+
+disable_pmu:
+       ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0);
+
+close_pmu:
+       close(pmu_fd);
+
+close_prog:
+       bpf_object__close(obj);
+}
+
+#define MAX_CNT_RAWTP  10ull
+#define MAX_STACK_RAWTP        100
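+/* Layout of one sample as consumed from the perf ring buffer by
+ * get_stack_print_output() below; MAX_STACK_RAWTP bounds the stack data.
+ */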
+struct get_stack_trace_t {
+       int pid;
+       int kern_stack_size;
+       int user_stack_size;
+       int user_stack_buildid_size;
+       __u64 kern_stack[MAX_STACK_RAWTP];
+       __u64 user_stack[MAX_STACK_RAWTP];
+       struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+static int get_stack_print_output(void *data, int size)
+{
+       bool good_kern_stack = false, good_user_stack = false;
+       const char *nonjit_func = "___bpf_prog_run";
+       struct get_stack_trace_t *e = data;
+       int i, num_stack;
+       static __u64 cnt;
+       struct ksym *ks;
+
+       cnt++;
+
+       if (size < sizeof(struct get_stack_trace_t)) {
+               __u64 *raw_data = data;
+               bool found = false;
+
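+               /* A sample smaller than the full struct carries only raw
+                * stack addresses; scan them for a known kernel symbol as
+                * a sanity check.
+                */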
+               num_stack = size / sizeof(__u64);
+               /* If jit is enabled, we do not have a good way to
+                * verify the sanity of the kernel stack. So we
+                * just assume it is good if the stack is not empty.
+                * This could be improved in the future.
+                */
+               if (jit_enabled) {
+                       found = num_stack > 0;
+               } else {
+                       for (i = 0; i < num_stack; i++) {
+                               ks = ksym_search(raw_data[i]);
+                               if (strcmp(ks->name, nonjit_func) == 0) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+               }
+               if (found) {
+                       good_kern_stack = true;
+                       good_user_stack = true;
+               }
+       } else {
+               num_stack = e->kern_stack_size / sizeof(__u64);
+               if (jit_enabled) {
+                       good_kern_stack = num_stack > 0;
+               } else {
+                       for (i = 0; i < num_stack; i++) {
+                               ks = ksym_search(e->kern_stack[i]);
+                               if (strcmp(ks->name, nonjit_func) == 0) {
+                                       good_kern_stack = true;
+                                       break;
+                               }
+                       }
+               }
+               if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+                       good_user_stack = true;
+       }
+       if (!good_kern_stack || !good_user_stack)
+               return LIBBPF_PERF_EVENT_ERROR;
+
+       if (cnt == MAX_CNT_RAWTP)
+               return LIBBPF_PERF_EVENT_DONE;
+
+       return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_get_stack_raw_tp(void)
+{
+       const char *file = "./test_get_stack_rawtp.o";
+       int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
+       struct perf_event_attr attr = {};
+       struct timespec tv = {0, 10};
+       __u32 key = 0, duration = 0;
+       struct bpf_object *obj;
+
+       err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+       if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+               return;
+
+       efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+       if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+               goto close_prog;
+
+       perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
+       if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+                 perfmap_fd, errno))
+               goto close_prog;
+
+       err = load_kallsyms();
+       if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
+               goto close_prog;
+
+       attr.sample_type = PERF_SAMPLE_RAW;
+       attr.type = PERF_TYPE_SOFTWARE;
+       attr.config = PERF_COUNT_SW_BPF_OUTPUT;
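+       /* software perf event that carries BPF output; its fd is stored
+        * in perfmap below so the BPF program can emit samples into it
+        */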
+       pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
+                        -1/*group_fd*/, 0);
+       if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+                 errno))
+               goto close_prog;
+
+       err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
+       if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
+                 errno))
+               goto close_prog;
+
+       err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+       if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+                 err, errno))
+               goto close_prog;
+
+       err = perf_event_mmap(pmu_fd);
+       if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+               goto close_prog;
+
+       /* trigger some syscall action */
+       for (i = 0; i < MAX_CNT_RAWTP; i++)
+               nanosleep(&tv, NULL);
+
+       err = perf_event_poller(pmu_fd, get_stack_print_output);
+       if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
+               goto close_prog;
+
+       goto close_prog_noerr;
+close_prog:
+       error_cnt++;
+close_prog_noerr:
+       bpf_object__close(obj);
+}
+
 int main(void)
 {
+       jit_enabled = is_jit_enabled();
+
        test_pkt_access();
        test_xdp();
        test_xdp_adjust_tail();
@@ -1218,7 +1558,9 @@ int main(void)
        test_tp_attach_query();
        test_stacktrace_map();
        test_stacktrace_build_id();
+       test_stacktrace_build_id_nmi();
        test_stacktrace_map_raw_tp();
+       test_get_stack_raw_tp();
 
        printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
        return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
index 73bb20cfb9b7b07c5c604812e0bbdd3245b7bc34..f4d99fabc56de89ea0a093b59763f98669573f86 100644 (file)
@@ -13,6 +13,7 @@
 #include <bpf/bpf.h>
 
 #include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
 
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
index d488f20926e8962f1b8d59ecf5b85c6ee4767c7a..2950f80ba7fb9f8770cb92bde50003d3c6186e78 100644 (file)
@@ -15,6 +15,7 @@
 #include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
 
 #define CG_PATH        "/foo"
 #define CONNECT4_PROG_PATH     "./connect4_prog.o"
index c6e1dcf992c44a104abedb2592fbbef1c47f233b..9832a875a828979be26b8756cbd589d5cf5e44f1 100755 (executable)
@@ -4,7 +4,7 @@ set -eu
 
 ping_once()
 {
-       ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
+       ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
 }
 
 wait_for_ip()
@@ -13,7 +13,7 @@ wait_for_ip()
        echo -n "Wait for testing IPv4/IPv6 to become available "
        for _i in $(seq ${MAX_PING_TRIES}); do
                echo -n "."
-               if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
+               if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
                        echo " OK"
                        return
                fi
diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c
new file mode 100644 (file)
index 0000000..e675591
--- /dev/null
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#undef SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
+#include "./test_sockmap_kern.h"
similarity index 56%
rename from samples/sockmap/sockmap_user.c
rename to tools/testing/selftests/bpf/test_sockmap.c
index 6f2334912283ef7ff60cb22439bef99c69645d86..eb17fae458e6cf153fbcb172388ceb4720ae26a1 100644 (file)
@@ -1,14 +1,5 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/socket.h>
@@ -25,6 +16,7 @@
 #include <fcntl.h>
 #include <sys/wait.h>
 #include <time.h>
+#include <sched.h>
 
 #include <sys/time.h>
 #include <sys/resource.h>
 
 #include <getopt.h>
 
-#include "../bpf/bpf_load.h"
-#include "../bpf/bpf_util.h"
-#include "../bpf/libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
 
 int running;
-void running_handler(int a);
+static void running_handler(int a);
 
 /* randomly selected ports for testing on lo */
 #define S1_PORT 10000
 #define S2_PORT 10001
 
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define CG_PATH "/sockmap"
+
 /* global sockets */
 int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
 
 int txmsg_pass;
 int txmsg_noisy;
@@ -107,7 +112,7 @@ static void usage(char *argv[])
        printf("\n");
 }
 
-static int sockmap_init_sockets(void)
+static int sockmap_init_sockets(int verbose)
 {
        int i, err, one = 1;
        struct sockaddr_in addr;
@@ -207,9 +212,11 @@ static int sockmap_init_sockets(void)
                return errno;
        }
 
-       printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
-       printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
-               c1, s1, c2, s2);
+       if (verbose) {
+               printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+               printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+                       c1, s1, c2, s2);
+       }
        return 0;
 }
 
@@ -226,6 +233,9 @@ struct sockmap_options {
        bool sendpage;
        bool data_test;
        bool drop_expected;
+       int iov_count;   /* number of iovecs per sendmsg */
+       int iov_length;  /* length of each iovec */
+       int rate;        /* number of send loops per test */
 };
 
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
@@ -324,17 +334,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                clock_gettime(CLOCK_MONOTONIC, &s->end);
        } else {
                int slct, recv, max_fd = fd;
+               int fd_flags = O_NONBLOCK;
                struct timeval timeout;
                float total_bytes;
                fd_set w;
 
+               /* make the receive socket non-blocking */
+               fcntl(fd, F_SETFL, fd_flags);
                total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
                err = clock_gettime(CLOCK_MONOTONIC, &s->start);
                if (err < 0)
                        perror("recv start time: ");
                while (s->bytes_recvd < total_bytes) {
-                       timeout.tv_sec = 1;
-                       timeout.tv_usec = 0;
+                       timeout.tv_sec = 0;
+                       timeout.tv_usec = 10;
 
                        /* FD sets */
                        FD_ZERO(&w);
@@ -346,7 +358,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                                clock_gettime(CLOCK_MONOTONIC, &s->end);
                                goto out_errno;
                        } else if (!slct) {
-                               fprintf(stderr, "unexpected timeout\n");
+                               if (opt->verbose)
+                                       fprintf(stderr, "unexpected timeout\n");
                                errno = -EIO;
                                clock_gettime(CLOCK_MONOTONIC, &s->end);
                                goto out_errno;
@@ -409,12 +422,14 @@ static inline float recvdBps(struct msg_stats s)
        return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
 }
 
-static int sendmsg_test(int iov_count, int iov_buf, int cnt,
-                       struct sockmap_options *opt)
+static int sendmsg_test(struct sockmap_options *opt)
 {
        float sent_Bps = 0, recvd_Bps = 0;
        int rx_fd, txpid, rxpid, err = 0;
        struct msg_stats s = {0};
+       int iov_count = opt->iov_count;
+       int iov_buf = opt->iov_length;
+       int cnt = opt->rate;
        int status;
 
        errno = 0;
@@ -433,7 +448,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
                        iov_count = 1;
                err = msg_loop(rx_fd, iov_count, iov_buf,
                               cnt, &s, false, opt);
-               if (err)
+               if (err && opt->verbose)
                        fprintf(stderr,
                                "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
                                iov_count, iov_buf, cnt, err);
@@ -443,10 +458,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
                        sent_Bps = sentBps(s);
                        recvd_Bps = recvdBps(s);
                }
-               fprintf(stdout,
-                       "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
-                       s.bytes_sent, sent_Bps, sent_Bps/giga,
-                       s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+               if (opt->verbose)
+                       fprintf(stdout,
+                               "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+                               s.bytes_sent, sent_Bps, sent_Bps/giga,
+                               s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
                exit(1);
        } else if (rxpid == -1) {
                perror("msg_loop_rx: ");
@@ -470,10 +486,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
                        sent_Bps = sentBps(s);
                        recvd_Bps = recvdBps(s);
                }
-               fprintf(stdout,
-                       "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
-                       s.bytes_sent, sent_Bps, sent_Bps/giga,
-                       s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+               if (opt->verbose)
+                       fprintf(stdout,
+                               "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+                               s.bytes_sent, sent_Bps, sent_Bps/giga,
+                               s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
                exit(1);
        } else if (txpid == -1) {
                perror("msg_loop_tx: ");
@@ -568,102 +585,9 @@ enum {
        SENDPAGE,
 };
 
-int main(int argc, char **argv)
+static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 {
-       int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
-       struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
-       int opt, longindex, err, cg_fd = 0;
-       struct sockmap_options options = {0};
-       int test = PING_PONG;
-       char filename[256];
-
-       while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
-                                 long_options, &longindex)) != -1) {
-               switch (opt) {
-               case 's':
-                       txmsg_start = atoi(optarg);
-                       break;
-               case 'e':
-                       txmsg_end = atoi(optarg);
-                       break;
-               case 'a':
-                       txmsg_apply = atoi(optarg);
-                       break;
-               case 'k':
-                       txmsg_cork = atoi(optarg);
-                       break;
-               case 'c':
-                       cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
-                       if (cg_fd < 0) {
-                               fprintf(stderr,
-                                       "ERROR: (%i) open cg path failed: %s\n",
-                                       cg_fd, optarg);
-                               return cg_fd;
-                       }
-                       break;
-               case 'r':
-                       rate = atoi(optarg);
-                       break;
-               case 'v':
-                       options.verbose = 1;
-                       break;
-               case 'i':
-                       iov_count = atoi(optarg);
-                       break;
-               case 'l':
-                       length = atoi(optarg);
-                       break;
-               case 'd':
-                       options.data_test = true;
-                       break;
-               case 't':
-                       if (strcmp(optarg, "ping") == 0) {
-                               test = PING_PONG;
-                       } else if (strcmp(optarg, "sendmsg") == 0) {
-                               test = SENDMSG;
-                       } else if (strcmp(optarg, "base") == 0) {
-                               test = BASE;
-                       } else if (strcmp(optarg, "base_sendpage") == 0) {
-                               test = BASE_SENDPAGE;
-                       } else if (strcmp(optarg, "sendpage") == 0) {
-                               test = SENDPAGE;
-                       } else {
-                               usage(argv);
-                               return -1;
-                       }
-                       break;
-               case 0:
-                       break;
-               case 'h':
-               default:
-                       usage(argv);
-                       return -1;
-               }
-       }
-
-       if (!cg_fd) {
-               fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
-                       argv[0]);
-               return -1;
-       }
-
-       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-               perror("setrlimit(RLIMIT_MEMLOCK)");
-               return 1;
-       }
-
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
-       running = 1;
-
-       /* catch SIGINT */
-       signal(SIGINT, running_handler);
-
-       if (load_bpf_file(filename)) {
-               fprintf(stderr, "load_bpf_file: (%s) %s\n",
-                       filename, strerror(errno));
-               return 1;
-       }
+       int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
 
        /* If base test, skip BPF setup */
        if (test == BASE || test == BASE_SENDPAGE)
@@ -673,8 +597,9 @@ int main(int argc, char **argv)
        err = bpf_prog_attach(prog_fd[0], map_fd[0],
                                BPF_SK_SKB_STREAM_PARSER, 0);
        if (err) {
-               fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
-                       err, strerror(errno));
+               fprintf(stderr,
+                       "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+                       prog_fd[0], map_fd[0], err, strerror(errno));
                return err;
        }
 
@@ -695,7 +620,7 @@ int main(int argc, char **argv)
        }
 
 run:
-       err = sockmap_init_sockets();
+       err = sockmap_init_sockets(options->verbose);
        if (err) {
                fprintf(stderr, "ERROR: test socket failed: %d\n", err);
                goto out;
@@ -729,7 +654,7 @@ int main(int argc, char **argv)
                        fprintf(stderr,
                                "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
                                err, strerror(errno));
-                       return err;
+                       goto out;
                }
 
                err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
@@ -737,7 +662,7 @@ int main(int argc, char **argv)
                        fprintf(stderr,
                                "ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
                                err, strerror(errno));
-                       return err;
+                       goto out;
                }
 
                if (txmsg_redir || txmsg_redir_noisy)
@@ -750,7 +675,7 @@ int main(int argc, char **argv)
                        fprintf(stderr,
                                "ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
                                err, strerror(errno));
-                       return err;
+                       goto out;
                }
 
                if (txmsg_apply) {
@@ -760,7 +685,7 @@ int main(int argc, char **argv)
                                fprintf(stderr,
                                        "ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
                                        err, strerror(errno));
-                               return err;
+                               goto out;
                        }
                }
 
@@ -771,7 +696,7 @@ int main(int argc, char **argv)
                                fprintf(stderr,
                                        "ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
                                        err, strerror(errno));
-                               return err;
+                               goto out;
                        }
                }
 
@@ -782,7 +707,7 @@ int main(int argc, char **argv)
                                fprintf(stderr,
                                        "ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
                                        err, strerror(errno));
-                               return err;
+                               goto out;
                        }
                }
 
@@ -794,7 +719,7 @@ int main(int argc, char **argv)
                                fprintf(stderr,
                                        "ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
                                        err, strerror(errno));
-                               return err;
+                               goto out;
                        }
                }
 
@@ -832,11 +757,13 @@ int main(int argc, char **argv)
                }
 
                if (txmsg_skb) {
-                       int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
+                       int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+                                       p2 : p1;
                        int ingress = BPF_F_INGRESS;
 
                        i = 0;
-                       err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
+                       err = bpf_map_update_elem(map_fd[7],
+                                                 &i, &ingress, BPF_ANY);
                        if (err) {
                                fprintf(stderr,
                                        "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
@@ -844,7 +771,8 @@ int main(int argc, char **argv)
                        }
 
                        i = 3;
-                       err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
+                       err = bpf_map_update_elem(map_fd[0],
+                                                 &i, &skb_fd, BPF_ANY);
                        if (err) {
                                fprintf(stderr,
                                        "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -854,36 +782,691 @@ int main(int argc, char **argv)
        }
 
        if (txmsg_drop)
-               options.drop_expected = true;
+               options->drop_expected = true;
 
        if (test == PING_PONG)
-               err = forever_ping_pong(rate, &options);
+               err = forever_ping_pong(options->rate, options);
        else if (test == SENDMSG) {
-               options.base = false;
-               options.sendpage = false;
-               err = sendmsg_test(iov_count, length, rate, &options);
+               options->base = false;
+               options->sendpage = false;
+               err = sendmsg_test(options);
        } else if (test == SENDPAGE) {
-               options.base = false;
-               options.sendpage = true;
-               err = sendmsg_test(iov_count, length, rate, &options);
+               options->base = false;
+               options->sendpage = true;
+               err = sendmsg_test(options);
        } else if (test == BASE) {
-               options.base = true;
-               options.sendpage = false;
-               err = sendmsg_test(iov_count, length, rate, &options);
+               options->base = true;
+               options->sendpage = false;
+               err = sendmsg_test(options);
        } else if (test == BASE_SENDPAGE) {
-               options.base = true;
-               options.sendpage = true;
-               err = sendmsg_test(iov_count, length, rate, &options);
+               options->base = true;
+               options->sendpage = true;
+               err = sendmsg_test(options);
        } else
                fprintf(stderr, "unknown test\n");
 out:
+       /* Detach and zero all the maps */
        bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+       bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+       bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+       if (tx_prog_fd >= 0)
+               bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+       for (i = 0; i < 8; i++) {
+               key = next_key = 0;
+               bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+               while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+                       bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+                       key = next_key;
+               }
+       }
+
        close(s1);
        close(s2);
        close(p1);
        close(p2);
        close(c1);
        close(c2);
+       return err;
+}
+
+static char *test_to_str(int test)
+{
+       switch (test) {
+       case SENDMSG:
+               return "sendmsg";
+       case SENDPAGE:
+               return "sendpage";
+       }
+       return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+       memset(options, 0, OPTSTRING);
+
+       /* bound each append by the space left in the buffer */
+       if (txmsg_pass)
+               strncat(options, "pass,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_noisy)
+               strncat(options, "pass_noisy,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_redir)
+               strncat(options, "redir,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_redir_noisy)
+               strncat(options, "redir_noisy,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_drop)
+               strncat(options, "drop,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_apply)
+               strncat(options, "apply,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_cork)
+               strncat(options, "cork,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_start)
+               strncat(options, "start,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_end)
+               strncat(options, "end,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_ingress)
+               strncat(options, "ingress,", OPTSTRING - strlen(options) - 1);
+       if (txmsg_skb)
+               strncat(options, "skb,", OPTSTRING - strlen(options) - 1);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+       char *options = calloc(OPTSTRING, sizeof(char));
+       int err;
+
+       if (test == SENDPAGE)
+               opt->sendpage = true;
+       else
+               opt->sendpage = false;
+
+       if (txmsg_drop)
+               opt->drop_expected = true;
+       else
+               opt->drop_expected = false;
+
+       test_options(options);
+
+       fprintf(stdout,
+               "[TEST %i]: (%i, %i, %i, %s, %s): ",
+               test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+               test_to_str(test), options);
+       fflush(stdout);
+       err = run_options(opt, cgrp, test);
+       fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+       test_cnt++;
+       !err ? passed++ : failed++;
+       free(options);
+       return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+       int err = __test_exec(cgrp, SENDMSG, opt);
+
+       if (err)
+               goto out;
+
+       err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+       return err;
+}
+
+static int test_loop(int cgrp)
+{
+       struct sockmap_options opt;
+       int err, i, l, r;
+
+       opt.verbose = 0;
+       opt.base = false;
+       opt.sendpage = false;
+       opt.data_test = false;
+       opt.drop_expected = false;
+       opt.iov_count = 0;
+       opt.iov_length = 0;
+       opt.rate = 0;
+
+       r = 1;
+       for (i = 1; i < 100; i += 33) {
+               for (l = 1; l < 100; l += 33) {
+                       opt.rate = r;
+                       opt.iov_count = i;
+                       opt.iov_length = l;
+                       err = test_exec(cgrp, &opt);
+                       if (err)
+                               goto out;
+               }
+       }
+       sched_yield();
+out:
+       return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+       int err;
+
+       txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+       txmsg_apply = txmsg_cork = 0;
+       txmsg_ingress = txmsg_skb = 0;
+
+       txmsg_pass = 1;
+       err = test_loop(cgrp);
+       txmsg_pass = 0;
+       if (err)
+               goto out;
+
+       txmsg_redir = 1;
+       err = test_loop(cgrp);
+       txmsg_redir = 0;
+       if (err)
+               goto out;
+
+       txmsg_drop = 1;
+       err = test_loop(cgrp);
+       txmsg_drop = 0;
+       if (err)
+               goto out;
+
+       txmsg_redir = 1;
+       txmsg_ingress = 1;
+       err = test_loop(cgrp);
+       txmsg_redir = 0;
+       txmsg_ingress = 0;
+       if (err)
+               goto out;
+out:
+       txmsg_pass = 0;
+       txmsg_redir = 0;
+       txmsg_drop = 0;
+       return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+       int err;
+
+       opt->iov_length = 1;
+       opt->iov_count = 1;
+       opt->rate = 1;
+       err = test_exec(cgrp, opt);
+       if (err)
+               goto out;
+
+       opt->iov_length = 1;
+       opt->iov_count = 1024;
+       opt->rate = 1;
+       err = test_exec(cgrp, opt);
+       if (err)
+               goto out;
+
+       opt->iov_length = 1024;
+       opt->iov_count = 1;
+       opt->rate = 1;
+       err = test_exec(cgrp, opt);
+       if (err)
+               goto out;
+
+       opt->iov_length = 1;
+       opt->iov_count = 1;
+       opt->rate = 1024;
+       err = test_exec(cgrp, opt);
+       if (err)
+               goto out;
+
+       opt->iov_length = 256;
+       opt->iov_count = 1024;
+       opt->rate = 10;
+       err = test_exec(cgrp, opt);
+       if (err)
+               goto out;
+
+       opt->rate = 100;
+       opt->iov_count = 1;
+       opt->iov_length = 5;
+       err = test_exec(cgrp, opt);
+       if (err)
+               goto out;
+out:
+       sched_yield();
+       return err;
+}
+
+static int test_mixed(int cgrp)
+{
+       struct sockmap_options opt = {0};
+       int err;
+
+       txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+       txmsg_apply = txmsg_cork = 0;
+       txmsg_start = txmsg_end = 0;
+       /* Test small and large iov_count values with pass/redir/apply/cork */
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_apply = 1;
+       txmsg_cork = 0;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_apply = 0;
+       txmsg_cork = 1;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_apply = 1;
+       txmsg_cork = 1;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_apply = 1024;
+       txmsg_cork = 0;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_apply = 0;
+       txmsg_cork = 1024;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_apply = 1024;
+       txmsg_cork = 1024;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 1;
+       txmsg_redir = 0;
+       txmsg_cork = 4096;
+       txmsg_apply = 4096;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 0;
+       txmsg_redir = 1;
+       txmsg_apply = 1;
+       txmsg_cork = 0;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 0;
+       txmsg_redir = 1;
+       txmsg_apply = 0;
+       txmsg_cork = 1;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 0;
+       txmsg_redir = 1;
+       txmsg_apply = 1024;
+       txmsg_cork = 0;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 0;
+       txmsg_redir = 1;
+       txmsg_apply = 0;
+       txmsg_cork = 1024;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 0;
+       txmsg_redir = 1;
+       txmsg_apply = 1024;
+       txmsg_cork = 1024;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+
+       txmsg_pass = 0;
+       txmsg_redir = 1;
+       txmsg_cork = 4096;
+       txmsg_apply = 4096;
+       err = test_send(&opt, cgrp);
+       if (err)
+               goto out;
+out:
+       return err;
+}
+
+static int test_start_end(int cgrp)
+{
+       struct sockmap_options opt = {0};
+       int err, i;
+
+       /* Test basic start/end with lots of iov_count and iov_lengths */
+       txmsg_start = 1;
+       txmsg_end = 2;
+       err = test_txmsg(cgrp);
+       if (err)
+               goto out;
+
+       /* Test start/end with cork */
+       opt.rate = 16;
+       opt.iov_count = 1;
+       opt.iov_length = 100;
+       txmsg_cork = 1600;
+
+       for (i = 99; i <= 1600; i += 500) {
+               txmsg_start = 0;
+               txmsg_end = i;
+               err = test_exec(cgrp, &opt);
+               if (err)
+                       goto out;
+       }
+
+       /* Test start/end with cork but pull data in middle */
+       for (i = 199; i <= 1600; i += 500) {
+               txmsg_start = 100;
+               txmsg_end = i;
+               err = test_exec(cgrp, &opt);
+               if (err)
+                       goto out;
+       }
+
+       /* Test start/end with cork pulling last sg entry */
+       txmsg_start = 1500;
+       txmsg_end = 1600;
+       err = test_exec(cgrp, &opt);
+       if (err)
+               goto out;
+
+       /* Test start/end pull of single byte in last page */
+       txmsg_start = 1111;
+       txmsg_end = 1112;
+       err = test_exec(cgrp, &opt);
+       if (err)
+               goto out;
+
+       /* Test start/end with end < start */
+       txmsg_start = 1111;
+       txmsg_end = 0;
+       err = test_exec(cgrp, &opt);
+       if (err)
+               goto out;
+
+       /* Test start/end with end > data */
+       txmsg_start = 0;
+       txmsg_end = 1601;
+       err = test_exec(cgrp, &opt);
+       if (err)
+               goto out;
+
+       /* Test start/end with start > data */
+       txmsg_start = 1601;
+       txmsg_end = 1600;
+       err = test_exec(cgrp, &opt);
+
+out:
+       txmsg_start = 0;
+       txmsg_end = 0;
+       sched_yield();
+       return err;
+}
+
+char *map_names[] = {
+       "sock_map",
+       "sock_map_txmsg",
+       "sock_map_redir",
+       "sock_apply_bytes",
+       "sock_cork_bytes",
+       "sock_pull_bytes",
+       "sock_redir_flags",
+       "sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+       BPF_SK_SKB_STREAM_PARSER,
+       BPF_SK_SKB_STREAM_VERDICT,
+       BPF_CGROUP_SOCK_OPS,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+       BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_SOCK_OPS,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+};
+
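+/* prog_type[i] and prog_attach_type[i] correspond, in order, to the
+ * programs walked by bpf_object__for_each_program() in populate_progs().
+ */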
+static int populate_progs(char *bpf_file)
+{
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int i = 0;
+       long err;
+
+       obj = bpf_object__open(bpf_file);
+       err = libbpf_get_error(obj);
+       if (err) {
+               char err_buf[256];
+
+               libbpf_strerror(err, err_buf, sizeof(err_buf));
+               printf("Unable to load eBPF objects in file '%s' : %s\n",
+                      bpf_file, err_buf);
+               return -1;
+       }
+
+       bpf_object__for_each_program(prog, obj) {
+               bpf_program__set_type(prog, prog_type[i]);
+               bpf_program__set_expected_attach_type(prog,
+                                                     prog_attach_type[i]);
+               i++;
+       }
+
+       err = bpf_object__load(obj);
+       if (err) {
+               printf("Unable to load eBPF objects in file '%s'\n",
+                      bpf_file);
+               return -1;
+       }
+
+       i = 0;
+       bpf_object__for_each_program(prog, obj) {
+               prog_fd[i] = bpf_program__fd(prog);
+               i++;
+       }
+
+       for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+               maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+               map_fd[i] = bpf_map__fd(maps[i]);
+               if (map_fd[i] < 0) {
+                       fprintf(stderr, "bpf_map__fd: (%i) %s\n",
+                               map_fd[i], strerror(errno));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int __test_suite(char *bpf_file)
+{
+       int cg_fd, err;
+
+       err = populate_progs(bpf_file);
+       if (err < 0) {
+               fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+               return err;
+       }
+
+       if (setup_cgroup_environment()) {
+               fprintf(stderr, "ERROR: cgroup env failed\n");
+               return -EINVAL;
+       }
+
+       cg_fd = create_and_get_cgroup(CG_PATH);
+       if (cg_fd < 0) {
+               fprintf(stderr,
+                       "ERROR: (%i) open cg path failed: %s\n",
+                       cg_fd, CG_PATH);
+               return cg_fd;
+       }
+
+       /* Tests basic commands and APIs with range of iov values */
+       txmsg_start = txmsg_end = 0;
+       err = test_txmsg(cg_fd);
+       if (err)
+               goto out;
+
+       /* Tests interesting combinations of APIs used together */
+       err = test_mixed(cg_fd);
+       if (err)
+               goto out;
+
+       /* Tests pull_data API using start/end API */
+       err = test_start_end(cg_fd);
+       if (err)
+               goto out;
+
+out:
+       printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+       cleanup_cgroup_environment();
+       close(cg_fd);
+       return err;
+}
+
+static int test_suite(void)
+{
+       int err;
+
+       err = __test_suite(BPF_SOCKMAP_FILENAME);
+       if (err)
+               goto out;
+       err = __test_suite(BPF_SOCKHASH_FILENAME);
+out:
+       return err;
+}
+
+int main(int argc, char **argv)
+{
+       struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+       int iov_count = 1, length = 1024, rate = 1;
+       struct sockmap_options options = {0};
+       int opt, longindex, err, cg_fd = 0;
+       char *bpf_file = BPF_SOCKMAP_FILENAME;
+       int test = PING_PONG;
+
+       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+               perror("setrlimit(RLIMIT_MEMLOCK)");
+               return 1;
+       }
+
+       if (argc < 2)
+               return test_suite();
+
+       while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+                                 long_options, &longindex)) != -1) {
+               switch (opt) {
+               case 's':
+                       txmsg_start = atoi(optarg);
+                       break;
+               case 'e':
+                       txmsg_end = atoi(optarg);
+                       break;
+               case 'a':
+                       txmsg_apply = atoi(optarg);
+                       break;
+               case 'k':
+                       txmsg_cork = atoi(optarg);
+                       break;
+               case 'c':
+                       cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+                       if (cg_fd < 0) {
+                               fprintf(stderr,
+                                       "ERROR: (%i) open cg path failed: %s\n",
+                                       cg_fd, optarg);
+                               return cg_fd;
+                       }
+                       break;
+               case 'r':
+                       rate = atoi(optarg);
+                       break;
+               case 'v':
+                       options.verbose = 1;
+                       break;
+               case 'i':
+                       iov_count = atoi(optarg);
+                       break;
+               case 'l':
+                       length = atoi(optarg);
+                       break;
+               case 'd':
+                       options.data_test = true;
+                       break;
+               case 't':
+                       if (strcmp(optarg, "ping") == 0) {
+                               test = PING_PONG;
+                       } else if (strcmp(optarg, "sendmsg") == 0) {
+                               test = SENDMSG;
+                       } else if (strcmp(optarg, "base") == 0) {
+                               test = BASE;
+                       } else if (strcmp(optarg, "base_sendpage") == 0) {
+                               test = BASE_SENDPAGE;
+                       } else if (strcmp(optarg, "sendpage") == 0) {
+                               test = SENDPAGE;
+                       } else {
+                               usage(argv);
+                               return -1;
+                       }
+                       break;
+               case 0:
+                       break;
+               case 'h':
+               default:
+                       usage(argv);
+                       return -1;
+               }
+       }
+
+       if (!cg_fd) {
+               fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+                       argv[0]);
+               return -1;
+       }
+
+       err = populate_progs(bpf_file);
+       if (err) {
+               fprintf(stderr, "populate program: (%s) %s\n",
+                       bpf_file, strerror(errno));
+               return 1;
+       }
+       running = 1;
+
+       /* catch SIGINT */
+       signal(SIGINT, running_handler);
+
+       options.iov_count = iov_count;
+       options.iov_length = length;
+       options.rate = rate;
+
+       err = run_options(&options, cg_fd, test);
        close(cg_fd);
        return err;
 }
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644 (file)
index 0000000..677b2ed
--- /dev/null
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#define SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
+#include "./test_sockmap_kern.h"
similarity index 88%
rename from samples/sockmap/sockmap_kern.c
rename to tools/testing/selftests/bpf/test_sockmap_kern.h
index 9ff8bc5dc20619220621bfec182ef766af6c3c1b..8e8e41780bb9f8a9eb8f5611e1610ab1d10b705a 100644 (file)
@@ -1,20 +1,19 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include "../../tools/testing/selftests/bpf/bpf_helpers.h"
-#include "../../tools/testing/selftests/bpf/bpf_endian.h"
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
 
 /* Sockmap sample program connects a client and a backend together
  * using cgroups.
 })
 
 struct bpf_map_def SEC("maps") sock_map = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
+       .type = TEST_MAP_TYPE,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 20,
 };
 
 struct bpf_map_def SEC("maps") sock_map_txmsg = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
+       .type = TEST_MAP_TYPE,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 20,
 };
 
 struct bpf_map_def SEC("maps") sock_map_redir = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
+       .type = TEST_MAP_TYPE,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 20,
@@ -120,7 +119,12 @@ int bpf_prog2(struct __sk_buff *skb)
 
        bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
                   len, flags);
+#ifdef SOCKMAP
        return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+#else
+       return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
+#endif
 }
 
 SEC("sockops")
@@ -139,8 +143,13 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 
                if (lport == 10000) {
                        ret = 1;
+#ifdef SOCKMAP
                        err = bpf_sock_map_update(skops, &sock_map, &ret,
                                                  BPF_NOEXIST);
+#else
+                       err = bpf_sock_hash_update(skops, &sock_map, &ret,
+                                                  BPF_NOEXIST);
+#endif
                        bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
                                   lport, bpf_ntohl(rport), err);
                }
@@ -151,8 +160,13 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 
                if (bpf_ntohl(rport) == 10001) {
                        ret = 10;
+#ifdef SOCKMAP
                        err = bpf_sock_map_update(skops, &sock_map, &ret,
                                                  BPF_NOEXIST);
+#else
+                       err = bpf_sock_hash_update(skops, &sock_map, &ret,
+                                                  BPF_NOEXIST);
+#endif
                        bpf_printk("active(%i -> %i) map ctx update err: %d\n",
                                   lport, bpf_ntohl(rport), err);
                }
@@ -238,7 +252,11 @@ int bpf_prog6(struct sk_msg_md *msg)
                key = 2;
                flags = *f;
        }
+#ifdef SOCKMAP
        return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+       return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
 }
 
 SEC("sk_msg4")
@@ -277,7 +295,11 @@ int bpf_prog7(struct sk_msg_md *msg)
        }
        bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
                   len1, flags, err1 ? err1 : err2);
+#ifdef SOCKMAP
        err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+       err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
        bpf_printk("sk_msg3: err %i\n", err);
        return err;
 }
@@ -337,5 +359,5 @@ int bpf_prog10(struct sk_msg_md *msg)
        return SK_DROP;
 }
 
-
+int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
index b755bd783ce5acca3c86e6f6264d728c02a5c3cf..d86c281e957fd9c3cf2baaecb3844f057b3fb05f 100644 (file)
@@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(__u32),
        .value_size = sizeof(__u32),
-       .max_entries = 10000,
+       .max_entries = 16384,
 };
 
 struct bpf_map_def SEC("maps") stackmap = {
@@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = {
        .map_flags = BPF_F_STACK_BUILD_ID,
 };
 
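+/* Array mirror of stackmap, filled via bpf_get_stack() with build-id
+ * collection so user space can inspect the raw entries directly.
+ */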
+struct bpf_map_def SEC("maps") stack_amap = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct bpf_stack_build_id)
+               * PERF_MAX_STACK_DEPTH,
+       .max_entries = 128,
+};
+
 /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
 struct random_urandom_args {
        unsigned long long pad;
@@ -42,7 +50,10 @@ struct random_urandom_args {
 SEC("tracepoint/random/urandom_read")
 int oncpu(struct random_urandom_args *args)
 {
+       __u32 max_len = sizeof(struct bpf_stack_build_id)
+                       * PERF_MAX_STACK_DEPTH;
        __u32 key = 0, val = 0, *value_p;
+       void *stack_p;
 
        value_p = bpf_map_lookup_elem(&control_map, &key);
        if (value_p && *value_p)
@@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args)
 
        /* The size of stackmap and stackid_hmap should be the same */
        key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
-       if ((int)key >= 0)
+       if ((int)key >= 0) {
                bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+               stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+               if (stack_p)
+                       bpf_get_stack(args, stack_p, max_len,
+                                     BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+       }
 
        return 0;
 }
index 76d85c5d08bdbc95af3f9e60c9a89f98e836f41e..af111af7ca1a737ddf4db6b83b0f1370930b5992 100644 (file)
@@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(__u32),
        .value_size = sizeof(__u32),
-       .max_entries = 10000,
+       .max_entries = 16384,
 };
 
 struct bpf_map_def SEC("maps") stackmap = {
        .type = BPF_MAP_TYPE_STACK_TRACE,
        .key_size = sizeof(__u32),
        .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
-       .max_entries = 10000,
+       .max_entries = 16384,
+};
+
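+/* Array twin of stackmap, filled via bpf_get_stack() so the test can
+ * cross-check raw stack ips against the stackid-based collection.
+ */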
+struct bpf_map_def SEC("maps") stack_amap = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+       .max_entries = 16384,
 };
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
@@ -44,7 +51,9 @@ struct sched_switch_args {
 SEC("tracepoint/sched/sched_switch")
 int oncpu(struct sched_switch_args *ctx)
 {
+       __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
        __u32 key = 0, val = 0, *value_p;
+       void *stack_p;
 
        value_p = bpf_map_lookup_elem(&control_map, &key);
        if (value_p && *value_p)
@@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx)
 
        /* The size of stackmap and stackid_hmap should be the same */
        key = bpf_get_stackid(ctx, &stackmap, 0);
-       if ((int)key >= 0)
+       if ((int)key >= 0) {
                bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+               stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+               if (stack_p)
+                       bpf_get_stack(ctx, stack_p, max_len, 0);
+       }
 
        return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755 (executable)
index 0000000..aeb2901
--- /dev/null
@@ -0,0 +1,729 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+#   This script tests the BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+#     root namespace   |     at_ns0 namespace
+#                      |
+#      -----------     |     -----------
+#      | tnl dev |     |     | tnl dev |  (overlay network)
+#      -----------     |     -----------
+#      metadata-mode   |     native-mode
+#       with bpf       |
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |  (underlay network)
+#      ----------    peer    ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+#      veth1 IPv4: 172.16.1.200, IPv6: ::22 (underlay)
+#      tunnel dev <type>11, e.g. gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+#      veth0 IPv4: 172.16.1.100, IPv6: ::11 (underlay)
+#      tunnel dev <type>00, e.g. gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start with namespace creation and device configuration,
+# then ping the underlay and overlay networks.  When doing 'ping 10.1.1.100'
+# from the root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to the tnl dev; the packet is
+#    forwarded to the tnl dev.
+# 2) The tnl device's egress BPF program is triggered and sets the tunnel
+#    metadata, with remote_ip=172.16.1.200 among others.
+# 3) The outer tunnel header is prepended and the packet is routed to
+#    veth1's egress.
+# 4) veth0's ingress queue receives the tunneled packet in namespace at_ns0.
+# 5) The tunnel protocol handler, e.g. vxlan_rcv, decaps the packet.
+# 6) The packet is forwarded to the overlay tnl dev.
+
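+# For reference, the metadata-mode device in the root namespace gets its
+# BPF programs attached via tc, roughly as below (an illustrative sketch;
+# the object and section names here are placeholders, not necessarily the
+# exact ones used by this suite):
+#   tc qdisc add dev $DEV clsact
+#   tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec <type>_set_tunnel
+#   tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec <type>_get_tunnel
+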
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+       ip netns add at_ns0
+       ip link add veth0 type veth peer name veth1
+       ip link set veth0 netns at_ns0
+       ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip link set dev veth1 up mtu 1500
+       ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local 172.16.1.100 remote 172.16.1.200
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+       # root namespace
+       ip link add dev $DEV type $TYPE key 2 external
+       ip link set dev $DEV up
+       ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+       # assign IPv6 addresses
+       ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip addr add dev veth1 ::22/96
+       ip link set dev veth1 up
+
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+               local ::11 remote ::22
+
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external
+       ip addr add dev $DEV 10.1.1.200/24
+       ip addr add dev $DEV fc80::200/24
+       ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+       # at_ns0 namespace
+       if [ "$1" == "v1" ]; then
+               ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local 172.16.1.100 remote 172.16.1.200 \
+               erspan_ver 1 erspan 123
+       else
+               ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local 172.16.1.100 remote 172.16.1.200 \
+               erspan_ver 2 erspan_dir egress erspan_hwid 3
+       fi
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external
+       ip link set dev $DEV up
+       ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+       # assign ipv6 address
+       ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip addr add dev veth1 ::22/96
+       ip link set dev veth1 up
+
+       # at_ns0 namespace
+       if [ "$1" == "v1" ]; then
+               ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local ::11 remote ::22 \
+               erspan_ver 1 erspan 123
+       else
+               ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local ::11 remote ::22 \
+               erspan_ver 2 erspan_dir egress erspan_hwid 7
+       fi
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external
+       ip addr add dev $DEV 10.1.1.200/24
+       ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+       # Set a static ARP entry here because iptables set-mark only works
+       # on L3 packets; it does not apply to ARP packets, which would
+       # cause errors in get_tunnel_{key/opt}.
+
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE \
+               id 2 dstport 4789 gbp remote 172.16.1.200
+       ip netns exec at_ns0 \
+               ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+       ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external gbp dstport 4789
+       ip link set dev $DEV address 52:54:00:d9:02:00 up
+       ip addr add dev $DEV 10.1.1.200/24
+       arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+       #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+       ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       #ip -4 addr del 172.16.1.200 dev veth1
+       ip -6 addr add dev veth1 ::22/96
+       ip link set dev veth1 up
+
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+               local ::11 remote ::22
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external dstport 4789
+       ip addr add dev $DEV 10.1.1.200/24
+       ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE \
+               id 2 dstport 6081 remote 172.16.1.200
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+       # root namespace
+       ip link add dev $DEV type $TYPE dstport 6081 external
+       ip link set dev $DEV up
+       ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+       ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip addr add dev veth1 ::22/96
+       ip link set dev veth1 up
+
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE id 22 \
+               remote ::22     # geneve has no local option
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external
+       ip addr add dev $DEV 10.1.1.200/24
+       ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE \
+               local 172.16.1.100 remote 172.16.1.200
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external
+       ip link set dev $DEV up
+       ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+       ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip addr add dev veth1 ::22/96
+       ip link set dev veth1 up
+
+       # at_ns0 namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE \
+               local ::11 remote ::22
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+       # root namespace
+       ip link add dev $DEV type $TYPE external
+       ip addr add dev $DEV 10.1.1.200/24
+       ip link set dev $DEV up
+}
+
+test_gre()
+{
+       TYPE=gretap
+       DEV_NS=gretap00
+       DEV=gretap11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_gre_tunnel
+       attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+       TYPE=ip6gre
+       DEV_NS=ip6gre00
+       DEV=ip6gre11
+       ret=0
+
+       check $TYPE
+       config_device
+       # reuse the ip6gretap function
+       add_ip6gretap_tunnel
+       attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+       # underlay
+       ping6 $PING_ARG ::11
+       # overlay: ipv4 over ipv6
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       # overlay: ipv6 over ipv6
+       ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+       TYPE=ip6gretap
+       DEV_NS=ip6gretap00
+       DEV=ip6gretap11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_ip6gretap_tunnel
+       attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+       # underlay
+       ping6 $PING_ARG ::11
+       # overlay: ipv4 over ipv6
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       # overlay: ipv6 over ipv6
+       ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+       TYPE=erspan
+       DEV_NS=erspan00
+       DEV=erspan11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_erspan_tunnel $1
+       attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+       TYPE=ip6erspan
+       DEV_NS=ip6erspan00
+       DEV=ip6erspan11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_ip6erspan_tunnel $1
+       attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+       ping6 $PING_ARG ::11
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+       TYPE=vxlan
+       DEV_NS=vxlan00
+       DEV=vxlan11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_vxlan_tunnel
+       attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+       TYPE=vxlan
+       DEV_NS=ip6vxlan00
+       DEV=ip6vxlan11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_ip6vxlan_tunnel
+       ip link set dev veth1 mtu 1500
+       attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+       # underlay
+       ping6 $PING_ARG ::11
+       # ip4 over ip6
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+       TYPE=geneve
+       DEV_NS=geneve00
+       DEV=geneve11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_geneve_tunnel
+       attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+       TYPE=geneve
+       DEV_NS=ip6geneve00
+       DEV=ip6geneve11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_ip6geneve_tunnel
+       attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+       TYPE=ipip
+       DEV_NS=ipip00
+       DEV=ipip11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_ipip_tunnel
+       ip link set dev veth1 mtu 1500
+       attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+       TYPE=ip6tnl
+       DEV_NS=ipip6tnl00
+       DEV=ipip6tnl11
+       ret=0
+
+       check $TYPE
+       config_device
+       add_ipip6tnl_tunnel
+       ip link set dev veth1 mtu 1500
+       attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+       # underlay
+       ping6 $PING_ARG ::11
+       # ip4 over ip6
+       ping $PING_ARG 10.1.1.100
+       check_err $?
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: $TYPE"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+       auth=0x$(printf '1%.0s' {1..40})
+       enc=0x$(printf '2%.0s' {1..32})
+       spi_in_to_out=0x1
+       spi_out_to_in=0x2
+       # at_ns0 namespace
+       # at_ns0 -> root
+       ip netns exec at_ns0 \
+               ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+                       spi $spi_in_to_out reqid 1 mode tunnel \
+                       auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+       ip netns exec at_ns0 \
+               ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+               tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+               mode tunnel
+       # root -> at_ns0
+       ip netns exec at_ns0 \
+               ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+                       spi $spi_out_to_in reqid 2 mode tunnel \
+                       auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+       ip netns exec at_ns0 \
+               ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+               tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+               mode tunnel
+       # address & route
+       ip netns exec at_ns0 \
+               ip addr add dev veth0 10.1.1.100/32
+       ip netns exec at_ns0 \
+               ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+                       src 10.1.1.100
+
+       # root namespace
+       # at_ns0 -> root
+       ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+               spi $spi_in_to_out reqid 1 mode tunnel \
+               auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+       ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+               tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+               mode tunnel
+       # root -> at_ns0
+       ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+               spi $spi_out_to_in reqid 2 mode tunnel \
+               auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+       ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+               tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+               mode tunnel
+       # address & route
+       ip addr add dev veth1 10.1.1.200/32
+       ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
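+# Debugging aid for the xfrm setup above (manual use only): the installed
+# states and policies can be inspected with
+#      ip xfrm state show
+#      ip netns exec at_ns0 ip xfrm policy show
+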
+test_xfrm_tunnel()
+{
+       config_device
+       #tcpdump -nei veth1 ip &
+       output=$(mktemp)
+       cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
+       setup_xfrm_tunnel
+       tc qdisc add dev veth1 clsact
+       tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+               sec xfrm_get_state
+       ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+       sleep 1
+       grep "reqid 1" $output
+       check_err $?
+       grep "spi 0x1" $output
+       check_err $?
+       grep "remote ip 0xac100164" $output
+       check_err $?
+       cleanup
+
+       if [ $ret -ne 0 ]; then
+               echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
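+# $2/$3 are ELF section names inside test_tunnel_kern.o and must match the
+# SEC() annotations in test_tunnel_kern.c, e.g. SEC("gre_set_tunnel") for
+# the egress program and SEC("gre_get_tunnel") for the ingress one.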
+attach_bpf()
+{
+       DEV=$1
+       SET=$2
+       GET=$3
+       tc qdisc add dev $DEV clsact
+       tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+       tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+       ip netns delete at_ns0 2> /dev/null
+       ip link del veth1 2> /dev/null
+       ip link del ipip11 2> /dev/null
+       ip link del ipip6tnl11 2> /dev/null
+       ip link del gretap11 2> /dev/null
+       ip link del ip6gre11 2> /dev/null
+       ip link del ip6gretap11 2> /dev/null
+       ip link del vxlan11 2> /dev/null
+       ip link del ip6vxlan11 2> /dev/null
+       ip link del geneve11 2> /dev/null
+       ip link del ip6geneve11 2> /dev/null
+       ip link del erspan11 2> /dev/null
+       ip link del ip6erspan11 2> /dev/null
+}
+
+cleanup_exit()
+{
+       echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+       cleanup
+       exit 0
+}
+
+check()
+{
+       ip link help $1 2>&1 | grep -q "^Usage:"
+       if [ $? -ne 0 ]; then
+               echo "SKIP $1: iproute2 does not support this type"
+               cleanup
+               return 1
+       fi
+}
+
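+# Manual helper, not called by the tests: turns on dynamic debug prints
+# for the tunnel drivers so decap/encap errors show up in the kernel log.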
+enable_debug()
+{
+       echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+       echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+       echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+       echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+       echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
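+# Latch the first non-zero exit status into $ret so an early failure is
+# not masked by a later success; typically used as:
+#      ping $PING_ARG 10.1.1.100
+#      check_err $?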
+check_err()
+{
+       if [ $ret -eq 0 ]; then
+               ret=$1
+       fi
+}
+
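+# The *_get_tunnel programs report what they read via bpf_trace_printk();
+# to watch their output while a test runs (optional, for debugging):
+#      cat /sys/kernel/debug/tracing/trace_pipe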
+bpf_tunnel_test()
+{
+       echo "Testing GRE tunnel..."
+       test_gre
+       echo "Testing IP6GRE tunnel..."
+       test_ip6gre
+       echo "Testing IP6GRETAP tunnel..."
+       test_ip6gretap
+       echo "Testing ERSPAN tunnel..."
+       test_erspan v2
+       echo "Testing IP6ERSPAN tunnel..."
+       test_ip6erspan v2
+       echo "Testing VXLAN tunnel..."
+       test_vxlan
+       echo "Testing IP6VXLAN tunnel..."
+       test_ip6vxlan
+       echo "Testing GENEVE tunnel..."
+       test_geneve
+       echo "Testing IP6GENEVE tunnel..."
+       test_ip6geneve
+       echo "Testing IPIP tunnel..."
+       test_ipip
+       echo "Testing IPIP6 tunnel..."
+       test_ipip6
+       echo "Testing IPSec tunnel..."
+       test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
similarity index 68%
rename from samples/bpf/tcbpf2_kern.c
rename to tools/testing/selftests/bpf/test_tunnel_kern.c
index 9a8db7bd6db4899591835ebfb4dffc32b1bb876b..504df69c83df42381e5e1508ce3873bf388d0364 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2016 VMware
  * Copyright (c) 2016 Facebook
  *
@@ -5,39 +6,41 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/filter.h>
-#include <uapi/linux/pkt_cls.h>
-#include <uapi/linux/erspan.h>
-#include <net/ipv6.h>
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 
-#define _htonl __builtin_bswap32
 #define ERROR(ret) do {\
                char fmt[] = "ERROR line:%d ret:%d\n";\
                bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
-       } while(0)
+       } while (0)
+
+int _version SEC("version") = 1;
 
 struct geneve_opt {
        __be16  opt_class;
-       u8      type;
-       u8      length:5;
-       u8      r3:1;
-       u8      r2:1;
-       u8      r1:1;
-       u8      opt_data[8]; /* hard-coded to 8 byte */
+       __u8    type;
+       __u8    length:5;
+       __u8    r3:1;
+       __u8    r2:1;
+       __u8    r1:1;
+       __u8    opt_data[8]; /* hard-coded to 8 bytes */
 };
 
 struct vxlan_metadata {
-       u32     gbp;
+       __u32     gbp;
 };
 
 SEC("gre_set_tunnel")
@@ -86,7 +89,7 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
        int ret;
 
        __builtin_memset(&key, 0x0, sizeof(key));
-       key.remote_ipv6[3] = _htonl(0x11); /* ::11 */
+       key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
        key.tunnel_id = 2;
        key.tunnel_tos = 0;
        key.tunnel_ttl = 64;
@@ -136,7 +139,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
        key.tunnel_tos = 0;
        key.tunnel_ttl = 64;
 
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_ZERO_CSUM_TX);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -147,8 +151,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
        md.version = 1;
        md.u.index = bpf_htonl(123);
 #else
-       u8 direction = 1;
-       u8 hwid = 7;
+       __u8 direction = 1;
+       __u8 hwid = 7;
 
        md.version = 2;
        md.u.md2.dir = direction;
@@ -171,7 +175,7 @@ int _erspan_get_tunnel(struct __sk_buff *skb)
        char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
        struct bpf_tunnel_key key;
        struct erspan_metadata md;
-       u32 index;
+       __u32 index;
        int ret;
 
        ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
@@ -214,7 +218,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
        int ret;
 
        __builtin_memset(&key, 0x0, sizeof(key));
-       key.remote_ipv6[3] = _htonl(0x11);
+       key.remote_ipv6[3] = bpf_htonl(0x11);
        key.tunnel_id = 2;
        key.tunnel_tos = 0;
        key.tunnel_ttl = 64;
@@ -229,11 +233,11 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
        __builtin_memset(&md, 0, sizeof(md));
 
 #ifdef ERSPAN_V1
-       md.u.index = htonl(123);
+       md.u.index = bpf_htonl(123);
        md.version = 1;
 #else
-       u8 direction = 0;
-       u8 hwid = 17;
+       __u8 direction = 0;
+       __u8 hwid = 17;
 
        md.version = 2;
        md.u.md2.dir = direction;
@@ -256,10 +260,11 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
        char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
        struct bpf_tunnel_key key;
        struct erspan_metadata md;
-       u32 index;
+       __u32 index;
        int ret;
 
-       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -304,7 +309,8 @@ int _vxlan_set_tunnel(struct __sk_buff *skb)
        key.tunnel_tos = 0;
        key.tunnel_ttl = 64;
 
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_ZERO_CSUM_TX);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -346,6 +352,48 @@ int _vxlan_get_tunnel(struct __sk_buff *skb)
        return TC_ACT_OK;
 }
 
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+       struct bpf_tunnel_key key;
+       int ret;
+
+       __builtin_memset(&key, 0x0, sizeof(key));
+       key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+       key.tunnel_id = 22;
+       key.tunnel_tos = 0;
+       key.tunnel_ttl = 64;
+
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+       char fmt[] = "key %d remote ip6 ::%x label %x\n";
+       struct bpf_tunnel_key key;
+       int ret;
+
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       bpf_trace_printk(fmt, sizeof(fmt),
+                        key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+       return TC_ACT_OK;
+}
+
 SEC("geneve_set_tunnel")
 int _geneve_set_tunnel(struct __sk_buff *skb)
 {
@@ -360,15 +408,16 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
        key.tunnel_ttl = 64;
 
        __builtin_memset(&gopt, 0x0, sizeof(gopt));
-       gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */
+       gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
        gopt.type = 0x08;
        gopt.r1 = 0;
        gopt.r2 = 0;
        gopt.r3 = 0;
        gopt.length = 2; /* 4-byte multiple */
-       *(int *) &gopt.opt_data = 0xdeadbeef;
+       *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
 
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_ZERO_CSUM_TX);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -408,6 +457,71 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
        return TC_ACT_OK;
 }
 
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+       struct bpf_tunnel_key key;
+       struct geneve_opt gopt;
+       int ret;
+
+       __builtin_memset(&key, 0x0, sizeof(key));
+       key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+       key.tunnel_id = 22;
+       key.tunnel_tos = 0;
+       key.tunnel_ttl = 64;
+
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       __builtin_memset(&gopt, 0x0, sizeof(gopt));
+       gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+       gopt.type = 0x08;
+       gopt.r1 = 0;
+       gopt.r2 = 0;
+       gopt.r3 = 0;
+       gopt.length = 2; /* 4-byte multiple */
+       *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+       ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+       char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+       struct bpf_tunnel_key key;
+       struct geneve_opt gopt;
+       int ret;
+
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       bpf_trace_printk(fmt, sizeof(fmt),
+                       key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+       return TC_ACT_OK;
+}
+
 SEC("ipip_set_tunnel")
 int _ipip_set_tunnel(struct __sk_buff *skb)
 {
@@ -431,9 +545,9 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
                if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
                        return TC_ACT_SHOT;
 
-               if (tcp->dest == htons(5200))
+               if (tcp->dest == bpf_htons(5200))
                        key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-               else if (tcp->dest == htons(5201))
+               else if (tcp->dest == bpf_htons(5201))
                        key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
                else
                        return TC_ACT_SHOT;
@@ -481,28 +595,12 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
                return TC_ACT_SHOT;
        }
 
-       key.remote_ipv6[0] = _htonl(0x2401db00);
+       __builtin_memset(&key, 0x0, sizeof(key));
+       key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
        key.tunnel_ttl = 64;
 
-       if (iph->protocol == IPPROTO_ICMP) {
-               key.remote_ipv6[3] = _htonl(1);
-       } else {
-               if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) {
-                       ERROR(iph->protocol);
-                       return TC_ACT_SHOT;
-               }
-
-               if (tcp->dest == htons(5200)) {
-                       key.remote_ipv6[3] = _htonl(1);
-               } else if (tcp->dest == htons(5201)) {
-                       key.remote_ipv6[3] = _htonl(2);
-               } else {
-                       ERROR(tcp->dest);
-                       return TC_ACT_SHOT;
-               }
-       }
-
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -518,14 +616,15 @@ int _ipip6_get_tunnel(struct __sk_buff *skb)
        struct bpf_tunnel_key key;
        char fmt[] = "remote ip6 %x::%x\n";
 
-       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
        }
 
-       bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
-                        _htonl(key.remote_ipv6[3]));
+       bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+                        bpf_htonl(key.remote_ipv6[3]));
        return TC_ACT_OK;
 }
 
@@ -545,28 +644,29 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
                return TC_ACT_SHOT;
        }
 
-       key.remote_ipv6[0] = _htonl(0x2401db00);
+       key.remote_ipv6[0] = bpf_htonl(0x2401db00);
        key.tunnel_ttl = 64;
 
-       if (iph->nexthdr == NEXTHDR_ICMP) {
-               key.remote_ipv6[3] = _htonl(1);
+       if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+               key.remote_ipv6[3] = bpf_htonl(1);
        } else {
-               if (iph->nexthdr != NEXTHDR_TCP) {
+               if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
                        ERROR(iph->nexthdr);
                        return TC_ACT_SHOT;
                }
 
-               if (tcp->dest == htons(5200)) {
-                       key.remote_ipv6[3] = _htonl(1);
-               } else if (tcp->dest == htons(5201)) {
-                       key.remote_ipv6[3] = _htonl(2);
+               if (tcp->dest == bpf_htons(5200)) {
+                       key.remote_ipv6[3] = bpf_htonl(1);
+               } else if (tcp->dest == bpf_htons(5201)) {
+                       key.remote_ipv6[3] = bpf_htonl(2);
                } else {
                        ERROR(tcp->dest);
                        return TC_ACT_SHOT;
                }
        }
 
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -582,14 +682,31 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
        struct bpf_tunnel_key key;
        char fmt[] = "remote ip6 %x::%x\n";
 
-       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_TUNINFO_IPV6);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
        }
 
-       bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
-                        _htonl(key.remote_ipv6[3]));
+       bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+                        bpf_htonl(key.remote_ipv6[3]));
+       return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+       struct bpf_xfrm_state x;
+       char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+       int ret;
+
+       ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+       if (ret < 0)
+               return TC_ACT_OK;
+
+       bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+                        bpf_ntohl(x.remote_ipv4));
        return TC_ACT_OK;
 }
 
index 3e7718b1a9ae49c176a407d9f14ad31704255701..94498eaf872ed32e560dd2c6b3564a25e38c17b6 100644 (file)
 # endif
 #endif
 #include "bpf_rlimit.h"
+#include "bpf_rand.h"
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
 
-#define MAX_INSNS      512
+#define MAX_INSNS      BPF_MAXINSNS
 #define MAX_FIXUPS     8
 #define MAX_NR_MAPS    4
 #define POINTER_VALUE  0xcafe4all
@@ -64,6 +65,7 @@ struct bpf_test {
        struct bpf_insn insns[MAX_INSNS];
        int fixup_map1[MAX_FIXUPS];
        int fixup_map2[MAX_FIXUPS];
+       int fixup_map3[MAX_FIXUPS];
        int fixup_prog[MAX_FIXUPS];
        int fixup_map_in_map[MAX_FIXUPS];
        const char *errstr;
@@ -76,6 +78,8 @@ struct bpf_test {
        } result, result_unpriv;
        enum bpf_prog_type prog_type;
        uint8_t flags;
+       __u8 data[TEST_DATA_LEN];
+       void (*fill_helper)(struct bpf_test *self);
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -88,6 +92,91 @@ struct test_val {
        int foo[MAX_ENTRIES];
 };
 
+struct other_val {
+       long long foo;
+       long long bar;
+};
+
+static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+{
+       /* test: 5 rounds of {skb->data[0], vlan_push} x PUSH_CNT followed by
+        * {skb->data[0], vlan_pop} x PUSH_CNT
+        */
+#define PUSH_CNT 51
+       unsigned int len = BPF_MAXINSNS;
+       struct bpf_insn *insn = self->insns;
+       int i = 0, j, k = 0;
+
+       insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+loop:
+       for (j = 0; j < PUSH_CNT; j++) {
+               insn[i++] = BPF_LD_ABS(BPF_B, 0);
+               insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+               i++;
+               insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+               insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
+               insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
+               insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                        BPF_FUNC_skb_vlan_push);
+               insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+               i++;
+       }
+
+       for (j = 0; j < PUSH_CNT; j++) {
+               insn[i++] = BPF_LD_ABS(BPF_B, 0);
+               insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+               i++;
+               insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+               insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                        BPF_FUNC_skb_vlan_pop);
+               insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+               i++;
+       }
+       if (++k < 5)
+               goto loop;
+
+       for (; i < len - 1; i++)
+               insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+       insn[len - 1] = BPF_EXIT_INSN();
+}
+
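+/* Load one packet byte and, if it matches the immediate, jump over a
+ * long run of LD_ABS instructions straight to the exit; stresses jump
+ * offset handling around ld_abs during verification.
+ */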
+static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+{
+       struct bpf_insn *insn = self->insns;
+       unsigned int len = BPF_MAXINSNS;
+       int i = 0;
+
+       insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+       insn[i++] = BPF_LD_ABS(BPF_B, 0);
+       insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2);
+       i++;
+       while (i < len - 1)
+               insn[i++] = BPF_LD_ABS(BPF_B, 1);
+       insn[i] = BPF_EXIT_INSN();
+}
+
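+/* Fill the program with BPF_LD_IMM64 loads of semi-random 64-bit values
+ * XORed into R0, then fold the upper half into the lower one before the
+ * exit; self->retval is updated to the expected 32-bit result.
+ */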
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+       struct bpf_insn *insn = self->insns;
+       uint64_t res = 0;
+       int i = 0;
+
+       insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+       while (i < self->retval) {
+               uint64_t val = bpf_semi_rand_get();
+               struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+               res ^= val;
+               insn[i++] = tmp[0];
+               insn[i++] = tmp[1];
+               insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+       }
+       insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+       insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+       insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+       insn[i] = BPF_EXIT_INSN();
+       res ^= (res >> 32);
+       self->retval = (uint32_t)res;
+}
+
 static struct bpf_test tests[] = {
        {
                "add+sub+mul",
@@ -5593,6 +5682,257 @@ static struct bpf_test tests[] = {
                .errstr = "R1 min value is negative",
                .prog_type = BPF_PROG_TYPE_TRACEPOINT,
        },
+       {
+               "map lookup helper access to map",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 8 },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map update helper access to map",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_4, 0),
+                       BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 10 },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map update helper access to map: wrong size",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_4, 0),
+                       BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .fixup_map3 = { 10 },
+               .result = REJECT,
+               .errstr = "invalid access to map value, value_size=8 off=0 size=16",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via const imm)",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+                                     offsetof(struct other_val, bar)),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 9 },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via const imm): out-of-bound 1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+                                     sizeof(struct other_val) - 4),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 9 },
+               .result = REJECT,
+               .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via const imm): out-of-bound 2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 9 },
+               .result = REJECT,
+               .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via const reg)",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_MOV64_IMM(BPF_REG_3,
+                                     offsetof(struct other_val, bar)),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 10 },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via const reg): out-of-bound 1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_MOV64_IMM(BPF_REG_3,
+                                     sizeof(struct other_val) - 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 10 },
+               .result = REJECT,
+               .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via const reg): out-of-bound 2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_MOV64_IMM(BPF_REG_3, -4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 10 },
+               .result = REJECT,
+               .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via variable)",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+                       BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+                                   offsetof(struct other_val, bar), 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 11 },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via variable): no max check",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 10 },
+               .result = REJECT,
+               .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "map helper access to adjusted map (via variable): wrong max check",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+                       BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+                                   offsetof(struct other_val, bar) + 1, 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map3 = { 3, 11 },
+               .result = REJECT,
+               .errstr = "invalid access to map value, value_size=16 off=9 size=8",
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
        {
                "map element value is preserved across register spilling",
                .insns = {
@@ -11423,6 +11763,278 @@ static struct bpf_test tests[] = {
                .errstr = "BPF_XADD stores into R2 packet",
                .prog_type = BPF_PROG_TYPE_XDP,
        },
+       {
+               "bpf_get_stack return R0 within range",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+                       BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
+                       BPF_MOV64_IMM(BPF_REG_4, 256),
+                       BPF_EMIT_CALL(BPF_FUNC_get_stack),
+                       BPF_MOV64_IMM(BPF_REG_1, 0),
+                       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
+                       BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
+                       BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+                       BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
+                       BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
+                       BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+                       BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
+                       BPF_MOV64_IMM(BPF_REG_4, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_get_stack),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 4 },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       },
+       {
+               "ld_abs: invalid op 1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_DW, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = REJECT,
+               .errstr = "unknown opcode",
+       },
+       {
+               "ld_abs: invalid op 2",
+               .insns = {
+                       BPF_MOV32_IMM(BPF_REG_0, 256),
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_IND(BPF_DW, BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = REJECT,
+               .errstr = "unknown opcode",
+       },
+       {
+               "ld_abs: nmap reduced",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_H, 12),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28),
+                       BPF_LD_ABS(BPF_H, 12),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26),
+                       BPF_MOV32_IMM(BPF_REG_0, 18),
+                       BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64),
+                       BPF_LD_IND(BPF_W, BPF_REG_7, 14),
+                       BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60),
+                       BPF_MOV32_IMM(BPF_REG_0, 280971478),
+                       BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60),
+                       BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15),
+                       BPF_LD_ABS(BPF_H, 12),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13),
+                       BPF_MOV32_IMM(BPF_REG_0, 22),
+                       BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+                       BPF_LD_IND(BPF_H, BPF_REG_7, 14),
+                       BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52),
+                       BPF_MOV32_IMM(BPF_REG_0, 17366),
+                       BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52),
+                       BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+                       BPF_MOV32_IMM(BPF_REG_0, 256),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .data = {
+                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                       0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 256,
+       },
+       {
+               "ld_abs: div + abs, test 1",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 3),
+                       BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+                       BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+                       BPF_LD_ABS(BPF_B, 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+                       BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+                       BPF_EXIT_INSN(),
+               },
+               .data = {
+                       10, 20, 30, 40, 50,
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 10,
+       },
+       {
+               "ld_abs: div + abs, test 2",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 3),
+                       BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+                       BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+                       BPF_LD_ABS(BPF_B, 128),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+                       BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+                       BPF_EXIT_INSN(),
+               },
+               .data = {
+                       10, 20, 30, 40, 50,
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 0,
+       },
+       {
+               "ld_abs: div + abs, test 3",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+                       BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+                       BPF_LD_ABS(BPF_B, 3),
+                       BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+                       BPF_EXIT_INSN(),
+               },
+               .data = {
+                       10, 20, 30, 40, 50,
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 0,
+       },
+       {
+               "ld_abs: div + abs, test 4",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+                       BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+                       BPF_LD_ABS(BPF_B, 256),
+                       BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+                       BPF_EXIT_INSN(),
+               },
+               .data = {
+                       10, 20, 30, 40, 50,
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 0,
+       },
+       {
+               "ld_abs: vlan + abs, test 1",
+               .insns = { },
+               .data = {
+                       0x34,
+               },
+               .fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 0xbef,
+       },
+       {
+               "ld_abs: vlan + abs, test 2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+                       BPF_MOV64_IMM(BPF_REG_6, 0),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_skb_vlan_push),
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 42),
+                       BPF_EXIT_INSN(),
+               },
+               .data = {
+                       0x34,
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 42,
+       },
+       {
+               "ld_abs: jump around ld_abs",
+               .insns = { },
+               .data = {
+                       10, 11,
+               },
+               .fill_helper = bpf_fill_jump_around_ld_abs,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 10,
+       },
+       {
+               "ld_dw: xor semi-random 64 bit imms, test 1",
+               .insns = { },
+               .data = { },
+               .fill_helper = bpf_fill_rand_ld_dw,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 4090,
+       },
+       {
+               "ld_dw: xor semi-random 64 bit imms, test 2",
+               .insns = { },
+               .data = { },
+               .fill_helper = bpf_fill_rand_ld_dw,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 2047,
+       },
+       {
+               "ld_dw: xor semi-random 64 bit imms, test 3",
+               .insns = { },
+               .data = { },
+               .fill_helper = bpf_fill_rand_ld_dw,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 511,
+       },
+       {
+               "ld_dw: xor semi-random 64 bit imms, test 4",
+               .insns = { },
+               .data = { },
+               .fill_helper = bpf_fill_rand_ld_dw,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+               .retval = 5,
+       },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -11526,16 +12138,20 @@ static int create_map_in_map(void)
        return outer_map_fd;
 }
 
-static char bpf_vlog[32768];
+static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
                          int *map_fds)
 {
        int *fixup_map1 = test->fixup_map1;
        int *fixup_map2 = test->fixup_map2;
+       int *fixup_map3 = test->fixup_map3;
        int *fixup_prog = test->fixup_prog;
        int *fixup_map_in_map = test->fixup_map_in_map;
 
+       if (test->fill_helper)
+               test->fill_helper(test);
+
        /* Allocating HTs with 1 elem is fine here, since we only exercise
         * the verifier and never do a runtime lookup, so the only thing
         * that really matters is the value size in this case.
@@ -11556,6 +12172,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
                } while (*fixup_map2);
        }
 
+       if (*fixup_map3) {
+               map_fds[1] = create_map(sizeof(struct other_val), 1);
+               do {
+                       prog[*fixup_map3].imm = map_fds[1];
+                       fixup_map3++;
+               } while (*fixup_map3);
+       }
+
        if (*fixup_prog) {
                map_fds[2] = create_prog_array();
                do {
@@ -11577,10 +12201,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                           int *passes, int *errors)
 {
        int fd_prog, expected_ret, reject_from_alignment;
+       int prog_len, prog_type = test->prog_type;
        struct bpf_insn *prog = test->insns;
-       int prog_len = probe_filter_length(prog);
-       char data_in[TEST_DATA_LEN] = {};
-       int prog_type = test->prog_type;
        int map_fds[MAX_NR_MAPS];
        const char *expected_err;
        uint32_t retval;
@@ -11590,6 +12212,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                map_fds[i] = -1;
 
        do_test_fixup(test, prog, map_fds);
+       prog_len = probe_filter_length(prog);
 
        fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
                                     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -11629,8 +12252,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        }
 
        if (fd_prog >= 0) {
-               err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in),
-                                       NULL, NULL, &retval, NULL);
+               err = bpf_prog_test_run(fd_prog, 1, test->data,
+                                       sizeof(test->data), NULL, NULL,
+                                       &retval, NULL);
                if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
                        printf("Unexpected bpf_prog_test_run error\n");
                        goto fail_log;
@@ -11713,6 +12337,11 @@ static void get_unpriv_disabled()
        FILE *fd;
 
        fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+       if (!fd) {
+               perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+               unpriv_disabled = true;
+               return;
+       }
        if (fgets(buf, 2, fd) == buf && atoi(buf))
                unpriv_disabled = true;
        fclose(fd);
@@ -11783,5 +12412,6 @@ int main(int argc, char **argv)
                return EXIT_FAILURE;
        }
 
+       bpf_semi_rand_init();
        return do_test(unpriv, from, to);
 }
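
The retval fields above are verified by actually executing each accepted
program through bpf_prog_test_run() against the per-test data buffer. A
hedged sketch of exercising this locally — the build step and the from/to
range arguments are assumptions inferred from the main() shown above:

    # Build and run the verifier selftests (most cases need root).
    cd tools/testing/selftests/bpf
    make test_verifier
    sudo ./test_verifier            # run every test
    sudo ./test_verifier 600 650    # hypothetical from/to subrange
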
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
new file mode 100644 (file)
index 0000000..8fb4fe8
--- /dev/null
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include "trace_helpers.h"
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+       return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+       FILE *f = fopen("/proc/kallsyms", "r");
+       char func[256], buf[256];
+       char symbol;
+       void *addr;
+       int i = 0;
+
+       if (!f)
+               return -ENOENT;
+
+       while (!feof(f)) {
+               if (!fgets(buf, sizeof(buf), f))
+                       break;
+               if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+                       break;
+               if (!addr)
+                       continue;
+               syms[i].addr = (long) addr;
+               syms[i].name = strdup(func);
+               i++;
+       }
+       sym_cnt = i;
+       qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+       return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+       int start = 0, end = sym_cnt;
+       int result;
+
+       while (start < end) {
+               size_t mid = start + (end - start) / 2;
+
+               result = key - syms[mid].addr;
+               if (result < 0)
+                       end = mid;
+               else if (result > 0)
+                       start = mid + 1;
+               else
+                       return &syms[mid];
+       }
+
+       if (start >= 1 && syms[start - 1].addr < key &&
+           key < syms[start].addr)
+               /* valid ksym */
+               return &syms[start - 1];
+
+       /* out of range. return _stext */
+       return &syms[0];
+}
+
+static int page_size;
+static int page_cnt = 8;
+static struct perf_event_mmap_page *header;
+
+int perf_event_mmap(int fd)
+{
+       void *base;
+       int mmap_size;
+
+       page_size = getpagesize();
+       mmap_size = page_size * (page_cnt + 1);
+
+       base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+       if (base == MAP_FAILED) {
+               printf("mmap err\n");
+               return -1;
+       }
+
+       header = base;
+       return 0;
+}
+
+static int perf_event_poll(int fd)
+{
+       struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+       return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+       struct perf_event_header header;
+       __u32 size;
+       char data[];
+};
+
+static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
+{
+       struct perf_event_sample *e = event;
+       perf_event_print_fn fn = priv;
+       int ret;
+
+       if (e->header.type == PERF_RECORD_SAMPLE) {
+               ret = fn(e->data, e->size);
+               if (ret != LIBBPF_PERF_EVENT_CONT)
+                       return ret;
+       } else if (e->header.type == PERF_RECORD_LOST) {
+               struct {
+                       struct perf_event_header header;
+                       __u64 id;
+                       __u64 lost;
+               } *lost = (void *) e;
+               printf("lost %lld events\n", lost->lost);
+       } else {
+               printf("unknown event type=%d size=%d\n",
+                      e->header.type, e->header.size);
+       }
+
+       return LIBBPF_PERF_EVENT_CONT;
+}
+
+int perf_event_poller(int fd, perf_event_print_fn output_fn)
+{
+       enum bpf_perf_event_ret ret;
+       void *buf = NULL;
+       size_t len = 0;
+
+       for (;;) {
+               perf_event_poll(fd);
+               ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
+                                                page_size, &buf, &len,
+                                                bpf_perf_event_print,
+                                                output_fn);
+               if (ret != LIBBPF_PERF_EVENT_CONT)
+                       break;
+       }
+       free(buf);
+
+       return ret;
+}
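
load_kallsyms() above parses the three-column "address type name" lines of
/proc/kallsyms via sscanf(); a quick way to eyeball the input it expects
(output illustrative — addresses vary per boot and may print as zeros
without sufficient privileges):

    sudo head -n 3 /proc/kallsyms
    # e.g.  ffffffff81000000 T _stext
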
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
new file mode 100644 (file)
index 0000000..36d90e3
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_HELPER_H
+#define __TRACE_HELPER_H
+
+#include <libbpf.h>
+
+struct ksym {
+       long addr;
+       char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+
+typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
+
+int perf_event_mmap(int fd);
+/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
+int perf_event_poller(int fd, perf_event_print_fn output_fn);
+#endif
index 4acfdebf36fad511bea9d90f6f7b62f140f60eb6..9de8b7cb4e6df6b5929c915566dff74163efbcf2 100644 (file)
@@ -6,15 +6,21 @@
 #include <stdlib.h>
 
 #define BUF_SIZE 256
-int main(void)
+
+int main(int argc, char *argv[])
 {
        int fd = open("/dev/urandom", O_RDONLY);
        int i;
        char buf[BUF_SIZE];
+       int count = 4;
 
        if (fd < 0)
                return 1;
-       for (i = 0; i < 4; ++i)
+
+       if (argc == 2)
+               count = atoi(argv[1]);
+
+       for (i = 0; i < count; ++i)
                read(fd, buf, BUF_SIZE);
 
        close(fd);
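
The reader now takes an optional iteration count as argv[1], defaulting to
four reads of BUF_SIZE (256) bytes; the binary name below is hypothetical,
since the page elides this file's path:

    ./urandom_read        # default: 4 reads
    ./urandom_read 100    # 100 reads
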
index 4e6d09fb166f1076bdbd6a03227559792270071e..5c7d7001ad37c34d1ccaf5ef34e05864cbeaa8ed 100644 (file)
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := dnotify_test devpts_pts
-all: $(TEST_PROGS)
 
-include ../lib.mk
+TEST_GEN_PROGS := devpts_pts
+TEST_GEN_PROGS_EXTENDED := dnotify_test
 
-clean:
-       rm -fr $(TEST_PROGS)
+include ../lib.mk
index 826f38d5dd19f1a0455cdc1074bdd83b6dce02c5..261c81f086064e922da966f8a1497e1b724e2d85 100644 (file)
@@ -4,6 +4,7 @@
 all:
 
 TEST_PROGS := fw_run_tests.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
 
 include ../lib.mk
 
index 9ea31b57d71a60008430b56a1b1498ed0184ec86..962d7f4ac6276c598bedf42755e5f711a9c51925 100755 (executable)
@@ -154,11 +154,13 @@ test_finish()
        if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
                echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
        fi
-       if [ "$OLD_FWPATH" = "" ]; then
-               OLD_FWPATH=" "
-       fi
        if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
-               echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+               if [ "$OLD_FWPATH" = "" ]; then
+                       # A zero-length write won't work; write a null byte
+                       printf '\000' >/sys/module/firmware_class/parameters/path
+               else
+                       echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+               fi
        fi
        if [ -f $FW ]; then
                rm -f "$FW"
index 06d638e9dc62748bf33bfbf7dbcb37f9494fdc00..cffdd4eb0a57ccc76b6da44b6f557cec1798f72d 100755 (executable)
@@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
        run_test_config_0003
 else
        echo "Running basic kernel configuration, working with your config"
-       run_test
+       run_tests
 fi
index 786dce7e48bed53e78c0590786254426dbd34adc..2aabab363cfb2b206c546341ef5d3a58a868a325 100644 (file)
@@ -29,7 +29,7 @@ do_reset
 
 echo "Test extended error support"
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
-echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
 if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
     fail "Failed to generate extended error in histogram"
 fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
new file mode 100644 (file)
index 0000000..c193dce
--- /dev/null
@@ -0,0 +1,44 @@
+#!/bin/sh
+# description: event trigger - test multiple actions on hist trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test multiple actions on hist trigger"
+echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+TRIGGER1=events/sched/sched_wakeup/trigger
+TRIGGER2=events/sched/sched_switch/trigger
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
+
+do_reset
+
+exit 0
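
The trigger files written above can be read back, which helps when
debugging a rejected variant of this test; paths are relative to the
tracefs directory the harness runs in (an assumption here):

    cat events/sched/sched_switch/trigger   # list the configured hist triggers
    cat synthetic_events                    # shows the wakeup_latency definition
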
index 2ddcc96ae4561acd3d773db814dffbb4c0acdb51..d9d00319b07cd691848a2f468c48e3bf10127284 100644 (file)
@@ -15,7 +15,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
 
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
 
 # After inclusion, $(OUTPUT) is defined and
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
index 7ab98e41324ffd012720338e3a59074795f23ef7..ac53730b30aa48c9f9ecf54e3e6362f8cdf0e1c4 100644 (file)
@@ -19,6 +19,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include "kselftest.h"
 
 ssize_t test_write(int fd, const void *buf, size_t count);
 ssize_t test_read(int fd, void *buf, size_t count);
index 2cedfda181d4560dbc02ca0d2c5e44ab54326c61..37e2a787d2fcc6cc6cedcb30b9620456e99e43c0 100644 (file)
@@ -50,8 +50,8 @@ int kvm_check_cap(long cap)
        int kvm_fd;
 
        kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-       TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
-               KVM_DEV_PATH, kvm_fd, errno);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
 
        ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
        TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
@@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 
        vm->mode = mode;
        kvm_fd = open(KVM_DEV_PATH, perm);
-       TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
-               KVM_DEV_PATH, kvm_fd, errno);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
 
        /* Create VM. */
        vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
@@ -418,8 +418,8 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
 
        cpuid = allocate_kvm_cpuid2();
        kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-       TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
-               KVM_DEV_PATH, kvm_fd, errno);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
 
        ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
        TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -675,8 +675,8 @@ static int vcpu_mmap_sz(void)
        int dev_fd, ret;
 
        dev_fd = open(KVM_DEV_PATH, O_RDONLY);
-       TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
-               __func__, KVM_DEV_PATH, dev_fd, errno);
+       if (dev_fd < 0)
+               exit(KSFT_SKIP);
 
        ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
        TEST_ASSERT(ret >= sizeof(struct kvm_run),
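
With /dev/kvm missing or inaccessible, the helpers above now exit with the
kselftest skip code instead of tripping a TEST_ASSERT; KSFT_SKIP comes from
the kselftest.h header included earlier (its value, 4, is stated here as an
assumption, and the binary name is hypothetical since this page elides the
modified file's path):

    ./sync_regs_test; echo "exit=$?"    # expect exit=4 on hosts without KVM
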
index 428e9473f5e20cec7a3fe2df19d615c259491072..eae1ece3c31b8505e99877bf11d8fa8fb6907cdc 100644 (file)
@@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
 {
 }
 
+#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
 int main(int argc, char *argv[])
 {
        struct kvm_vm *vm;
@@ -98,9 +101,14 @@ int main(int argc, char *argv[])
        setbuf(stdout, NULL);
 
        cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-       TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS,
-                   "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n",
-                   cap, KVM_SYNC_X86_VALID_FIELDS);
+       if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+               fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+               exit(KSFT_SKIP);
+       }
+       if ((cap & INVALID_SYNC_FIELD) != 0) {
+               fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+               exit(KSFT_SKIP);
+       }
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, guest_code);
@@ -108,7 +116,14 @@ int main(int argc, char *argv[])
        run = vcpu_state(vm, VCPU_ID);
 
        /* Request reading invalid register set from VCPU. */
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+       run->kvm_valid_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+                   rv);
+       vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(rv < 0 && errno == EINVAL,
                    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
@@ -116,7 +131,14 @@ int main(int argc, char *argv[])
        vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
 
        /* Request setting invalid register set into VCPU. */
-       run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+                   rv);
+       vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(rv < 0 && errno == EINVAL,
                    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
@@ -125,7 +147,7 @@ int main(int argc, char *argv[])
 
        /* Request and verify all valid register sets. */
        /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
                    "Unexpected exit reason: %u (%s),\n",
@@ -146,7 +168,7 @@ int main(int argc, char *argv[])
        run->s.regs.sregs.apic_base = 1 << 11;
        /* TODO run->s.regs.events.XYZ = ABC; */
 
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -172,7 +194,7 @@ int main(int argc, char *argv[])
        /* Clear kvm_dirty_regs bits, verify new s.regs values are
         * overwritten with existing guest values.
         */
-       run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = 0;
        run->s.regs.regs.r11 = 0xDEADBEEF;
        rv = _vcpu_run(vm, VCPU_ID);
@@ -211,7 +233,7 @@ int main(int argc, char *argv[])
         * with kvm_sync_regs values.
         */
        run->kvm_valid_regs = 0;
-       run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS;
+       run->kvm_dirty_regs = TEST_SYNC_FIELDS;
        run->s.regs.regs.r11 = 0xBBBB;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
index 8f7f62093add0fca51d5923196189cc3110e9597..aaa633263b2c42b38445e3b73815a9f2db977670 100644 (file)
@@ -189,8 +189,8 @@ int main(int argc, char *argv[])
        struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
        if (!(entry->ecx & CPUID_VMX)) {
-               printf("nested VMX not enabled, skipping test");
-               return 0;
+               fprintf(stderr, "nested VMX not enabled, skipping test\n");
+               exit(KSFT_SKIP);
        }
 
        vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
index 195e9d4739a98d6aee0fb10f2c074b2a55bcf531..c1b1a4dc6a964dd162b05dd3528dfbab91a7f3ce 100644 (file)
@@ -20,10 +20,10 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
 .ONESHELL:
 define RUN_TESTS
-       @export KSFT_TAP_LEVEL=`echo 1`;
-       @test_num=`echo 0`;
-       @echo "TAP version 13";
-       @for TEST in $(1); do                           \
+       @export KSFT_TAP_LEVEL=`echo 1`;                \
+       test_num=`echo 0`;                              \
+       echo "TAP version 13";                          \
+       for TEST in $(1); do                            \
                BASENAME_TEST=`basename $$TEST`;        \
                test_num=`echo $$test_num+1 | bc`;      \
                echo "selftests: $$BASENAME_TEST";      \
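
The added backslash continuations matter because make normally runs each
recipe line in its own shell, so the old form lost $$test_num between
lines. The effect, modeled in plain shell:

    sh -c 'n=1'; sh -c 'echo "n=$n"'   # two shells, as before: prints n=
    sh -c 'n=1; echo "n=$n"'           # one shell, as with continuations: prints n=1
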
index 5559f6add04680522bede6f9073cc961bec5b1d6..f0e6c35a93ae941c6494f2df80f8034dc46528c9 100644 (file)
@@ -8,3 +8,6 @@ reuseport_bpf_numa
 reuseport_dualstack
 reuseaddr_conflict
 tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
index c3761c35f542ea78f8113ebd5c41740c86904e6a..e60dddbf963c76056deb7aeb4517c0079b1b305b 100644 (file)
@@ -5,10 +5,13 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh
+TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
-TEST_GEN_FILES += tcp_mmap
+TEST_GEN_FILES += tcp_mmap tcp_inq
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
@@ -16,3 +19,4 @@ include ../lib.mk
 
 $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
 $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
index 75d922438bc9768827a2d12a6ac2b1044e1d0f37..d8313d0438b7422df129f2fe5868236119eca991 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
 NUM_NETIFS=4
 CHECK_TC="yes"
 source lib.sh
@@ -75,14 +76,31 @@ cleanup()
        vrf_cleanup
 }
 
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+       ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+       learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+       flood_test $swp2 $h1 $h2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
 
 exit $EXIT_STATUS
index 1cddf06f691dd892fa2c922dfa8c26a023ee32e2..c15c6c85c9849ba768e74c478028a2d837c40426 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
 NUM_NETIFS=4
 source lib.sh
 
@@ -73,14 +74,31 @@ cleanup()
        vrf_cleanup
 }
 
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+       ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+       learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+       flood_test $swp2 $h1 $h2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
 
 exit $EXIT_STATUS
index 1ac6c62271f352329dbc45407df11b6f238cb07c..91041c49655baee70bb81b4af326def01a90cdb1 100644 (file)
@@ -321,6 +321,25 @@ simple_if_fini()
        vrf_destroy $vrf_name
 }
 
+tunnel_create()
+{
+       local name=$1; shift
+       local type=$1; shift
+       local local=$1; shift
+       local remote=$1; shift
+
+       ip link add name $name type $type \
+          local $local remote $remote "$@"
+       ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+       local name=$1; shift
+
+       ip link del dev $name
+}
+
 master_name_get()
 {
        local if_name=$1
@@ -335,6 +354,15 @@ link_stats_tx_packets_get()
        ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
 }
 
+tc_rule_stats_get()
+{
+       local dev=$1; shift
+       local pref=$1; shift
+
+       tc -j -s filter show dev $dev ingress pref $pref |
+       jq '.[1].options.actions[].stats.packets'
+}
+
 mac_get()
 {
        local if_name=$1
@@ -353,19 +381,33 @@ bridge_ageing_time_get()
        echo $((ageing_time / 100))
 }
 
-forwarding_enable()
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+       local key=$1; shift
+       local value=$1; shift
+
+       SYSCTL_ORIG[$key]=$(sysctl -n $key)
+       sysctl -qw $key=$value
+}
+
+sysctl_restore()
 {
-       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
-       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+       local key=$1; shift
 
-       sysctl -q -w net.ipv4.conf.all.forwarding=1
-       sysctl -q -w net.ipv6.conf.all.forwarding=1
+       sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+       sysctl_set net.ipv4.conf.all.forwarding 1
+       sysctl_set net.ipv6.conf.all.forwarding 1
 }
 
 forwarding_restore()
 {
-       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
-       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+       sysctl_restore net.ipv6.conf.all.forwarding
+       sysctl_restore net.ipv4.conf.all.forwarding
 }
 
 tc_offload_check()
@@ -381,6 +423,83 @@ tc_offload_check()
        return 0
 }
 
+slow_path_trap_install()
+{
+       local dev=$1; shift
+       local direction=$1; shift
+
+       if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+               # For slow-path testing, install a trap so that packets which
+               # would otherwise be switched in HW take the slow path instead.
+               tc filter add dev $dev $direction pref 1 \
+                  flower skip_sw action trap
+       fi
+}
+
+slow_path_trap_uninstall()
+{
+       local dev=$1; shift
+       local direction=$1; shift
+
+       if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+               tc filter del dev $dev $direction pref 1 flower skip_sw
+       fi
+}
+
+__icmp_capture_add_del()
+{
+       local add_del=$1; shift
+       local pref=$1; shift
+       local vsuf=$1; shift
+       local tundev=$1; shift
+       local filter=$1; shift
+
+       tc filter $add_del dev "$tundev" ingress \
+          proto ip$vsuf pref $pref \
+          flower ip_proto icmp$vsuf $filter \
+          action pass
+}
+
+icmp_capture_install()
+{
+       __icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+       __icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+       __icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+       __icmp_capture_add_del del 100 v6 "$@"
+}
+
+matchall_sink_create()
+{
+       local dev=$1; shift
+
+       tc qdisc add dev $dev clsact
+       tc filter add dev $dev ingress \
+          pref 10000 \
+          matchall \
+          action drop
+}
+
+tests_run()
+{
+       local current_test
+
+       for current_test in ${TESTS:-$ALL_TESTS}; do
+               $current_test
+       done
+}
+
 ##############################################################################
 # Tests
 
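A hedged usage sketch for the helpers added above — sysctl_set()/
sysctl_restore() bracket a test body, and tests_run() honors a TESTS
override via ${TESTS:-$ALL_TESTS}:

    # Save, tweak, then restore a sysctl around the interesting part.
    sysctl_set net.ipv6.conf.all.forwarding 1
    # ... test traffic ...
    sysctl_restore net.ipv6.conf.all.forwarding

    # Run only a subset of a script's ALL_TESTS without editing it.
    TESTS="test_gretap" ./mirror_gre.sh
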
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755 (executable)
index 0000000..c6786d1
--- /dev/null
@@ -0,0 +1,161 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+       test_gretap_mac
+       test_ip6gretap_mac
+       test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip address add dev $swp3 192.0.2.129/28
+       ip address add dev $h3 192.0.2.130/28
+
+       ip address add dev $swp3 2001:db8:2::1/64
+       ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip address del dev $h3 2001:db8:2::2/64
+       ip address del dev $swp3 2001:db8:2::1/64
+
+       ip address del dev $h3 192.0.2.130/28
+       ip address del dev $swp3 192.0.2.129/28
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+       local tundev=$1; shift
+       local direction=$1; shift
+       local prot=$1; shift
+       local what=$1; shift
+
+       local swp3mac=$(mac_get $swp3)
+       local h3mac=$(mac_get $h3)
+
+       RET=0
+
+       mirror_install $swp1 $direction $tundev "matchall $tcflags"
+       tc qdisc add dev $h3 clsact
+       tc filter add dev $h3 ingress pref 77 prot $prot \
+               flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
+               action pass
+
+       mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+       tc filter del dev $h3 ingress pref 77
+       tc qdisc del dev $h3 clsact
+       mirror_uninstall $swp1 $direction
+
+       log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+       RET=0
+
+       mirror_install $swp1 ingress gt4 "matchall $tcflags"
+       mirror_install $swp1 egress gt6 "matchall $tcflags"
+       quick_test_span_gre_dir gt4 ingress
+       quick_test_span_gre_dir gt6 egress
+
+       mirror_uninstall $swp1 ingress
+       fail_test_span_gre_dir gt4 ingress
+       quick_test_span_gre_dir gt6 egress
+
+       mirror_install $swp1 ingress gt4 "matchall $tcflags"
+       mirror_uninstall $swp1 egress
+       quick_test_span_gre_dir gt4 ingress
+       fail_test_span_gre_dir gt6 egress
+
+       mirror_uninstall $swp1 ingress
+       log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+       full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+       full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+       full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+       full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+       test_span_gre_mac gt4 ingress ip "mirror to gretap"
+       test_span_gre_mac gt4 egress ip "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+       test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
+       test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
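
Like the other forwarding selftests, the script takes its six ports from
the NETIFS array consumed in setup_prepare(); a hypothetical
forwarding.config wiring three veth pairs back-to-back (the config-file
mechanism is assumed from lib.sh conventions, not shown in this diff):

    NETIFS[p1]=veth0
    NETIFS[p2]=veth1
    NETIFS[p3]=veth2
    NETIFS[p4]=veth3
    NETIFS[p5]=veth4
    NETIFS[p6]=veth5
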
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755 (executable)
index 0000000..360ca13
--- /dev/null
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   | +---------------------------------------------------------------------+ |
+#   | | OL                      + gt6 (ip6gretap)      + gt4 (gretap)       | |
+#   | |                         : loc=2001:db8:2::1    : loc=192.0.2.129    | |
+#   | |                         : rem=2001:db8:2::2    : rem=192.0.2.130    | |
+#   | |                         : ttl=100              : ttl=100            | |
+#   | |                         : tos=inherit          : tos=inherit        | |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   |                           :  |                   :  |                   |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   | | UL                      :  |,---------------------'                 | |
+#   | |   + $swp3               :  ||                  :                    | |
+#   | |   | 192.0.2.129/28      :  vv                  :                    | |
+#   | |   | 2001:db8:2::1/64    :  + ul (dummy)        :                    | |
+#   | +---|---------------------:----------------------:--------------------+ |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks the underlay VRF
+# in which the encapsulated packets should be routed.
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+       simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+       tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+       ip link set h3-gt4 vrf v$h3
+       matchall_sink_create h3-gt4
+
+       tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+       ip link set h3-gt6 vrf v$h3
+       matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+       tunnel_destroy h3-gt6
+       tunnel_destroy h3-gt4
+
+       simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+       # Bridge between H1 and H2.
+
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+
+       ip link set dev $swp2 master br1
+       ip link set dev $swp2 up
+
+       tc qdisc add dev $swp1 clsact
+
+       # Underlay.
+
+       simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+       ip link add name ul type dummy
+       ip link set dev ul master v$swp3
+       ip link set dev ul up
+
+       # Overlay.
+
+       vrf_create vrf-ol
+       ip link set dev vrf-ol up
+
+       tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+                     ttl 100 tos inherit dev ul
+       ip link set dev gt4 master vrf-ol
+       ip link set dev gt4 up
+
+       tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+                     ttl 100 tos inherit dev ul allow-localremote
+       ip link set dev gt6 master vrf-ol
+       ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+       vrf_destroy vrf-ol
+
+       tunnel_destroy gt6
+       tunnel_destroy gt4
+
+       simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+       ip link del dev ul
+
+       tc qdisc del dev $swp1 clsact
+
+       ip link set dev $swp1 down
+       ip link set dev $swp2 down
+       ip link del dev br1
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       h3_create
+
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+
+       h3_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+test_gretap()
+{
+       full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+       full_test_span_gre_dir gt4 egress  0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+       full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+       full_test_span_gre_dir gt6 egress  0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+       RET=0
+
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
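
The tunnel_create()/tunnel_destroy() helpers from lib.sh, shown standalone
with the same arguments switch_create() passes above — the trailing "$@" in
the helper is what lets the ttl/tos/dev options flow through to ip link:

    tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
                  ttl 100 tos inherit dev ul
    # ... run mirroring tests ...
    tunnel_destroy gt4
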
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755 (executable)
index 0000000..50ab346
--- /dev/null
@@ -0,0 +1,212 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+       test_ttl
+       test_tun_up
+       test_egress_up
+       test_remote_ip
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       # This test downs $swp3, which deletes the configured IPv6 address
+       # unless this sysctl is set.
+       sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+       ip address add dev $swp3 192.0.2.129/28
+       ip address add dev $h3 192.0.2.130/28
+
+       ip address add dev $swp3 2001:db8:2::1/64
+       ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip address del dev $h3 2001:db8:2::2/64
+       ip address del dev $swp3 2001:db8:2::1/64
+
+       ip address del dev $h3 192.0.2.130/28
+       ip address del dev $swp3 192.0.2.129/28
+
+       sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+       local tundev=$1; shift
+       local type=$1; shift
+       local prot=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       tc qdisc add dev $h3 clsact
+       tc filter add dev $h3 ingress pref 77 prot $prot \
+               flower ip_ttl 50 action pass
+
+       mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+       ip link set dev $tundev type $type ttl 50
+       mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+       ip link set dev $tundev type $type ttl 100
+       tc filter del dev $h3 ingress pref 77
+       tc qdisc del dev $h3 clsact
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+       local tundev=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       ip link set dev $tundev down
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       fail_test_span_gre_dir $tundev ingress
+
+       ip link set dev $tundev up
+
+       quick_test_span_gre_dir $tundev ingress
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+       local tundev=$1; shift
+       local remote_ip=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       ip link set dev $swp3 down
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       fail_test_span_gre_dir $tundev ingress
+
+       # After setting the device up, wait for the neighbor to be resolved so
+       # that we can expect mirroring to work.
+       ip link set dev $swp3 up
+       while true; do
+               ip neigh sh dev $swp3 $remote_ip nud reachable |
+                   grep -q ^
+               if [[ $? -ne 0 ]]; then
+                       sleep 1
+               else
+                       break
+               fi
+       done
+
+       quick_test_span_gre_dir $tundev ingress
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+       local tundev=$1; shift
+       local type=$1; shift
+       local correct_ip=$1; shift
+       local wrong_ip=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       ip link set dev $tundev type $type remote $wrong_ip
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       fail_test_span_gre_dir $tundev ingress
+
+       ip link set dev $tundev type $type remote $correct_ip
+       quick_test_span_gre_dir $tundev ingress
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: remote address change ($tcflags)"
+}
+
+test_ttl()
+{
+       test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+       test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+       test_span_gre_tun_up gt4 "mirror to gretap"
+       test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+       test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+       test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+       test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+       test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
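
The configuration change that test_span_gre_ttl() exercises boils down to
re-keying a live tunnel device; the mirror is expected to pick up the new
TTL without being reinstalled:

    ip link set dev gt4 type gretap ttl 50     # mirrored frames now match ip_ttl 50
    ip link set dev gt4 type gretap ttl 100    # back to the topology default
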
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755 (executable)
index 0000000..2e54407
--- /dev/null
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. A flower match on one of the
+# addresses is configured with a mirror action. It is expected that when
+# pinging this address, mirroring takes place, whereas when pinging the other
+# one, there's no mirroring.
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip address add dev $swp3 192.0.2.129/28
+       ip address add dev $h3 192.0.2.130/28
+
+       ip address add dev $swp3 2001:db8:2::1/64
+       ip address add dev $h3 2001:db8:2::2/64
+
+       ip address add dev $h1 192.0.2.3/28
+       ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip address del dev $h2 192.0.2.4/28
+       ip address del dev $h1 192.0.2.3/28
+
+       ip address del dev $h3 2001:db8:2::2/64
+       ip address del dev $swp3 2001:db8:2::1/64
+
+       ip address del dev $h3 192.0.2.130/28
+       ip address del dev $swp3 192.0.2.129/28
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+       test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+       local tundev=$1; shift
+       local direction=$1; shift
+       local forward_type=$1; shift
+       local backward_type=$1; shift
+       local match_dip=$1; shift
+       local what=$1; shift
+
+       mirror_install $swp1 $direction $tundev \
+                      "protocol ip flower $tcflags dst_ip $match_dip"
+       fail_test_span_gre_dir $tundev $direction
+       test_span_gre_dir_acl "$tundev" "$direction" \
+                         "$forward_type" "$backward_type"
+       mirror_uninstall $swp1 $direction
+
+       log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+       full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+       full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+       full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+       full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+       RET=0
+
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644 (file)
index 0000000..207ffd1
--- /dev/null
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0
+
+do_test_span_gre_dir_ips()
+{
+       local expect=$1; shift
+       local tundev=$1; shift
+       local direction=$1; shift
+       local ip1=$1; shift
+       local ip2=$1; shift
+
+       icmp_capture_install h3-$tundev
+       mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 $expect
+       mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 $expect
+       icmp_capture_uninstall h3-$tundev
+}
+
+quick_test_span_gre_dir_ips()
+{
+       do_test_span_gre_dir_ips 10 "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+       do_test_span_gre_dir_ips 0 "$@"
+}
+
+test_span_gre_dir_ips()
+{
+       local tundev=$1; shift
+       local direction=$1; shift
+       local forward_type=$1; shift
+       local backward_type=$1; shift
+       local ip1=$1; shift
+       local ip2=$1; shift
+
+       quick_test_span_gre_dir_ips "$tundev" "$direction" "$ip1" "$ip2"
+
+       icmp_capture_install h3-$tundev "type $forward_type"
+       mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 10
+       icmp_capture_uninstall h3-$tundev
+
+       icmp_capture_install h3-$tundev "type $backward_type"
+       mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 10
+       icmp_capture_uninstall h3-$tundev
+}
+
+full_test_span_gre_dir_ips()
+{
+       local tundev=$1; shift
+       local direction=$1; shift
+       local forward_type=$1; shift
+       local backward_type=$1; shift
+       local what=$1; shift
+       local ip1=$1; shift
+       local ip2=$1; shift
+
+       RET=0
+
+       mirror_install $swp1 $direction $tundev "matchall $tcflags"
+       test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \
+                             "$backward_type" "$ip1" "$ip2"
+       mirror_uninstall $swp1 $direction
+
+       log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+       quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+       fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+       test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+       full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
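
With the *_ips variants doing the heavy lifting, callers normally use the
short wrappers and only name the tunnel, direction, expected ICMP types and
a label — exactly as mirror_gre.sh does:

    full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
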
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755 (executable)
index 0000000..fc0508e
--- /dev/null
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address is invalid at the time the mirroring is set up.
+# Later on, the neighbor is deleted; it is expected to be reinitialized through
+# the usual ARP process, and the mirroring offload updated accordingly.
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip address add dev $swp3 192.0.2.129/28
+       ip address add dev $h3 192.0.2.130/28
+
+       ip address add dev $swp3 2001:db8:2::1/64
+       ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip address del dev $h3 2001:db8:2::2/64
+       ip address del dev $swp3 2001:db8:2::1/64
+
+       ip address del dev $h3 192.0.2.130/28
+       ip address del dev $swp3 192.0.2.129/28
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+       local addr=$1; shift
+       local tundev=$1; shift
+       local direction=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+       mirror_install $swp1 $direction $tundev "matchall $tcflags"
+       fail_test_span_gre_dir $tundev ingress
+       ip neigh del dev $swp3 $addr
+       quick_test_span_gre_dir $tundev ingress
+       mirror_uninstall $swp1 $direction
+
+       log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()
+{
+       test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+       test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+       test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+       test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755 (executable)
index 0000000..8fa681e
--- /dev/null
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to a directly-attached
+# route).
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       sysctl_set net.ipv4.conf.all.rp_filter 0
+       sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip address add dev $swp3 192.0.2.161/28
+       ip address add dev $h3 192.0.2.162/28
+       ip address add dev gt4 192.0.2.129/32
+       ip address add dev h3-gt4 192.0.2.130/32
+
+       # The IPv6 route can't be added after the address: such routes are
+       # rejected, because the gateway address is already configured on the
+       # local system. Adding the route first and the address second works.
+       ip address add dev $swp3 2001:db8:4::1/64
+       ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+       ip address add dev $h3 2001:db8:4::2/64
+       ip address add dev gt6 2001:db8:2::1
+       ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+       ip address del dev $h3 2001:db8:4::2/64
+       ip address del dev $swp3 2001:db8:4::1/64
+
+       ip address del dev $h3 192.0.2.162/28
+       ip address del dev $swp3 192.0.2.161/28
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+
+       sysctl_restore net.ipv4.conf.$h3.rp_filter
+       sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+       RET=0
+       mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+       # For IPv4, test that there's no mirroring without the route directing
+       # the traffic to the tunnel remote address. Then add the route and test
+       # that mirroring starts. For IPv6 this can't be tested, due to the
+       # limitation that routes for locally-specified IPv6 addresses can't be
+       # added.
+       fail_test_span_gre_dir gt4 ingress
+
+       ip route add 192.0.2.130/32 via 192.0.2.162
+       quick_test_span_gre_dir gt4 ingress
+       ip route del 192.0.2.130/32 via 192.0.2.162
+
+       mirror_uninstall $swp1 ingress
+       log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+       RET=0
+
+       mirror_install $swp1 ingress gt6 "matchall $tcflags"
+       quick_test_span_gre_dir gt6 ingress
+       mirror_uninstall $swp1 ingress
+
+       log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644 (file)
index 0000000..b3ceda2
--- /dev/null
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another; importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3               + gt6 (ip6gretap)      + gt4 (gretap)         |
+#   |     |                     : loc=2001:db8:2::1    : loc=192.0.2.129      |
+#   |     |                     : rem=2001:db8:2::2    : rem=192.0.2.130      |
+#   |     |                     : ttl=100              : ttl=100              |
+#   |     |                     : tos=inherit          : tos=inherit          |
+#   |     |                     :                      :                      |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |                             loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |                             rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+mirror_gre_topo_h1_create()
+{
+       simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_gre_topo_h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_gre_topo_h2_create()
+{
+       simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_gre_topo_h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_gre_topo_h3_create()
+{
+       simple_if_init $h3
+
+       tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+       ip link set h3-gt4 vrf v$h3
+       matchall_sink_create h3-gt4
+
+       tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+       ip link set h3-gt6 vrf v$h3
+       matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+       tunnel_destroy h3-gt6
+       tunnel_destroy h3-gt4
+
+       simple_if_fini $h3
+}
+
+mirror_gre_topo_switch_create()
+{
+       ip link set dev $swp3 up
+
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+
+       ip link set dev $swp2 master br1
+       ip link set dev $swp2 up
+
+       tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+                     ttl 100 tos inherit
+
+       tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+                     ttl 100 tos inherit allow-localremote
+
+       tc qdisc add dev $swp1 clsact
+}
+
+mirror_gre_topo_switch_destroy()
+{
+       tc qdisc del dev $swp1 clsact
+
+       tunnel_destroy gt6
+       tunnel_destroy gt4
+
+       ip link set dev $swp1 down
+       ip link set dev $swp2 down
+       ip link del dev br1
+
+       ip link set dev $swp3 down
+}
+
+mirror_gre_topo_create()
+{
+       mirror_gre_topo_h1_create
+       mirror_gre_topo_h2_create
+       mirror_gre_topo_h3_create
+
+       mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+       mirror_gre_topo_switch_destroy
+
+       mirror_gre_topo_h3_destroy
+       mirror_gre_topo_h2_destroy
+       mirror_gre_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644 (file)
index 0000000..e5028a5
--- /dev/null
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+       local from_dev=$1; shift
+       local direction=$1; shift
+       local to_dev=$1; shift
+       local filter=$1; shift
+
+       tc filter add dev $from_dev $direction \
+          pref 1000 $filter \
+          action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+       local from_dev=$1; shift
+       local direction=$1; shift
+
+       tc filter del dev $from_dev $direction pref 1000
+}
+
+mirror_test()
+{
+       local vrf_name=$1; shift
+       local sip=$1; shift
+       local dip=$1; shift
+       local dev=$1; shift
+       local pref=$1; shift
+       local expect=$1; shift
+
+       local t0=$(tc_rule_stats_get $dev $pref)
+       ip vrf exec $vrf_name \
+          ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       local t1=$(tc_rule_stats_get $dev $pref)
+       local delta=$((t1 - t0))
+       # Tolerate a couple of stray extra packets.
+       ((expect <= delta && delta <= expect + 2))
+       check_err $? "Expected to capture $expect packets, got $delta."
+}
index cc6a14abfa87a84a3202ed24755f3da9cb089e1d..a75cb51cc5bd851e838df91c996d673afe6dde6e 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6"
 NUM_NETIFS=4
 source lib.sh
 
@@ -114,12 +115,21 @@ cleanup()
        vrf_cleanup
 }
 
+ping_ipv4()
+{
+       ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+       ping6_test $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
+tests_run
 
 exit $EXIT_STATUS
index 3bc351008db63055552e115d33d4b077bc74de32..8b6d0fb6d604d4b632edb8150c62f447ba176aec 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
 NUM_NETIFS=8
 source lib.sh
 
@@ -191,7 +192,7 @@ multipath_eval()
        diff=$(echo $weights_ratio - $packets_ratio | bc -l)
        diff=${diff#-}
 
-       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+       test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
        check_err $? "Too large discrepancy between expected and measured ratios"
        log_test "$desc"
        log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
@@ -204,13 +205,11 @@ multipath4_test()
        local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
-       local hash_policy
 
        # Transmit multiple flows from h1 to h2 and make sure they are
        # distributed between both multipath links (rp12 and rp13)
        # according to the configured weights.
-       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
-       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       sysctl_set net.ipv4.fib_multipath_hash_policy 1
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
                nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
                nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
@@ -232,7 +231,7 @@ multipath4_test()
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
                nexthop via 169.254.2.22 dev $rp12 \
                nexthop via 169.254.3.23 dev $rp13
-       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+       sysctl_restore net.ipv4.fib_multipath_hash_policy
 }
 
 multipath6_l4_test()
@@ -242,13 +241,11 @@ multipath6_l4_test()
        local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
-       local hash_policy
 
        # Transmit multiple flows from h1 to h2 and make sure they are
        # distributed between both multipath links (rp12 and rp13)
        # according to the configured weights.
-       hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
-       sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+       sysctl_set net.ipv6.fib_multipath_hash_policy 1
 
        ip route replace 2001:db8:2::/64 vrf vrf-r1 \
               nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
@@ -271,7 +268,7 @@ multipath6_l4_test()
               nexthop via fe80:2::22 dev $rp12 \
               nexthop via fe80:3::23 dev $rp13
 
-       sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+       sysctl_restore net.ipv6.fib_multipath_hash_policy
 }
 
 multipath6_test()
@@ -364,13 +361,21 @@ cleanup()
        vrf_cleanup
 }
 
+ping_ipv4()
+{
+       ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+       ping6_test $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
-multipath_test
+tests_run
 
 exit $EXIT_STATUS
index 3a6385ebd5d0f8d526e152191673c65b120dfe3c..813d02d1939dd2cbbcc3bc55c8f1a71a074edc2f 100755 (executable)
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+       mirred_egress_mirror_test gact_trap_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -111,6 +113,10 @@ gact_trap_test()
 {
        RET=0
 
+       if [[ "$tcflags" != "skip_sw" ]]; then
+               return 0;
+       fi
+
        tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
                skip_hw dst_ip 192.0.2.2 action drop
        tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
@@ -179,24 +185,29 @@ cleanup()
        ip link set $swp1 address $swp1origmac
 }
 
+mirred_egress_redirect_test()
+{
+       mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+       mirred_egress_test "mirror"
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-gact_drop_and_ok_test
-mirred_egress_test "redirect"
-mirred_egress_test "mirror"
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
        log_info "Could not test offloaded functionality"
 else
        tcflags="skip_sw"
-       gact_drop_and_ok_test
-       mirred_egress_test "redirect"
-       mirred_egress_test "mirror"
-       gact_trap_test
+       tests_run
 fi
 
 exit $EXIT_STATUS
index 2fd15226974b22d11449427d7b1bbf3b9f01d7cc..d2c783e94df3d6a5f160215fc11777b6b22b283e 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -107,16 +108,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-unreachable_chain_test
-gact_goto_chain_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
        log_info "Could not test offloaded functionality"
 else
        tcflags="skip_sw"
-       unreachable_chain_test
-       gact_goto_chain_test
+       tests_run
 fi
 
 exit $EXIT_STATUS
index 0c54059f1875c7ea227efbbfb3288d7c93ee6374..20d1077e5a3de9cb8ad9186d866837421b84d518 100755 (executable)
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+       match_src_ip_test match_ip_flags_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -245,22 +247,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-match_dst_mac_test
-match_src_mac_test
-match_dst_ip_test
-match_src_ip_test
-match_ip_flags_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
        log_info "Could not test offloaded functionality"
 else
        tcflags="skip_sw"
-       match_dst_mac_test
-       match_src_mac_test
-       match_dst_ip_test
-       match_src_ip_test
-       match_ip_flags_test
+       tests_run
 fi
 
 exit $EXIT_STATUS
index 077b98048ef4886c5ff0533ca8e3b459a7b346a0..b5b91720381561efa48d2f664afc3af7b84671c1 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="shared_block_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -109,14 +110,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-shared_block_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
        log_info "Could not test offloaded functionality"
 else
        tcflags="skip_sw"
-       shared_block_test
+       tests_run
 fi
 
 exit $EXIT_STATUS
index 1e428781a625b88b23105c4142223a7211399846..7651fd4d86fe0a793f43c9d34b54b793d84e6c2d 100755 (executable)
@@ -368,7 +368,7 @@ test_pmtu_vti6_link_add_mtu() {
 
        fail=0
 
-       min=1280
+       min=68                  # vti6 can carry IPv4 packets too
        max=$((65535 - 40))
        # Check invalid values first
        for v in $((min - 1)) $((max + 1)); do
@@ -384,7 +384,7 @@ test_pmtu_vti6_link_add_mtu() {
        done
 
        # Now check valid values
-       for v in 1280 1300 $((65535 - 40)); do
+       for v in 68 1280 1300 $((65535 - 40)); do
                ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
                mtu="$(link_get_mtu "${ns_a}" vti6_a)"
                ${ns_a} ip link del vti6_a
index e6f4852354353124df50e889f5c04ad220dc584e..760faef2e12eda8bd31c7bb816dd1143ca6ed37b 100755 (executable)
@@ -502,6 +502,108 @@ kci_test_macsec()
        echo "PASS: macsec"
 }
 
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+#    ip x s update
+#    ip x s allocspi
+#    ip x s deleteall
+#    ip x p update
+#    ip x p deleteall
+#    ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+       srcip="14.0.0.52"
+       dstip="14.0.0.70"
+       algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+
+       # flush to be sure there's nothing configured
+       ip x s flush ; ip x p flush
+       check_err $?
+
+       # start the monitor in the background
+       tmpfile=`mktemp ipsectestXXX`
+       ip x m > $tmpfile &
+       mpid=$!
+       sleep 0.2
+
+       ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+       ip x s add $ipsecid \
+            mode transport reqid 0x07 replay-window 32 \
+            $algo sel src $srcip/24 dst $dstip/24
+       check_err $?
+
+       lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+       test $lines -eq 2
+       check_err $?
+
+       ip x s count | grep -q "SAD count 1"
+       check_err $?
+
+       lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+       test $lines -eq 2
+       check_err $?
+
+       ip x s delete $ipsecid
+       check_err $?
+
+       lines=`ip x s list | wc -l`
+       test $lines -eq 0
+       check_err $?
+
+       ipsecsel="dir out src $srcip/24 dst $dstip/24"
+       ip x p add $ipsecsel \
+                   tmpl proto esp src $srcip dst $dstip \
+                   spi 0x07 mode transport reqid 0x07
+       check_err $?
+
+       lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+       test $lines -eq 2
+       check_err $?
+
+       ip x p count | grep -q "SPD IN  0 OUT 1 FWD 0"
+       check_err $?
+
+       lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+       test $lines -eq 2
+       check_err $?
+
+       ip x p delete $ipsecsel
+       check_err $?
+
+       lines=`ip x p list | wc -l`
+       test $lines -eq 0
+       check_err $?
+
+       # check the monitor results
+       kill $mpid
+       lines=`wc -l $tmpfile | cut "-d " -f1`
+       test $lines -eq 20
+       check_err $?
+       rm -rf $tmpfile
+
+       # clean up any leftovers
+       ip x s flush
+       check_err $?
+       ip x p flush
+       check_err $?
+
+       if [ $ret -ne 0 ]; then
+               echo "FAIL: ipsec"
+               return 1
+       fi
+       echo "PASS: ipsec"
+}
+
 kci_test_gretap()
 {
        testns="testns"
@@ -755,6 +857,7 @@ kci_test_rtnl()
        kci_test_vrf
        kci_test_encap
        kci_test_macsec
+       kci_test_ipsec
 
        kci_del_dummy
 }
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644 (file)
index 0000000..d044b29
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+       struct sockaddr_in6 *addr6 = (void *) sockaddr;
+       struct sockaddr_in *addr4 = (void *) sockaddr;
+
+       switch (family) {
+       case PF_INET:
+               memset(addr4, 0, sizeof(*addr4));
+               addr4->sin_family = AF_INET;
+               addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+               addr4->sin_port = htons(port);
+               break;
+       case PF_INET6:
+               memset(addr6, 0, sizeof(*addr6));
+               addr6->sin6_family = AF_INET6;
+               addr6->sin6_addr = in6addr_loopback;
+               addr6->sin6_port = htons(port);
+               break;
+       default:
+               error(1, 0, "illegal family");
+       }
+}
+
+void *start_server(void *arg)
+{
+       int server_fd = (int)(unsigned long)arg;
+       struct sockaddr_in addr;
+       socklen_t addrlen = sizeof(addr);
+       char *buf;
+       int fd;
+       int r;
+
+       buf = malloc(BUF_SIZE);
+
+       for (;;) {
+               fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+               if (fd == -1) {
+                       perror("accept");
+                       break;
+               }
+               do {
+                       r = send(fd, buf, BUF_SIZE, 0);
+               } while (r < 0 && errno == EINTR);
+               if (r < 0)
+                       perror("send");
+               if (r != BUF_SIZE)
+                       fprintf(stderr, "can only send %d bytes\n", r);
+               /* TCP_INQ can overestimate the in-queue byte count by one if
+                * the FIN packet has already been queued. Sleep for 1 second
+                * before closing, so that the client has likely invoked
+                * recvmsg() by then.
+                */
+               sleep(1);
+               close(fd);
+       }
+
+       free(buf);
+       close(server_fd);
+       pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct sockaddr_storage listen_addr, addr;
+       int c, one = 1, inq = -1;
+       pthread_t server_thread;
+       char cmsgbuf[CMSG_SIZE];
+       struct iovec iov[1];
+       struct cmsghdr *cm;
+       struct msghdr msg;
+       int server_fd, fd;
+       char *buf;
+
+       while ((c = getopt(argc, argv, "46p:")) != -1) {
+               switch (c) {
+               case '4':
+                       family = PF_INET;
+                       addr_len = sizeof(struct sockaddr_in);
+                       break;
+               case '6':
+                       family = PF_INET6;
+                       addr_len = sizeof(struct sockaddr_in6);
+                       break;
+               case 'p':
+                       port = atoi(optarg);
+                       break;
+               }
+       }
+
+       server_fd = socket(family, SOCK_STREAM, 0);
+       if (server_fd < 0)
+               error(1, errno, "server socket");
+       setup_loopback_addr(family, &listen_addr);
+       if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+                      &one, sizeof(one)) != 0)
+               error(1, errno, "setsockopt(SO_REUSEADDR)");
+       if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+                addr_len) == -1)
+               error(1, errno, "bind");
+       if (listen(server_fd, 128) == -1)
+               error(1, errno, "listen");
+       if (pthread_create(&server_thread, NULL, start_server,
+                          (void *)(unsigned long)server_fd) != 0)
+               error(1, errno, "pthread_create");
+
+       fd = socket(family, SOCK_STREAM, 0);
+       if (fd < 0)
+               error(1, errno, "client socket");
+       setup_loopback_addr(family, &addr);
+       if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+               error(1, errno, "connect");
+       if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+               error(1, errno, "setsockopt(TCP_INQ)");
+
+       msg.msg_name = NULL;
+       msg.msg_namelen = 0;
+       msg.msg_iov = iov;
+       msg.msg_iovlen = 1;
+       msg.msg_control = cmsgbuf;
+       msg.msg_controllen = sizeof(cmsgbuf);
+       msg.msg_flags = 0;
+
+       buf = malloc(BUF_SIZE);
+       iov[0].iov_base = buf;
+       iov[0].iov_len = BUF_SIZE / 2;
+
+       if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+               error(1, errno, "recvmsg");
+       if (msg.msg_flags & MSG_CTRUNC)
+               error(1, 0, "control message is truncated");
+
+       for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+               if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+                       inq = *((int *) CMSG_DATA(cm));
+
+       if (inq != BUF_SIZE - iov[0].iov_len) {
+               fprintf(stderr, "unexpected inq: %d\n", inq);
+               exit(1);
+       }
+
+       printf("PASSED\n");
+       free(buf);
+       close(fd);
+       return 0;
+}
index dea342fe6f4e88b5709d2ac37b2fc9a2a320bf44..77f762780199ff1f69f9f6b3f18e72deddb69f5e 100644 (file)
 #include <time.h>
 #include <sys/time.h>
 #include <netinet/in.h>
-#include <netinet/tcp.h>
 #include <arpa/inet.h>
 #include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
 
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY    0x4000000
@@ -134,11 +135,12 @@ void hash_zone(void *zone, unsigned int length)
 void *child_thread(void *arg)
 {
        unsigned long total_mmap = 0, total = 0;
+       struct tcp_zerocopy_receive zc;
        unsigned long delta_usec;
        int flags = MAP_SHARED;
        struct timeval t0, t1;
        char *buffer = NULL;
-       void *oaddr = NULL;
+       void *addr = NULL;
        double throughput;
        struct rusage ru;
        int lu, fd;
@@ -153,41 +155,46 @@ void *child_thread(void *arg)
                perror("malloc");
                goto error;
        }
+       if (zflg) {
+               addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+               if (addr == MAP_FAILED)
+                       zflg = 0;
+       }
        while (1) {
                struct pollfd pfd = { .fd = fd, .events = POLLIN, };
                int sub;
 
                poll(&pfd, 1, 10000);
                if (zflg) {
-                       void *naddr;
-
-                       naddr = mmap(oaddr, chunk_size, PROT_READ, flags, fd, 0);
-                       if (naddr == (void *)-1) {
-                               if (errno == EAGAIN) {
-                                       /* That is if SO_RCVLOWAT is buggy */
-                                       usleep(1000);
-                                       continue;
-                               }
-                               if (errno == EINVAL) {
-                                       flags = MAP_SHARED;
-                                       oaddr = NULL;
-                                       goto fallback;
-                               }
-                               if (errno != EIO)
-                                       perror("mmap()");
+                       socklen_t zc_len = sizeof(zc);
+                       int res;
+
+                       zc.address = (__u64)addr;
+                       zc.length = chunk_size;
+                       zc.recv_skip_hint = 0;
+                       res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+                                        &zc, &zc_len);
+                       if (res == -1)
                                break;
+
+                       if (zc.length) {
+                               assert(zc.length <= chunk_size);
+                               total_mmap += zc.length;
+                               if (xflg)
+                                       hash_zone(addr, zc.length);
+                               total += zc.length;
                        }
-                       total_mmap += chunk_size;
-                       if (xflg)
-                               hash_zone(naddr, chunk_size);
-                       total += chunk_size;
-                       if (!keepflag) {
-                               flags |= MAP_FIXED;
-                               oaddr = naddr;
+                       if (zc.recv_skip_hint) {
+                               assert(zc.recv_skip_hint <= chunk_size);
+                               lu = read(fd, buffer, zc.recv_skip_hint);
+                               if (lu > 0) {
+                                       if (xflg)
+                                               hash_zone(buffer, lu);
+                                       total += lu;
+                               }
                        }
                        continue;
                }
-fallback:
                sub = 0;
                while (sub < chunk_size) {
                        lu = read(fd, buffer + sub, chunk_size - sub);
@@ -228,6 +235,8 @@ void *child_thread(void *arg)
 error:
        free(buffer);
        close(fd);
+       if (zflg)
+               munmap(addr, chunk_size);
        pthread_exit(0);
 }
 
@@ -371,7 +380,8 @@ int main(int argc, char *argv[])
                setup_sockaddr(cfg_family, host, &listenaddr);
 
                if (mss &&
-                   setsockopt(fdlisten, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+                   setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+                              &mss, sizeof(mss)) == -1) {
                        perror("setsockopt TCP_MAXSEG");
                        exit(1);
                }
@@ -402,7 +412,7 @@ int main(int argc, char *argv[])
        setup_sockaddr(cfg_family, host, &addr);
 
        if (mss &&
-           setsockopt(fd, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+           setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
                perror("setsockopt TCP_MAXSEG");
                exit(1);
        }
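
The hunks above replace tcp_mmap's per-chunk mmap() with the new
TCP_ZEROCOPY_RECEIVE getsockopt(). Pulled out of the surrounding test logic,
one receive round follows the pattern below (a minimal sketch, assuming
Linux 4.18+ headers that define struct tcp_zerocopy_receive, and a connected
TCP socket fd with a chunk_size-byte PROT_READ window already mmap()ed at
addr, as in the test program):

	#include <netinet/in.h>
	#include <linux/tcp.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <sys/types.h>
	#include <unistd.h>

	/* One zerocopy receive round; returns payload bytes obtained, or -1
	 * on error. The kernel maps up to zc.length bytes of the receive
	 * queue at addr and reports any head-of-queue bytes it could not map
	 * in zc.recv_skip_hint, which must be drained with ordinary read().
	 */
	static ssize_t zerocopy_recv_once(int fd, void *addr,
					  size_t chunk_size)
	{
		struct tcp_zerocopy_receive zc;
		socklen_t zc_len = sizeof(zc);
		char skipbuf[4096];
		ssize_t total = 0, lu;

		memset(&zc, 0, sizeof(zc));
		zc.address = (__u64)(unsigned long)addr;
		zc.length = chunk_size;

		if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
			       &zc, &zc_len) == -1)
			return -1;

		total += zc.length;	/* bytes now mapped at addr */

		while (zc.recv_skip_hint > 0) {
			size_t n = zc.recv_skip_hint < sizeof(skipbuf) ?
				   zc.recv_skip_hint : sizeof(skipbuf);

			lu = read(fd, skipbuf, n);
			if (lu <= 0)
				break;
			zc.recv_skip_hint -= lu;
			total += lu;
		}
		return total;
	}

As in the test, a caller would poll() for POLLIN first, and fall back to
plain read() on kernels or sockets where the option is unavailable.
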
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644 (file)
index 0000000..48a0592
--- /dev/null
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU    0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT            103
+#endif
+
+#define CONST_MTU_TEST 1500
+
+#define CONST_HDRLEN_V4                (sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6                (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4           (CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6           (CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4      (ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6      (ETH_MAX_MTU / CONST_MSS_V6)
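+
+/* Worked example for the constants above (illustration only): with
+ * CONST_MTU_TEST = 1500, CONST_MSS_V4 = 1500 - 28 = 1472 and
+ * CONST_MAX_SEGS_V4 = 65535 / 1472 = 44. A maximal send of
+ * 65535 - 28 = 65507 bytes is thus received as 44 full 1472-byte datagrams
+ * plus one 739-byte tail (65507 - 44 * 1472 = 739), which is exactly what
+ * the "send MAX bytes" testcases below expect.
+ */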
+
+static bool            cfg_do_ipv4;
+static bool            cfg_do_ipv6;
+static bool            cfg_do_connected;
+static bool            cfg_do_connectionless;
+static bool            cfg_do_msgmore;
+static bool            cfg_do_setsockopt;
+static int             cfg_specific_test_id = -1;
+
+static const char      cfg_ifname[] = "lo";
+static unsigned short  cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+       int tlen;               /* send() buffer size, may exceed mss */
+       bool tfail;             /* send() call is expected to fail */
+       int gso_len;            /* mss after applying gso */
+       int r_num_mss;          /* recv(): number of calls of full mss */
+       int r_len_last;         /* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+       {
+               /* no GSO: send a single byte */
+               .tlen = 1,
+               .r_len_last = 1,
+       },
+       {
+               /* no GSO: send a single MSS */
+               .tlen = CONST_MSS_V4,
+               .r_num_mss = 1,
+       },
+       {
+               /* no GSO: send a single MSS + 1B: fail */
+               .tlen = CONST_MSS_V4 + 1,
+               .tfail = true,
+       },
+       {
+               /* send a single MSS: will fail with GSO, because the segment
+                * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+                */
+               .tlen = CONST_MSS_V4,
+               .gso_len = CONST_MSS_V4,
+               .tfail = true,
+               .r_num_mss = 1,
+       },
+       {
+               /* send a single MSS + 1B */
+               .tlen = CONST_MSS_V4 + 1,
+               .gso_len = CONST_MSS_V4,
+               .r_num_mss = 1,
+               .r_len_last = 1,
+       },
+       {
+               /* send exactly 2 MSS */
+               .tlen = CONST_MSS_V4 * 2,
+               .gso_len = CONST_MSS_V4,
+               .r_num_mss = 2,
+       },
+       {
+               /* send 2 MSS + 1B */
+               .tlen = (CONST_MSS_V4 * 2) + 1,
+               .gso_len = CONST_MSS_V4,
+               .r_num_mss = 2,
+               .r_len_last = 1,
+       },
+       {
+               /* send MAX segs */
+               .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+               .gso_len = CONST_MSS_V4,
+               .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+       },
+
+       {
+               /* send MAX bytes */
+               .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+               .gso_len = CONST_MSS_V4,
+               .r_num_mss = CONST_MAX_SEGS_V4,
+               .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+                             (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+       },
+       {
+               /* send MAX + 1: fail */
+               .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+               .gso_len = CONST_MSS_V4,
+               .tfail = true,
+       },
+       {
+               /* EOL */
+       }
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU    (ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+       {
+               /* no GSO: send a single byte */
+               .tlen = 1,
+               .r_len_last = 1,
+       },
+       {
+               /* no GSO: send a single MSS */
+               .tlen = CONST_MSS_V6,
+               .r_num_mss = 1,
+       },
+       {
+               /* no GSO: send a single MSS + 1B: fail */
+               .tlen = CONST_MSS_V6 + 1,
+               .tfail = true,
+       },
+       {
+               /* send a single MSS: will fail with GSO, because the segment
+                * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+                */
+               .tlen = CONST_MSS_V6,
+               .gso_len = CONST_MSS_V6,
+               .tfail = true,
+               .r_num_mss = 1,
+       },
+       {
+               /* send a single MSS + 1B */
+               .tlen = CONST_MSS_V6 + 1,
+               .gso_len = CONST_MSS_V6,
+               .r_num_mss = 1,
+               .r_len_last = 1,
+       },
+       {
+               /* send exactly 2 MSS */
+               .tlen = CONST_MSS_V6 * 2,
+               .gso_len = CONST_MSS_V6,
+               .r_num_mss = 2,
+       },
+       {
+               /* send 2 MSS + 1B */
+               .tlen = (CONST_MSS_V6 * 2) + 1,
+               .gso_len = CONST_MSS_V6,
+               .r_num_mss = 2,
+               .r_len_last = 1,
+       },
+       {
+               /* send MAX segs */
+               .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+               .gso_len = CONST_MSS_V6,
+               .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+       },
+
+       {
+               /* send MAX bytes */
+               .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+               .gso_len = CONST_MSS_V6,
+               .r_num_mss = CONST_MAX_SEGS_V6,
+               .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+                             (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+       },
+       {
+               /* send MAX + 1: fail */
+               .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+               .gso_len = CONST_MSS_V6,
+               .tfail = true,
+       },
+       {
+               /* EOL */
+       }
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+       struct ifreq ifr;
+
+       memset(&ifr, 0, sizeof(ifr));
+
+       strcpy(ifr.ifr_name, ifname);
+
+       if (ioctl(fd, SIOCGIFMTU, &ifr))
+               error(1, errno, "ioctl get mtu");
+
+       return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+       struct ifreq ifr;
+
+       memset(&ifr, 0, sizeof(ifr));
+
+       ifr.ifr_mtu = mtu;
+       strcpy(ifr.ifr_name, ifname);
+
+       if (ioctl(fd, SIOCSIFMTU, &ifr))
+               error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+       int val;
+
+       val = get_device_mtu(fd, cfg_ifname);
+       fprintf(stderr, "device mtu (orig): %u\n", val);
+
+       __set_device_mtu(fd, cfg_ifname, mtu);
+       val = get_device_mtu(fd, cfg_ifname);
+       if (val != mtu)
+               error(1, 0, "unable to set device mtu to %u (read back %u)",
+                     mtu, val);
+
+       fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+       int level, name, val;
+
+       if (is_ipv4) {
+               level   = SOL_IP;
+               name    = IP_MTU_DISCOVER;
+               val     = IP_PMTUDISC_DO;
+       } else {
+               level   = SOL_IPV6;
+               name    = IPV6_MTU_DISCOVER;
+               val     = IPV6_PMTUDISC_DO;
+       }
+
+       if (setsockopt(fd, level, name, &val, sizeof(val)))
+               error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+       socklen_t vallen;
+       unsigned int mtu;
+       int ret;
+
+       vallen = sizeof(mtu);
+       if (is_ipv4)
+               ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+       else
+               ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+       if (ret)
+               error(1, errno, "getsockopt mtu");
+
+       fprintf(stderr, "path mtu (read):  %u\n", mtu);
+       return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+       struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+       struct nlmsghdr *nh;
+       struct rtattr *rta;
+       struct rtmsg *rt;
+       char data[NLMSG_ALIGN(sizeof(*nh)) +
+                 NLMSG_ALIGN(sizeof(*rt)) +
+                 NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+                 NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+                 NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+       int fd, ret, alen, off = 0;
+
+       alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+       fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+       if (fd == -1)
+               error(1, errno, "socket netlink");
+
+       memset(data, 0, sizeof(data));
+
+       nh = (void *)data;
+       nh->nlmsg_type = RTM_NEWROUTE;
+       nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+       off += NLMSG_ALIGN(sizeof(*nh));
+
+       rt = (void *)(data + off);
+       rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+       rt->rtm_table = RT_TABLE_MAIN;
+       rt->rtm_dst_len = alen << 3;
+       rt->rtm_protocol = RTPROT_BOOT;
+       rt->rtm_scope = RT_SCOPE_UNIVERSE;
+       rt->rtm_type = RTN_UNICAST;
+       off += NLMSG_ALIGN(sizeof(*rt));
+
+       rta = (void *)(data + off);
+       rta->rta_type = RTA_DST;
+       rta->rta_len = RTA_LENGTH(alen);
+       if (is_ipv4)
+               memcpy(RTA_DATA(rta), &addr4, alen);
+       else
+               memcpy(RTA_DATA(rta), &addr6, alen);
+       off += NLMSG_ALIGN(rta->rta_len);
+
+       rta = (void *)(data + off);
+       rta->rta_type = RTA_OIF;
+       rta->rta_len = RTA_LENGTH(sizeof(int));
+       *((int *)(RTA_DATA(rta))) = 1; /* == if_nametoindex("lo") */
+       off += NLMSG_ALIGN(rta->rta_len);
+
+       /* MTU is a subtype in a metrics type */
+       rta = (void *)(data + off);
+       rta->rta_type = RTA_METRICS;
+       rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+       off += NLMSG_ALIGN(rta->rta_len);
+
+       /* now fill MTU subtype. Note that it fits within above rta_len */
+       rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+       rta->rta_type = RTAX_MTU;
+       rta->rta_len = RTA_LENGTH(sizeof(int));
+       *((int *)(RTA_DATA(rta))) = mtu;
+
+       nh->nlmsg_len = off;
+
+       ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+       if (ret != off)
+               error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+       if (close(fd))
+               error(1, errno, "close netlink");
+
+       fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+       int ret;
+
+       ret = sendmsg(fd, msg, flags);
+       if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM))
+               return false;
+       if (ret == -1)
+               error(1, errno, "sendmsg");
+       if (ret != msg->msg_iov->iov_len)
+               error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+       if (msg->msg_flags)
+               error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+       return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+                    struct sockaddr *addr, socklen_t alen)
+{
+       char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+       struct msghdr msg = {0};
+       struct iovec iov = {0};
+       struct cmsghdr *cm;
+
+       iov.iov_base = buf;
+       iov.iov_len = len;
+
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+
+       msg.msg_name = addr;
+       msg.msg_namelen = alen;
+
+       if (gso_len && !cfg_do_setsockopt) {
+               msg.msg_control = control;
+               msg.msg_controllen = sizeof(control);
+
+               cm = CMSG_FIRSTHDR(&msg);
+               cm->cmsg_level = SOL_UDP;
+               cm->cmsg_type = UDP_SEGMENT;
+               cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+               *((uint16_t *) CMSG_DATA(cm)) = gso_len;
+       }
+
+       /* If MSG_MORE, send 1 byte followed by remainder */
+       if (cfg_do_msgmore && len > 1) {
+               iov.iov_len = 1;
+               if (!__send_one(fd, &msg, MSG_MORE))
+                       error(1, 0, "send 1B failed");
+
+               iov.iov_base++;
+               iov.iov_len = len - 1;
+       }
+
+       return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+       int ret;
+
+       ret = recv(fd, buf, sizeof(buf), flags);
+       if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+               return 0;
+       if (ret == -1)
+               error(1, errno, "recv");
+
+       return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+                   struct sockaddr *addr, socklen_t alen)
+{
+       int i, ret, val, mss;
+       bool sent;
+
+       fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+                       addr->sa_family == AF_INET ? 4 : 6,
+                       test->tlen, test->gso_len,
+                       test->tfail ? "(fail)" : "");
+
+       val = test->gso_len;
+       if (cfg_do_setsockopt) {
+               if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+                       error(1, errno, "setsockopt udp segment");
+       }
+
+       sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+       if (sent && test->tfail)
+               error(1, 0, "send succeeded while expecting failure");
+       if (!sent && !test->tfail)
+               error(1, 0, "send failed while expecting success");
+       if (!sent)
+               return;
+
+       mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+       /* Recv all full MSS datagrams */
+       for (i = 0; i < test->r_num_mss; i++) {
+               ret = recv_one(fdr, 0);
+               if (ret != mss)
+                       error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+       }
+
+       /* Recv the non-full last datagram, if tlen was not a multiple of mss */
+       if (test->r_len_last) {
+               ret = recv_one(fdr, 0);
+               if (ret != test->r_len_last)
+                       error(1, 0, "recv.%d: %d != %d (last)",
+                             i, ret, test->r_len_last);
+       }
+
+       /* Verify received all data */
+       ret = recv_one(fdr, MSG_DONTWAIT);
+       if (ret)
+               error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+       struct testcase *tests, *test;
+
+       tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+       for (test = tests; test->tlen; test++) {
+               /* if a specific test is given, then skip all others */
+               if (cfg_specific_test_id == -1 ||
+                   cfg_specific_test_id == test - tests)
+                       run_one(test, fdt, fdr, addr, alen);
+       }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+       struct timeval tv = { .tv_usec = 100 * 1000 };
+       int fdr, fdt, val;
+
+       fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+       if (fdr == -1)
+               error(1, errno, "socket r");
+
+       if (bind(fdr, addr, alen))
+               error(1, errno, "bind");
+
+       /* Have tests fail quickly instead of hang */
+       if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+               error(1, errno, "setsockopt rcv timeout");
+
+       fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+       if (fdt == -1)
+               error(1, errno, "socket t");
+
+       /* Do not fragment these datagrams: only succeed if GSO works */
+       set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+       if (cfg_do_connectionless) {
+               set_device_mtu(fdt, CONST_MTU_TEST);
+               run_all(fdt, fdr, addr, alen);
+       }
+
+       if (cfg_do_connected) {
+               set_device_mtu(fdt, CONST_MTU_TEST + 100);
+               set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+               if (connect(fdt, addr, alen))
+                       error(1, errno, "connect");
+
+               val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+               if (val != CONST_MTU_TEST)
+                       error(1, 0, "bad path mtu %u\n", val);
+
+               run_all(fdt, fdr, addr, 0 /* use connected addr */);
+       }
+
+       if (close(fdt))
+               error(1, errno, "close t");
+       if (close(fdr))
+               error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+       struct sockaddr_in addr = {0};
+
+       addr.sin_family = AF_INET;
+       addr.sin_port = htons(cfg_port);
+       addr.sin_addr = addr4;
+
+       run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+       struct sockaddr_in6 addr = {0};
+
+       addr.sin6_family = AF_INET6;
+       addr.sin6_port = htons(cfg_port);
+       addr.sin6_addr = addr6;
+
+       run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       int c;
+
+       while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+               switch (c) {
+               case '4':
+                       cfg_do_ipv4 = true;
+                       break;
+               case '6':
+                       cfg_do_ipv6 = true;
+                       break;
+               case 'c':
+                       cfg_do_connected = true;
+                       break;
+               case 'C':
+                       cfg_do_connectionless = true;
+                       break;
+               case 'm':
+                       cfg_do_msgmore = true;
+                       break;
+               case 's':
+                       cfg_do_setsockopt = true;
+                       break;
+               case 't':
+                       cfg_specific_test_id = strtoul(optarg, NULL, 0);
+                       break;
+               default:
+                       error(1, 0, "%s: parse error", argv[0]);
+               }
+       }
+}
+
+int main(int argc, char **argv)
+{
+       parse_opts(argc, argv);
+
+       if (cfg_do_ipv4)
+               run_test_v4();
+       if (cfg_do_ipv6)
+               run_test_v6();
+
+       fprintf(stderr, "OK\n");
+       return 0;
+}
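
The harness above exercises UDP_SEGMENT both per-call, via a cmsg in
send_one(), and per-socket, via setsockopt() in run_one(). In isolation, the
per-socket form amounts to the sketch below (assumptions: a 4.18+ kernel; the
1472-byte segment size matches CONST_MSS_V4 for the 1500-byte test MTU;
destination address and port are placeholders):

	#include <arpa/inet.h>
	#include <error.h>
	#include <errno.h>
	#include <netinet/in.h>
	#include <netinet/udp.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#ifndef UDP_SEGMENT
	#define UDP_SEGMENT 103		/* since Linux 4.18 */
	#endif

	int main(void)
	{
		static char payload[1472 * 3];	/* three full segments */
		struct sockaddr_in dst = {0};
		int fd, gso_size = 1472;

		fd = socket(AF_INET, SOCK_DGRAM, 0);
		if (fd == -1)
			error(1, errno, "socket");

		/* From now on the kernel cuts each send on this socket
		 * into gso_size-byte datagrams.
		 */
		if (setsockopt(fd, SOL_UDP, UDP_SEGMENT,
			       &gso_size, sizeof(gso_size)))
			error(1, errno, "setsockopt UDP_SEGMENT");

		dst.sin_family = AF_INET;
		dst.sin_port = htons(9000);	/* placeholder port */
		dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		memset(payload, 'a', sizeof(payload));
		if (sendto(fd, payload, sizeof(payload), 0,
			   (struct sockaddr *)&dst, sizeof(dst)) == -1)
			error(1, errno, "sendto");

		close(fd);
		return 0;
	}

The receiver sees three independent 1472-byte datagrams, which is the
behavior the r_num_mss fields in the testcase tables encode.
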
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755 (executable)
index 0000000..fec24f5
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755 (executable)
index 0000000..792fa4d
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+       local -r jobs="$(jobs -p)"
+
+       if [[ "${jobs}" != "" ]]; then
+               kill -1 ${jobs} 2>/dev/null
+       fi
+}
+trap wake_children EXIT
+
+run_one() {
+       local -r args=$@
+
+       ./udpgso_bench_rx &
+       ./udpgso_bench_rx -t &
+
+       ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+       local -r args=$@
+
+       ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+       local -r args=$@
+
+       echo "udp"
+       run_in_netns ${args}
+
+       echo "udp gso"
+       run_in_netns ${args} -S
+
+       echo "udp gso zerocopy"
+       run_in_netns ${args} -S -z
+}
+
+run_tcp() {
+       local -r args=$@
+
+       echo "tcp"
+       run_in_netns ${args} -t
+
+       echo "tcp zerocopy"
+       run_in_netns ${args} -t -z
+}
+
+run_all() {
+       local -r core_args="-l 4"
+       local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+       local -r ipv6_args="${core_args} -6 -D ::1"
+
+       echo "ipv4"
+       run_tcp "${ipv4_args}"
+       run_udp "${ipv4_args}"
+
+       echo "ipv6"
+       run_tcp "${ipv6_args}"
+       run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+       run_all
+elif [[ $1 == "__subprocess" ]]; then
+       shift
+       run_one $@
+else
+       run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644 (file)
index 0000000..727cf67
--- /dev/null
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int  cfg_port           = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+       if (signum == SIGINT)
+               interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+       struct timeval tv;
+
+       gettimeofday(&tv, NULL);
+       return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+       struct pollfd pfd;
+       int ret;
+
+       pfd.events = POLLIN;
+       pfd.revents = 0;
+       pfd.fd = fd;
+
+       do {
+               ret = poll(&pfd, 1, 10);
+               if (ret == -1)
+                       error(1, errno, "poll");
+               if (ret == 0)
+                       continue;
+               if (pfd.revents != POLLIN)
+                       error(1, 0, "poll: revents 0x%x, expected 0x%x",
+                                       pfd.revents, POLLIN);
+       } while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+       struct sockaddr_in6 addr = {0};
+       int fd, val;
+
+       fd = socket(PF_INET6, do_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket");
+
+       val = 1 << 21;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+               error(1, errno, "setsockopt rcvbuf");
+       val = 1;
+       if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+               error(1, errno, "setsockopt reuseport");
+
+       addr.sin6_family =      PF_INET6;
+       addr.sin6_port =        htons(cfg_port);
+       addr.sin6_addr =        in6addr_any;
+       if (bind(fd, (void *) &addr, sizeof(addr)))
+               error(1, errno, "bind");
+
+       if (do_tcp) {
+               int accept_fd = fd;
+
+               if (listen(accept_fd, 1))
+                       error(1, errno, "listen");
+
+               do_poll(accept_fd);
+
+               fd = accept(accept_fd, NULL, NULL);
+               if (fd == -1)
+                       error(1, errno, "accept");
+               if (close(accept_fd))
+                       error(1, errno, "close accept fd");
+       }
+
+       return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+       int ret;
+
+       while (true) {
+               /* MSG_TRUNC flushes up to len bytes */
+               ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+               if (ret == -1 && errno == EAGAIN)
+                       return;
+               if (ret == -1)
+                       error(1, errno, "flush");
+               if (ret == 0) {
+                       /* client detached */
+                       exit(0);
+               }
+
+               packets++;
+               bytes += ret;
+       }
+
+}
+
+static char sanitized_char(char val)
+{
+       return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
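+/* The sender fills payloads with a repeating 'a'..'z' pattern; verify it. */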
+static void do_verify_udp(const char *data, int len)
+{
+       char cur = data[0];
+       int i;
+
+       /* verify contents */
+       if (cur < 'a' || cur > 'z')
+               error(1, 0, "data initial byte out of range");
+
+       for (i = 1; i < len; i++) {
+               if (cur == 'z')
+                       cur = 'a';
+               else
+                       cur++;
+
+               if (data[i] != cur)
+                       error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+                             i, len,
+                             sanitized_char(data[i]), data[i],
+                             sanitized_char(cur), cur);
+       }
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+       static char rbuf[ETH_DATA_LEN];
+       int ret, len, budget = 256;
+
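+       /* Bound the datagrams drained per wakeup so rate reporting stays timely. */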
+       len = cfg_verify ? sizeof(rbuf) : 0;
+       while (budget--) {
+               /* MSG_TRUNC will make return value full datagram length */
+               ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+               if (ret == -1 && errno == EAGAIN)
+                       return;
+               if (ret == -1)
+                       error(1, errno, "recv");
+               if (len) {
+                       if (ret == 0)
+                               error(1, errno, "recv: 0 byte datagram\n");
+
+                       do_verify_udp(rbuf, ret);
+               }
+
+               packets++;
+               bytes += ret;
+       }
+}
+
+static void usage(const char *filepath)
+{
+       error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       int c;
+
+       while ((c = getopt(argc, argv, "ptv")) != -1) {
+               switch (c) {
+               case 'p':
+                       cfg_port = strtoul(optarg, NULL, 0);
+                       break;
+               case 't':
+                       cfg_tcp = true;
+                       break;
+               case 'v':
+                       cfg_verify = true;
+                       break;
+               }
+       }
+
+       if (optind != argc)
+               usage(argv[0]);
+
+       if (cfg_tcp && cfg_verify)
+               error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+       unsigned long tnow, treport;
+       int fd;
+
+       fd = do_socket(cfg_tcp);
+
+       treport = gettimeofday_ms() + 1000;
+       do {
+               do_poll(fd);
+
+               if (cfg_tcp)
+                       do_flush_tcp(fd);
+               else
+                       do_flush_udp(fd);
+
+               tnow = gettimeofday_ms();
+               if (tnow > treport) {
+                       if (packets)
+                               fprintf(stderr,
+                                       "%s rx: %6lu MB/s %8lu calls/s\n",
+                                       cfg_tcp ? "tcp" : "udp",
+                                       bytes >> 20, packets);
+                       bytes = packets = 0;
+                       treport = tnow + 1000;
+               }
+
+       } while (!interrupted);
+
+       if (close(fd))
+               error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+       parse_opts(argc, argv);
+
+       signal(SIGINT, sigint_handler);
+
+       do_recv();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644 (file)
index 0000000..e821564
--- /dev/null
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
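+/* Fallbacks so this builds where uapi headers predate UDP GSO and zerocopy. */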
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT            103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY    60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY   0x4000000
+#endif
+
+#define NUM_PKT                100
+
+static bool    cfg_cache_trash;
+static int     cfg_cpu         = -1;
+static int     cfg_connected   = true;
+static int     cfg_family      = PF_UNSPEC;
+static uint16_t        cfg_mss;
+static int     cfg_payload_len = (1472 * 42);
+static int     cfg_port        = 8000;
+static int     cfg_runtime_ms  = -1;
+static bool    cfg_segment;
+static bool    cfg_sendmmsg;
+static bool    cfg_tcp;
+static bool    cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+       if (signum == SIGINT)
+               interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+       struct timeval tv;
+
+       gettimeofday(&tv, NULL);
+       return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+       cpu_set_t mask;
+
+       CPU_ZERO(&mask);
+       CPU_SET(cpu, &mask);
+       if (sched_setaffinity(0, sizeof(mask), &mask))
+               error(1, 0, "setaffinity %d", cpu);
+
+       return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+       struct sockaddr_in6 *addr6 = (void *) sockaddr;
+       struct sockaddr_in *addr4 = (void *) sockaddr;
+
+       switch (domain) {
+       case PF_INET:
+               addr4->sin_family = AF_INET;
+               addr4->sin_port = htons(cfg_port);
+               if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+                       error(1, 0, "ipv4 parse error: %s", str_addr);
+               break;
+       case PF_INET6:
+               addr6->sin6_family = AF_INET6;
+               addr6->sin6_port = htons(cfg_port);
+               if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+                       error(1, 0, "ipv6 parse error: %s", str_addr);
+               break;
+       default:
+               error(1, 0, "illegal domain");
+       }
+}
+
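+/* MSG_ZEROCOPY queues a completion per send on the error queue; drain it. */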
+static void flush_zerocopy(int fd)
+{
+       struct msghdr msg = {0};        /* flush */
+       int ret;
+
+       while (1) {
+               ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+               if (ret == -1 && errno == EAGAIN)
+                       break;
+               if (ret == -1)
+                       error(1, errno, "errqueue");
+               if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+                       error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+               msg.msg_flags = 0;
+       }
+}
+
+static int send_tcp(int fd, char *data)
+{
+       int ret, done = 0, count = 0;
+
+       while (done < cfg_payload_len) {
+               ret = send(fd, data + done, cfg_payload_len - done,
+                          cfg_zerocopy ? MSG_ZEROCOPY : 0);
+               if (ret == -1)
+                       error(1, errno, "write");
+
+               done += ret;
+               count++;
+       }
+
+       return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+       int ret, total_len, len, count = 0;
+
+       total_len = cfg_payload_len;
+
+       while (total_len) {
+               len = total_len < cfg_mss ? total_len : cfg_mss;
+
+               ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+                            cfg_connected ? NULL : (void *)&cfg_dst_addr,
+                            cfg_connected ? 0 : cfg_alen);
+               if (ret == -1)
+                       error(1, errno, "write");
+               if (ret != len)
+                       error(1, errno, "write: %uB != %uB\n", ret, len);
+
+               total_len -= len;
+               count++;
+       }
+
+       return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
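+       /* Worst-case batch size; the fill loop below errors out if exceeded. */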
+       const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+       struct mmsghdr mmsgs[max_nr_msg];
+       struct iovec iov[max_nr_msg];
+       unsigned int off = 0, left;
+       int i = 0, ret;
+
+       memset(mmsgs, 0, sizeof(mmsgs));
+
+       left = cfg_payload_len;
+       while (left) {
+               if (i == max_nr_msg)
+                       error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+               iov[i].iov_base = data + off;
+               iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+               mmsgs[i].msg_hdr.msg_iov = iov + i;
+               mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+               off += iov[i].iov_len;
+               left -= iov[i].iov_len;
+               i++;
+       }
+
+       ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+       if (ret == -1)
+               error(1, errno, "sendmmsg");
+
+       return ret;
+}
+
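+/* A single UDP_SEGMENT cmsg asks the kernel to segment the payload into
+ * cfg_mss-sized datagrams on transmit (UDP GSO). */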
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+       uint16_t *valp;
+
+       cm->cmsg_level = SOL_UDP;
+       cm->cmsg_type = UDP_SEGMENT;
+       cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+       valp = (void *)CMSG_DATA(cm);
+       *valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+       char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+       struct msghdr msg = {0};
+       struct iovec iov = {0};
+       int ret;
+
+       iov.iov_base = data;
+       iov.iov_len = cfg_payload_len;
+
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+
+       msg.msg_control = control;
+       msg.msg_controllen = sizeof(control);
+       send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+       msg.msg_name = (void *)&cfg_dst_addr;
+       msg.msg_namelen = cfg_alen;
+
+       ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+       if (ret == -1)
+               error(1, errno, "sendmsg");
+       if (ret != iov.iov_len)
+               error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+       return 1;
+}
+
+static void usage(const char *filepath)
+{
+       error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+                   filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       int max_len, hdrlen;
+       int c;
+
+       while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+               switch (c) {
+               case '4':
+                       if (cfg_family != PF_UNSPEC)
+                               error(1, 0, "Pass one of -4 or -6");
+                       cfg_family = PF_INET;
+                       cfg_alen = sizeof(struct sockaddr_in);
+                       break;
+               case '6':
+                       if (cfg_family != PF_UNSPEC)
+                               error(1, 0, "Pass one of -4 or -6");
+                       cfg_family = PF_INET6;
+                       cfg_alen = sizeof(struct sockaddr_in6);
+                       break;
+               case 'c':
+                       cfg_cache_trash = true;
+                       break;
+               case 'C':
+                       cfg_cpu = strtol(optarg, NULL, 0);
+                       break;
+               case 'D':
+                       setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+                       break;
+               case 'l':
+                       cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+                       break;
+               case 'm':
+                       cfg_sendmmsg = true;
+                       break;
+               case 'p':
+                       cfg_port = strtoul(optarg, NULL, 0);
+                       break;
+               case 's':
+                       cfg_payload_len = strtoul(optarg, NULL, 0);
+                       break;
+               case 'S':
+                       cfg_segment = true;
+                       break;
+               case 't':
+                       cfg_tcp = true;
+                       break;
+               case 'u':
+                       cfg_connected = false;
+                       break;
+               case 'z':
+                       cfg_zerocopy = true;
+                       break;
+               }
+       }
+
+       if (optind != argc)
+               usage(argv[0]);
+
+       if (cfg_family == PF_UNSPEC)
+               error(1, 0, "must pass one of -4 or -6");
+       if (cfg_tcp && !cfg_connected)
+               error(1, 0, "connectionless tcp makes no sense");
+       if (cfg_segment && cfg_sendmmsg)
+               error(1, 0, "cannot combine segment offload and sendmmsg");
+
+       if (cfg_family == PF_INET)
+               hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+       else
+               hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+       cfg_mss = ETH_DATA_LEN - hdrlen;
+       max_len = ETH_MAX_MTU - hdrlen;
+
+       if (cfg_payload_len > max_len)
+               error(1, 0, "payload length %u exceeds max %u",
+                     cfg_payload_len, max_len);
+}
+
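+/* Set the DF bit via path MTU discovery: oversized output then fails with
+ * EMSGSIZE instead of being fragmented. */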
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+       int level, name, val;
+
+       if (is_ipv4) {
+               level   = SOL_IP;
+               name    = IP_MTU_DISCOVER;
+               val     = IP_PMTUDISC_DO;
+       } else {
+               level   = SOL_IPV6;
+               name    = IPV6_MTU_DISCOVER;
+               val     = IPV6_PMTUDISC_DO;
+       }
+
+       if (setsockopt(fd, level, name, &val, sizeof(val)))
+               error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+       unsigned long num_msgs, num_sends;
+       unsigned long tnow, treport, tstop;
+       int fd, i, val;
+
+       parse_opts(argc, argv);
+
+       if (cfg_cpu >= 0)
+               set_cpu(cfg_cpu);
+
+       for (i = 0; i < sizeof(buf[0]); i++)
+               buf[0][i] = 'a' + (i % 26);
+       for (i = 1; i < NUM_PKT; i++)
+               memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+       signal(SIGINT, sigint_handler);
+
+       fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket");
+
+       if (cfg_zerocopy) {
+               val = 1;
+               if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+                       error(1, errno, "setsockopt zerocopy");
+       }
+
+       if (cfg_connected &&
+           connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+               error(1, errno, "connect");
+
+       if (cfg_segment)
+               set_pmtu_discover(fd, cfg_family == PF_INET);
+
+       num_msgs = num_sends = 0;
+       tnow = gettimeofday_ms();
+       tstop = tnow + cfg_runtime_ms;
+       treport = tnow + 1000;
+
+       i = 0;
+       do {
+               if (cfg_tcp)
+                       num_sends += send_tcp(fd, buf[i]);
+               else if (cfg_segment)
+                       num_sends += send_udp_segment(fd, buf[i]);
+               else if (cfg_sendmmsg)
+                       num_sends += send_udp_sendmmsg(fd, buf[i]);
+               else
+                       num_sends += send_udp(fd, buf[i]);
+               num_msgs++;
+
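+               /* Drain zerocopy completion notifications every 16 messages. */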
+               if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+                       flush_zerocopy(fd);
+
+               tnow = gettimeofday_ms();
+               if (tnow > treport) {
+                       fprintf(stderr,
+                               "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+                               cfg_tcp ? "tcp" : "udp",
+                               (num_msgs * cfg_payload_len) >> 20,
+                               num_sends, num_msgs);
+                       num_msgs = num_sends = 0;
+                       treport = tnow + 1000;
+               }
+
+               /* cold cache when writing buffer */
+               if (cfg_cache_trash)
+                       i = ++i < NUM_PKT ? i : 0;
+
+       } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+       if (close(fd))
+               error(1, errno, "close");
+
+       return 0;
+}
index 5b012f4981d4c411329d073e7fbcb832d090c1c0..6f289a49e5ecf01552026bef35534a93d8cd136c 100644 (file)
@@ -66,7 +66,7 @@
         "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
             "$TC action flush action bpf",
         "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
         "expExitCode": "255",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
         "matchCount": "0",
         "teardown": [
-            "$TC action flush action bpf",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ],
             "rm -f _c.o"
         ]
     },
index 93cf8fea8ae76d63eefaefe7063ed7daa864f0b6..3a2f51fc7fd457368d7e3a92e1d8187431ce1c5d 100644 (file)
                 255
             ]
         ],
-        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
-        "expExitCode": "255",
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
         "verifyCmd": "$TC actions ls action csum",
         "matchPattern": "^[ \t]+index [0-9]* ref",
         "matchCount": "32",
         "teardown": [
             "$TC actions flush action csum"
         ]
+    },
+    {
+        "id": "b4e9",
+        "name": "Delete batch of 32 csum actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "0015",
+        "name": "Add batch of 32 csum tcp actions with large cookies",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "^[ \t]+index [0-9]* ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "989e",
+        "name": "Delete batch of 32 csum actions with large cookies",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
     }
 ]
index 03777730ef29a8e054ac0c04806e4e3ea556484e..de97e4ff705cd1212840f01e975003b6bb99de84 100644 (file)
@@ -20,7 +20,7 @@
         "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
@@ -44,7 +44,7 @@
         "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
@@ -68,7 +68,7 @@
         "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
@@ -92,7 +92,7 @@
         "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
         "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action skbedit"
+            "$TC actions flush action ife"
         ]
     },
     {
index 443c9b3c8664b9bc2e279e601d9585f4edad8e05..6e4edfae1799f110178d97dc58e57ba825105bd9 100644 (file)
     },
     {
         "id": "8b69",
-        "name": "Add mirred mirror action with maximum index",
+        "name": "Add mirred mirror action with index at 32-bit maximum",
         "category": [
             "actions",
             "mirred"
             "$TC actions flush action mirred"
         ]
     },
+    {
+        "id": "3f66",
+        "name": "Add mirred mirror action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action mirred index 429496729555",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555",
+        "matchCount": "0",
+        "teardown": []
+    },
     {
         "id": "a70e",
         "name": "Delete mirred mirror action",
index 38d85a1d7492d7aed5324a13cba14e352911bfa3..f03763d816172e9a825ca896f0f3733e2109d503 100644 (file)
         ],
         "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
         "expExitCode": "0",
-        "verifyCmd": "$TC actions get action mirred index 4294967295",
+        "verifyCmd": "$TC actions get action police index 4294967295",
         "matchPattern": "action order [0-9]*:  police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action mirred"
+            "$TC actions flush action police"
         ]
     },
     {
index 4510ddfa6e54f3b067cee636ff357ba5e914c058..69ea09eefffc27290b5c16b084f8ff3798c184e8 100644 (file)
@@ -1,7 +1,7 @@
 [
     {
         "id": "6f5a",
-        "name": "Add vlan pop action",
+        "name": "Add vlan pop action with pipe opcode",
         "category": [
             "actions",
             "vlan"
                 255
             ]
         ],
-        "cmdUnderTest": "$TC actions add action vlan pop index 8",
+        "cmdUnderTest": "$TC actions add action vlan pop pipe index 8",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action vlan",
-        "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action vlan"
         ]
     },
     {
-        "id": "ee6f",
-        "name": "Add vlan pop action with large index",
+        "id": "df35",
+        "name": "Add vlan pop action with pass opcode",
         "category": [
             "actions",
             "vlan"
                 255
             ]
         ],
-        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "cmdUnderTest": "$TC actions add action vlan pop pass index 8",
         "expExitCode": "0",
-        "verifyCmd": "$TC actions list action vlan",
-        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "b0d4",
+        "name": "Add vlan pop action with drop opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop drop index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "95ee",
+        "name": "Add vlan pop action with reclassify opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "0283",
+        "name": "Add vlan pop action with continue opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop continue index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action vlan"
             "$TC actions flush action vlan"
         ]
     },
+    {
+        "id": "a178",
+        "name": "Add vlan pop action with invalid opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop foo index 8",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ee6f",
+        "name": "Add vlan pop action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "0dfa",
+        "name": "Add vlan pop action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action vlan index 429496729599",
+        "matchPattern": "action order [0-9]+: vlan.*pop.reclassify.*index 429496729599",
+        "matchCount": "0",
+        "teardown": []
+    },
     {
         "id": "2b91",
         "name": "Add vlan invalid action",
         "verifyCmd": "$TC actions list action vlan",
         "matchPattern": "action order [0-9]+: vlan.*bad_mode",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action vlan"
-        ]
+        "teardown": []
     },
     {
         "id": "57fc",
-        "name": "Add vlan action with invalid protocol type",
+        "name": "Add vlan push action with invalid protocol type",
         "category": [
             "actions",
             "vlan"
         "verifyCmd": "$TC actions list action vlan",
         "matchPattern": "action order [0-9]+: vlan.*push",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action vlan"
-        ]
+        "teardown": []
     },
     {
         "id": "3989",
             "$TC actions flush action vlan"
         ]
     },
+    {
+        "id": "1f4b",
+        "name": "Add vlan push action with maximum 12-bit vlan ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
     {
         "id": "1f7b",
         "name": "Add vlan push action with invalid vlan ID",
             "$TC actions flush action vlan"
         ]
     },
+    {
+        "id": "fe40",
+        "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
     {
         "id": "5d02",
         "name": "Add vlan push action with invalid IEEE 802.1p priority",
         "verifyCmd": "$TC actions list action vlan",
         "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action vlan"
-        ]
+        "teardown": []
     },
     {
         "id": "6812",
             "$TC actions flush action vlan"
         ]
     },
+    {
+        "id": "3deb",
+        "name": "Replace existing vlan push action with new ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 500 pipe index 12"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 12",
+        "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "9e76",
+        "name": "Replace existing vlan push action with new protocol",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "ede4",
+        "name": "Replace existing vlan push action with new priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "d413",
+        "name": "Replace existing vlan pop action with new cookie",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan pop continue index 1 cookie 22334455"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
     {
         "id": "83a4",
         "name": "Delete vlan pop action",
     },
     {
         "id": "1d78",
-        "name": "Add vlan action with cookie",
+        "name": "Add vlan push action with cookie",
         "category": [
             "actions",
             "vlan"
index d744991c0f4f44d56bda208ad3039ad81500f303..39f66bc29b8249f059886fb57f2a3e75dd81975a 100644 (file)
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
                        check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-                       protection_keys test_vdso test_vsyscall
+                       protection_keys test_vdso test_vsyscall mov_ss_trap
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644 (file)
index 0000000..3c3a022
--- /dev/null
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries.  A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap.  The trap
+ * will be delivered after the next instruction instead.  The CPU's logic
+ * seems to be:
+ *
+ *  - Any fault: drop the pending #DB trap.
+ *  - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ *    deliver #DB.
+ *  - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ *  - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly.  This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
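+/* A process cannot write its own debug registers: fork a child that
+ * ptrace-attaches to the parent, pokes DR0/DR1/DR7 to arm the data
+ * watchpoint on 'ss' and the instruction breakpoint, then detaches.
+ */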
+static void enable_watchpoint(void)
+{
+       pid_t parent = getpid();
+       int status;
+
+       pid_t child = fork();
+       if (child < 0)
+               err(1, "fork");
+
+       if (child) {
+               if (waitpid(child, &status, 0) != child)
+                       err(1, "waitpid for child");
+       } else {
+               unsigned long dr0, dr1, dr7;
+
+               dr0 = (unsigned long)&ss;
+               dr1 = (unsigned long)breakpoint_insn;
+               dr7 = ((1UL << 1) |     /* G0 */
+                      (3UL << 16) |    /* RW0 = read or write */
+                      (1UL << 18) |    /* LEN0 = 2 bytes */
+                      (1UL << 3));     /* G1, RW1 = insn */
+
+               if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+                       err(1, "PTRACE_ATTACH");
+
+               if (waitpid(parent, &status, 0) != parent)
+                       err(1, "waitpid for child");
+
+               if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+                       err(1, "PTRACE_POKEUSER DR0");
+
+               if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+                       err(1, "PTRACE_POKEUSER DR1");
+
+               if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+                       err(1, "PTRACE_POKEUSER DR7");
+
+               printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+               if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+                       err(1, "PTRACE_DETACH");
+
+               exit(0);
+       }
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_sigaction = handler;
+       sa.sa_flags = SA_SIGINFO | flags;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+static char const * const signames[] = {
+       [SIGSEGV] = "SIGSEGV",
+       [SIGBUS] = "SIBGUS",
+       [SIGTRAP] = "SIGTRAP",
+       [SIGILL] = "SIGILL",
+};
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *ctx = ctx_void;
+
+       printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
+              (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+              !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
+}
+
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *ctx = ctx_void;
+
+       printf("\tGot %s with RIP=%lx\n", signames[sig],
+              (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+}
+
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *ctx = ctx_void;
+
+       printf("\tGot %s with RIP=%lx\n", signames[sig],
+              (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+
+       siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+       unsigned long nr;
+
+       asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+       printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+       if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+               printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+       printf("\tSet up a watchpoint\n");
+       sethandler(SIGTRAP, sigtrap, 0);
+       enable_watchpoint();
+
+       printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+       asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; INT3\n");
+       asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; INT 3\n");
+       asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; CS CS INT3\n");
+       asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; CSx14 INT3\n");
+       asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+       printf("[RUN]\tMOV SS; INT 4\n");
+       sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+       asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+       printf("[RUN]\tMOV SS; INTO\n");
+       sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+       nr = -1;
+       asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+                     : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; ICEBP\n");
+
+               /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+               sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+               asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+       }
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; CLI\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+       }
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; #PF\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+                             : [tmp] "=r" (nr) : [ss] "m" (ss));
+       }
+
+       /*
+        * INT $1: if #DB has DPL=3 and there isn't special handling,
+        * then the kernel will die.
+        */
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; INT 1\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+       }
+
+#ifdef __x86_64__
+       /*
+        * In principle, we should test 32-bit SYSCALL as well, but
+        * the calling convention is so unpredictable that it's
+        * not obviously worth the effort.
+        */
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; SYSCALL\n");
+               sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+               nr = SYS_getpid;
+               /*
+                * Toggle the high bit of RSP to make it noncanonical to
+                * strengthen this test on non-SMAP systems.
+                */
+               asm volatile ("btc $63, %%rsp\n\t"
+                             "mov %[ss], %%ss; syscall\n\t"
+                             "btc $63, %%rsp"
+                             : "+a" (nr) : [ss] "m" (ss)
+                             : "rcx"
+#ifdef __x86_64__
+                               , "r11"
+#endif
+                       );
+       }
+#endif
+
+       printf("[RUN]\tMOV SS; breakpointed NOP\n");
+       asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+       /*
+        * Invoking SYSENTER directly breaks all the rules.  Just handle
+        * the SIGSEGV.
+        */
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; SYSENTER\n");
+               stack_t stack = {
+                       .ss_sp = altstack_data,
+                       .ss_size = SIGSTKSZ,
+               };
+               if (sigaltstack(&stack, NULL) != 0)
+                       err(1, "sigaltstack");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+               nr = SYS_getpid;
+               asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+                             : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+                               , "r11"
+#endif
+                       );
+
+               /* We're unreachable here.  SYSENTER forgets RIP. */
+       }
+
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               printf("[RUN]\tMOV SS; INT $0x80\n");
+               sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+               nr = 20;        /* compat getpid */
+               asm volatile ("mov %[ss], %%ss; int $0x80"
+                             : "+a" (nr) : [ss] "m" (ss)
+                             : "flags"
+#ifdef __x86_64__
+                               , "r8", "r9", "r10", "r11"
+#endif
+                       );
+       }
+
+       printf("[OK]\tI aten't dead\n");
+       return 0;
+}
index 9c0325e1ea6844f666bfdcc8204763a8614b9875..50f7e92724813a3525154ede4f2b282af7e5a839 100644 (file)
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
 uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
 unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
 
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR   3
+#endif
+
 /*
  * The kernel is supposed to provide some information about the bounds
  * exception in the siginfo.  It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
                br_count++;
                dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
 
-#define SEGV_BNDERR     3  /* failed address bound checks */
-
                dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
                                status, ip, br_reason);
                dprintf2("si_signo: %d\n", si->si_signo);
index b3cb7670e02661cd2ab66fd3da98b3940dd44c70..254e5436bdd9926091dc7c53bdd37be8bdb19121 100644 (file)
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
 {
        va_list ap;
 
-       va_start(ap, format);
        if (!dprint_in_signal) {
+               va_start(ap, format);
                vprintf(format, ap);
+               va_end(ap);
        } else {
                int ret;
-               int len = vsnprintf(dprint_in_signal_buffer,
-                                   DPRINT_IN_SIGNAL_BUF_SIZE,
-                                   format, ap);
                /*
-                * len is amount that would have been printed,
-                * but actual write is truncated at BUF_SIZE.
+                * No printf() functions are signal-safe.
+                * They deadlock easily. Write the format
+                * string to get some output, even if
+                * incomplete.
                 */
-               if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
-                       len = DPRINT_IN_SIGNAL_BUF_SIZE;
-               ret = write(1, dprint_in_signal_buffer, len);
+               ret = write(1, format, strlen(format));
                if (ret < 0)
-                       abort();
+                       exit(1);
        }
-       va_end(ap);
 }
 #define dprintf_level(level, args...) do {     \
        if (level <= DEBUG_LEVEL)               \
                sigsafe_printf(args);           \
-       fflush(NULL);                           \
 } while (0)
 #define dprintf0(args...) dprintf_level(0, args)
 #define dprintf1(args...) dprintf_level(1, args)
index f15aa5a76fe3457e96e438c15e7ad40d3c7fbce0..460b4bdf4c1edff9d5dfa0d451dbaa393d53b80c 100644 (file)
@@ -72,10 +72,9 @@ extern void abort_hooks(void);
                                test_nr, iteration_nr); \
                dprintf0("errno at assert: %d", errno); \
                abort_hooks();                  \
-               assert(condition);              \
+               exit(__LINE__);                 \
        }                                       \
 } while (0)
-#define raw_assert(cond) assert(cond)
 
 void cat_into_file(char *str, char *file)
 {
@@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file)
         * these need to be raw because they are called under
         * pkey_assert()
         */
-       raw_assert(fd >= 0);
+       if (fd < 0) {
+               fprintf(stderr, "error opening '%s'\n", str);
+               perror("error: ");
+               exit(__LINE__);
+       }
+
        ret = write(fd, str, strlen(str));
        if (ret != strlen(str)) {
                perror("write to file failed");
                fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
-               raw_assert(0);
+               exit(__LINE__);
        }
        close(fd);
 }
@@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me)
 #ifdef __i386__
 
 #ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
+# define SYS_mprotect_key      380
 #endif
+
 #ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc         381
-# define SYS_pkey_free  382
+# define SYS_pkey_alloc                381
+# define SYS_pkey_free         382
 #endif
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+
+#define REG_IP_IDX             REG_EIP
+#define si_pkey_offset         0x14
 
 #else
 
 #ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
+# define SYS_mprotect_key      329
 #endif
+
 #ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc         330
-# define SYS_pkey_free  331
+# define SYS_pkey_alloc                330
+# define SYS_pkey_free         331
 #endif
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+
+#define REG_IP_IDX             REG_RIP
+#define si_pkey_offset         0x20
 
 #endif
 
@@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes)
        }
 }
 
-#define SEGV_BNDERR     3  /* failed address bound checks */
-#define SEGV_PKUERR     4
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR           3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR           4
+#endif
 
 static char *si_code_str(int si_code)
 {
@@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
                dump_mem(pkru_ptr - 128, 256);
        pkey_assert(*pkru_ptr);
 
-       si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
-       dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
-       dump_mem(si_pkey_ptr - 8, 24);
-       siginfo_pkey = *si_pkey_ptr;
-       pkey_assert(siginfo_pkey < NR_PKEYS);
-       last_si_pkey = siginfo_pkey;
-
        if ((si->si_code == SEGV_MAPERR) ||
            (si->si_code == SEGV_ACCERR) ||
            (si->si_code == SEGV_BNDERR)) {
@@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
                exit(4);
        }
 
+       si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+       dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+       dump_mem((u8 *)si_pkey_ptr - 8, 24);
+       siginfo_pkey = *si_pkey_ptr;
+       pkey_assert(siginfo_pkey < NR_PKEYS);
+       last_si_pkey = siginfo_pkey;
+
        dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
        /* need __rdpkru() version so we do not do shadow_pkru checking */
        dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
@@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
        dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
        pkru_faults++;
        dprintf1("<<<<==================================================\n");
-       return;
-       if (trapno == 14) {
-               fprintf(stderr,
-                       "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
-                       trapno, ip);
-               fprintf(stderr, "si_addr %p\n", si->si_addr);
-               fprintf(stderr, "REG_ERR: %lx\n",
-                               (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
-               exit(1);
-       } else {
-               fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
-               fprintf(stderr, "si_addr %p\n", si->si_addr);
-               fprintf(stderr, "REG_ERR: %lx\n",
-                               (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
-               exit(2);
-       }
        dprint_in_signal = 0;
 }
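The si_pkey_offset constants above (0x14 on i386, 0x20 on x86-64) exist because older libc siginfo_t definitions do not expose the si_pkey field, so the handler digs it out by raw byte offset, and only once SEGV_MAPERR/ACCERR/BNDERR have been filtered out. A hedged sketch of the same extraction:

/* Sketch: pulling si_pkey out of siginfo_t by raw offset when the
 * libc headers predate the field.  Offsets as defined in the test.
 */
#include <signal.h>
#include <stdint.h>

#ifdef __i386__
# define SI_PKEY_OFFSET	0x14
#else
# define SI_PKEY_OFFSET	0x20
#endif

static uint32_t siginfo_pkey(const siginfo_t *si)
{
	return *(const uint32_t *)((const uint8_t *)si + SI_PKEY_OFFSET);
}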
 
@@ -393,10 +391,15 @@ pid_t fork_lazy_child(void)
        return forkret;
 }
 
-#define PKEY_DISABLE_ACCESS    0x1
-#define PKEY_DISABLE_WRITE     0x2
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS   0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE    0x2
+#endif
 
-u32 pkey_get(int pkey, unsigned long flags)
+static u32 hw_pkey_get(int pkey, unsigned long flags)
 {
        u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
        u32 pkru = __rdpkru();
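The hw_pkey_get()/hw_pkey_set() rename stresses that these helpers poke the hardware PKRU register directly, distinguishing them from same-named library interfaces. PKRU assigns two bits per key: bit 2*pkey is PKEY_DISABLE_ACCESS, bit 2*pkey+1 is PKEY_DISABLE_WRITE. A sketch of the shift arithmetic the function performs:

/* Sketch: extracting one key's rights bits from a PKRU value. */
#define PKEY_BITS_PER_PKEY	2

static unsigned int pkey_rights_from_pkru(unsigned int pkru, int pkey)
{
	unsigned int mask = PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE;	/* 0x3 */

	return (pkru >> (pkey * PKEY_BITS_PER_PKEY)) & mask;
}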
@@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags)
        return masked_pkru;
 }
 
-int pkey_set(int pkey, unsigned long rights, unsigned long flags)
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
 {
        u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
        u32 old_pkru = __rdpkru();
@@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags)
                pkey, flags);
        pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
 
-       pkey_rights = pkey_get(pkey, syscall_flags);
+       pkey_rights = hw_pkey_get(pkey, syscall_flags);
 
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                        pkey, pkey, pkey_rights);
        pkey_assert(pkey_rights >= 0);
 
        pkey_rights |= flags;
 
-       ret = pkey_set(pkey, pkey_rights, syscall_flags);
+       ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
        assert(!ret);
        /* pkru and flags have the same format */
        shadow_pkru |= flags << (pkey * 2);
@@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags)
 
        pkey_assert(ret >= 0);
 
-       pkey_rights = pkey_get(pkey, syscall_flags);
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       pkey_rights = hw_pkey_get(pkey, syscall_flags);
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                        pkey, pkey, pkey_rights);
 
        dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags)
 {
        unsigned long syscall_flags = 0;
        int ret;
-       int pkey_rights = pkey_get(pkey, syscall_flags);
+       int pkey_rights = hw_pkey_get(pkey, syscall_flags);
        u32 orig_pkru = rdpkru();
 
        pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
 
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                        pkey, pkey, pkey_rights);
        pkey_assert(pkey_rights >= 0);
 
        pkey_rights |= flags;
 
-       ret = pkey_set(pkey, pkey_rights, 0);
+       ret = hw_pkey_set(pkey, pkey_rights, 0);
        /* pkru and flags have the same format */
        shadow_pkru &= ~(flags << (pkey * 2));
        pkey_assert(ret >= 0);
 
-       pkey_rights = pkey_get(pkey, syscall_flags);
-       dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+       pkey_rights = hw_pkey_get(pkey, syscall_flags);
+       dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
                        pkey, pkey, pkey_rights);
 
        dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 struct pkey_malloc_record {
        void *ptr;
        long size;
+       int prot;
 };
 struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
 long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size)
+void record_pkey_malloc(void *ptr, long size, int prot)
 {
        long i;
        struct pkey_malloc_record *rec = NULL;
@@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size)
                (int)(rec - pkey_malloc_records), rec, ptr, size);
        rec->ptr = ptr;
        rec->size = size;
+       rec->prot = prot;
+       pkey_last_malloc_record = rec;
        nr_pkey_malloc_records++;
 }
 
@@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
        pkey_assert(ptr != (void *)-1);
        ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
        pkey_assert(!ret);
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
        rdpkru();
 
        dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
@@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
        size = ALIGN_UP(size, HPAGE_SIZE * 2);
        ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
        pkey_assert(ptr != (void *)-1);
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
        mprotect_pkey(ptr, size, prot, pkey);
 
        dprintf1("unaligned ptr: %p\n", ptr);
@@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
        pkey_assert(ptr != (void *)-1);
        mprotect_pkey(ptr, size, prot, pkey);
 
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
 
        dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
        return ptr;
@@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
 
        mprotect_pkey(ptr, size, prot, pkey);
 
-       record_pkey_malloc(ptr, size);
+       record_pkey_malloc(ptr, size, prot);
 
        dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
        close(fd);
@@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey)
 }
 
 int last_pkru_faults;
+#define UNKNOWN_PKEY -2
 void expected_pk_fault(int pkey)
 {
        dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
                        __func__, last_pkru_faults, pkru_faults);
        dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
        pkey_assert(last_pkru_faults + 1 == pkru_faults);
-       pkey_assert(last_si_pkey == pkey);
+
+       /*
+        * For exec-only memory, we do not know the pkey in
+        * advance, so skip this check.
+        */
+       if (pkey != UNKNOWN_PKEY)
+               pkey_assert(last_si_pkey == pkey);
+
        /*
         * The signal handler should have cleared out PKRU to let the
         * test program continue.  We now have to restore it.
@@ -939,10 +954,11 @@ void expected_pk_fault(int pkey)
        last_si_pkey = -1;
 }
 
-void do_not_expect_pk_fault(void)
-{
-       pkey_assert(last_pkru_faults == pkru_faults);
-}
+#define do_not_expect_pk_fault(msg)    do {                    \
+       if (last_pkru_faults != pkru_faults)                    \
+               dprintf0("unexpected PK fault: %s\n", msg);     \
+       pkey_assert(last_pkru_faults == pkru_faults);           \
+} while (0)
 
 int test_fds[10] = { -1 };
 int nr_test_fds;
@@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
        pkey_assert(i < NR_PKEYS*2);
 
        /*
-        * There are 16 pkeys supported in hardware.  One is taken
-        * up for the default (0) and another can be taken up by
-        * an execute-only mapping.  Ensure that we can allocate
-        * at least 14 (16-2).
+        * There are 16 pkeys supported in hardware.  Three are
+        * allocated by the time we get here:
+        *   1. The default key (0)
+        *   2. One possibly consumed by an execute-only mapping.
+        *   3. One allocated by the test code and passed in via
+        *      'pkey' to this function.
+        * Ensure that we can allocate at least another 13 (16-3).
         */
-       pkey_assert(i >= NR_PKEYS-2);
+       pkey_assert(i >= NR_PKEYS-3);
 
        for (i = 0; i < nr_allocated_pkeys; i++) {
                err = sys_pkey_free(allocated_pkeys[i]);
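The arithmetic behind the new assertion: of the 16 hardware keys, pkey 0, a possible exec-only key, and the test's own key are already taken, so a fresh allocation loop must hand out at least 16 - 3 = 13 more. A hedged sketch of such a loop, reusing the raw wrappers sketched earlier and the test's NR_PKEYS:

/* Sketch: count how many keys pkey_alloc() grants before it fails. */
#include <assert.h>

static void check_pkey_exhaustion(void)
{
	int pkeys[NR_PKEYS * 2];
	int allocated = 0;

	while (allocated < NR_PKEYS * 2) {
		int pkey = my_pkey_alloc(0, 0);

		if (pkey < 0)
			break;
		pkeys[allocated++] = pkey;
	}
	/* pkey 0, a possible exec-only key and one test key are in use: */
	assert(allocated >= NR_PKEYS - 3);

	while (allocated--)
		my_pkey_free(pkeys[allocated]);
}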
@@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
        }
 }
 
+/*
+ * pkey 0 is special.  It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first.  Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+       long size;
+       int prot;
+
+       assert(pkey_last_malloc_record);
+       size = pkey_last_malloc_record->size;
+       /*
+        * This is a bit of a hack.  But mprotect() requires
+        * huge-page-aligned sizes when operating on hugetlbfs.
+        * So, make sure that we use something that's a multiple
+        * of a huge page when we can.
+        */
+       if (size >= HPAGE_SIZE)
+               size = HPAGE_SIZE;
+       prot = pkey_last_malloc_record->prot;
+
+       /* Use pkey 0 */
+       mprotect_pkey(ptr, size, prot, 0);
+
+       /* Make sure that we can set it back to the original pkey. */
+       mprotect_pkey(ptr, size, prot, pkey);
+}
+
 void test_ptrace_of_child(int *ptr, u16 pkey)
 {
        __attribute__((__unused__)) int peek_result;
@@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
        pkey_assert(ret != -1);
        /* Now access from the current task, and expect NO exception: */
        peek_result = read_ptr(plain_ptr);
-       do_not_expect_pk_fault();
+       do_not_expect_pk_fault("read plain pointer after ptrace");
 
        ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
        pkey_assert(ret != -1);
@@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
        free(plain_ptr_unaligned);
 }
 
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+void *get_pointer_to_instructions(void)
 {
        void *p1;
-       int scratch;
-       int ptr_contents;
-       int ret;
 
        p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
        dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
@@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
        /* Point 'p1' at the *second* page of the function: */
        p1 += PAGE_SIZE;
 
+       /*
+        * Try to fault this page in on the next touch to ensure
+        * we get an instruction fault as opposed to a data one.
+        */
        madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+       return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+       void *p1;
+       int scratch;
+       int ptr_contents;
+       int ret;
+
+       p1 = get_pointer_to_instructions();
        lots_o_noops_around_write(&scratch);
        ptr_contents = read_ptr(p1);
        dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
@@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
         */
        madvise(p1, PAGE_SIZE, MADV_DONTNEED);
        lots_o_noops_around_write(&scratch);
-       do_not_expect_pk_fault();
+       do_not_expect_pk_fault("executing on PROT_EXEC memory");
        ptr_contents = read_ptr(p1);
        dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
        expected_pk_fault(pkey);
 }
 
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+       void *p1;
+       int scratch;
+       int ptr_contents;
+       int ret;
+
+       dprintf1("%s() start\n", __func__);
+
+       p1 = get_pointer_to_instructions();
+       lots_o_noops_around_write(&scratch);
+       ptr_contents = read_ptr(p1);
+       dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+       /* Use a *normal* mprotect(), not mprotect_pkey(): */
+       ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+       pkey_assert(!ret);
+
+       dprintf2("pkru: %x\n", rdpkru());
+
+       /* Make sure this is an *instruction* fault */
+       madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+       lots_o_noops_around_write(&scratch);
+       do_not_expect_pk_fault("executing on PROT_EXEC memory");
+       ptr_contents = read_ptr(p1);
+       dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+       expected_pk_fault(UNKNOWN_PKEY);
+
+       /*
+        * Put the memory back to non-PROT_EXEC.  Should clear the
+        * exec-only pkey off the VMA and allow it to be readable
+        * again.  Go to PROT_NONE first to check for a kernel bug
+        * that did not clear the pkey when doing PROT_NONE.
+        */
+       ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+       pkey_assert(!ret);
+
+       ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+       pkey_assert(!ret);
+       ptr_contents = read_ptr(p1);
+       do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
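Outside the harness, the behavior this test pins down can be stated in a few lines: on pkey-capable hardware a plain mprotect(PROT_EXEC) makes the kernel attach an internal exec-only pkey to the VMA, so the code still executes but data reads fault with SEGV_PKUERR, and moving back to a readable protection must shed that key. A hedged sketch:

/* Sketch: exec-only memory via plain mprotect(), no explicit pkey calls.
 * On PKU hardware, a data read while PROT_EXEC-only is expected to
 * raise SIGSEGV with si_code == SEGV_PKUERR.
 */
#include <sys/mman.h>

static void demo_exec_only(void *code_page, size_t len)
{
	mprotect(code_page, len, PROT_EXEC);
	/* calling into code_page still works; reading it would fault */

	mprotect(code_page, len, PROT_READ | PROT_EXEC);
	/* the implicit exec-only pkey is dropped; reads work again */
}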
 void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 {
        int size = PAGE_SIZE;
@@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
        test_kernel_gup_of_access_disabled_region,
        test_kernel_gup_write_to_write_disabled_region,
        test_executing_on_unreadable_memory,
+       test_implicit_mprotect_exec_only_memory,
+       test_mprotect_with_pkey_0,
        test_ptrace_of_child,
        test_pkey_syscalls_on_non_allocated_pkey,
        test_pkey_syscalls_bad_args,
index 40370354d4c11e48f9404eccfbda1b5bf03cd682..c9c3281077bca413b9164547c9a3da9a2a061103 100644 (file)
@@ -100,12 +100,19 @@ asm (
        "       shl     $32, %r8\n"
        "       orq     $0x7f7f7f7f, %r8\n"
        "       movq    %r8, %r9\n"
-       "       movq    %r8, %r10\n"
-       "       movq    %r8, %r11\n"
-       "       movq    %r8, %r12\n"
-       "       movq    %r8, %r13\n"
-       "       movq    %r8, %r14\n"
-       "       movq    %r8, %r15\n"
+       "       incq    %r9\n"
+       "       movq    %r9, %r10\n"
+       "       incq    %r10\n"
+       "       movq    %r10, %r11\n"
+       "       incq    %r11\n"
+       "       movq    %r11, %r12\n"
+       "       incq    %r12\n"
+       "       movq    %r12, %r13\n"
+       "       incq    %r13\n"
+       "       movq    %r13, %r14\n"
+       "       incq    %r14\n"
+       "       movq    %r14, %r15\n"
+       "       incq    %r15\n"
        "       ret\n"
        "       .code32\n"
        "       .popsection\n"
@@ -128,12 +135,13 @@ int check_regs64(void)
        int err = 0;
        int num = 8;
        uint64_t *r64 = &regs64.r8;
+       uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
 
        if (!kernel_is_64bit)
                return 0;
 
        do {
-               if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+               if (*r64 == expected++)
                        continue; /* register did not change */
                if (syscall_addr != (long)&int80) {
                        /*
@@ -147,18 +155,17 @@ int check_regs64(void)
                                continue;
                        }
                } else {
-                       /* INT80 syscall entrypoint can be used by
+                       /*
+                        * INT80 syscall entrypoint can be used by
                         * 64-bit programs too, unlike SYSCALL/SYSENTER.
                         * Therefore it must preserve R12+
                         * (they are callee-saved registers in 64-bit C ABI).
                         *
-                        * This was probably historically not intended,
-                        * but R8..11 are clobbered (cleared to 0).
-                        * IOW: they are the only registers which aren't
-                        * preserved across INT80 syscall.
+                        * Starting in Linux 4.17 (and any kernel that
+                        * backports the change), R8..11 are preserved.
+                        * Historically (and probably unintentionally), they
+                        * were clobbered or zeroed.
                         */
-                       if (*r64 == 0 && num <= 11)
-                               continue;
                }
                printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
                err++;
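The two hunks above work together: with the single 0x7f7f7f7f7f7f7f7f sentinel in every register, a syscall path that swapped two registers would still pass, so the incq chain gives rN the value 0x7f7f7f7f7f7f7f7f + (N - 8) and the check advances expected in lockstep. A sketch of the resulting values:

/* Sketch: one distinct sentinel per register, as set up by the asm. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base = 0x7f7f7f7f7f7f7f7fULL;
	int reg;

	for (reg = 8; reg <= 15; reg++)
		printf("r%d = 0x%" PRIx64 "\n", reg, base + (reg - 8));
	return 0;
}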
index dba629c5f8acd17eb7ef1743bf7fa0641094ed44..a4c1b76240df2d8a818fbef3ea73aac68f713fb2 100644 (file)
@@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u32 kvm_next_vmid;
 static unsigned int kvm_vmid_bits __read_mostly;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
+static DEFINE_RWLOCK(kvm_vmid_lock);
 
 static bool vgic_present;
 
@@ -473,11 +473,16 @@ static void update_vttbr(struct kvm *kvm)
 {
        phys_addr_t pgd_phys;
        u64 vmid;
+       bool new_gen;
 
-       if (!need_new_vmid_gen(kvm))
+       read_lock(&kvm_vmid_lock);
+       new_gen = need_new_vmid_gen(kvm);
+       read_unlock(&kvm_vmid_lock);
+
+       if (!new_gen)
                return;
 
-       spin_lock(&kvm_vmid_lock);
+       write_lock(&kvm_vmid_lock);
 
        /*
         * We need to re-check the vmid_gen here to ensure that if another vcpu
@@ -485,7 +490,7 @@ static void update_vttbr(struct kvm *kvm)
         * use the same vmid.
         */
        if (!need_new_vmid_gen(kvm)) {
-               spin_unlock(&kvm_vmid_lock);
+               write_unlock(&kvm_vmid_lock);
                return;
        }
 
@@ -519,7 +524,7 @@ static void update_vttbr(struct kvm *kvm)
        vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
        kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
 
-       spin_unlock(&kvm_vmid_lock);
+       write_unlock(&kvm_vmid_lock);
 }
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
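The rwlock conversion above optimizes for the common case: need_new_vmid_gen() is checked under a shared read lock on every guest entry, and the exclusive write lock is taken only on VMID-generation rollover, with the condition rechecked because another vCPU may have rolled the generation between the two acquisitions. A generic userspace sketch of this read-mostly pattern (not the kernel code itself):

/* Sketch: read-mostly check with a write-side recheck, pthread flavor. */
#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static unsigned long generation;

static void maybe_refresh(unsigned long *seen)
{
	bool stale;

	pthread_rwlock_rdlock(&lock);		/* cheap, shared fast path */
	stale = (*seen != generation);
	pthread_rwlock_unlock(&lock);
	if (!stale)
		return;

	pthread_rwlock_wrlock(&lock);
	if (*seen != generation) {		/* recheck: someone may have won */
		/* ... expensive refresh work ... */
		*seen = generation;
	}
	pthread_rwlock_unlock(&lock);
}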
index 6919352cbf15e8d50b3742e32acfd152be4b18a8..c4762bef13c6d389ff0c1e4f656322d66bb167b0 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/arm-smccc.h>
 #include <linux/preempt.h>
 #include <linux/kvm_host.h>
+#include <linux/uaccess.h>
 #include <linux/wait.h>
 
 #include <asm/cputype.h>
@@ -427,3 +428,62 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
        smccc_set_retval(vcpu, val, 0, 0, 0);
        return 1;
 }
+
+int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
+{
+       return 1;               /* PSCI version */
+}
+
+int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+       if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
+               return -EFAULT;
+
+       return 0;
+}
+
+int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+       if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
+               void __user *uaddr = (void __user *)(long)reg->addr;
+               u64 val;
+
+               val = kvm_psci_version(vcpu, vcpu->kvm);
+               if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+                       return -EFAULT;
+
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+       if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
+               void __user *uaddr = (void __user *)(long)reg->addr;
+               bool wants_02;
+               u64 val;
+
+               if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+                       return -EFAULT;
+
+               wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
+
+               switch (val) {
+               case KVM_ARM_PSCI_0_1:
+                       if (wants_02)
+                               return -EINVAL;
+                       vcpu->kvm->arch.psci_version = val;
+                       return 0;
+               case KVM_ARM_PSCI_0_2:
+               case KVM_ARM_PSCI_1_0:
+                       if (!wants_02)
+                               return -EINVAL;
+                       vcpu->kvm->arch.psci_version = val;
+                       return 0;
+               }
+       }
+
+       return -EINVAL;
+}
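These handlers expose the PSCI version as a firmware pseudo-register through the standard KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls, so a VMM can read it out before migration and pin it on the destination. A hedged userspace sketch (vcpu_fd is an open KVM vCPU descriptor; error handling elided):

/* Sketch: reading the PSCI version pseudo-register from userspace. */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static uint64_t get_psci_version(int vcpu_fd)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_PSCI_VERSION,
		.addr = (uint64_t)(uintptr_t)&val,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	return val;
}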
index 10b38178cff207a9ae1e8d59970e3f19fc7be7f6..4ffc0b5e610560c752c99de33d6f5857e7344773 100644 (file)
@@ -211,6 +211,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
        struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
        struct vgic_irq *irq;
        struct kvm_vcpu *vcpu = NULL;
+       unsigned long flags;
 
        if (iter->dist_id == 0) {
                print_dist_state(s, &kvm->arch.vgic);
@@ -227,9 +228,9 @@ static int vgic_debug_show(struct seq_file *s, void *v)
                irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
        }
 
-       spin_lock(&irq->irq_lock);
+       spin_lock_irqsave(&irq->irq_lock, flags);
        print_irq_state(s, irq, vcpu);
-       spin_unlock(&irq->irq_lock);
+       spin_unlock_irqrestore(&irq->irq_lock, flags);
 
        return 0;
 }
index 68378fe17a0e7e5b0be896863b94913bceb901c9..e07156c303235e13dc00b31d39e38ff1d30d281d 100644 (file)
@@ -423,7 +423,7 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
         * We cannot rely on the vgic maintenance interrupt to be
         * delivered synchronously. This means we can only use it to
         * exit the VM, and we perform the handling of EOIed
-        * interrupts on the exit path (see vgic_process_maintenance).
+        * interrupts on the exit path (see vgic_fold_lr_state).
         */
        return IRQ_HANDLED;
 }
index a8f07243aa9f0533dd2a8fe57c7589c849142e53..4ed79c939fb447188d0a2bc7c740e85e1d82135c 100644 (file)
@@ -52,6 +52,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
+       unsigned long flags;
        int ret;
 
        /* In this case there is no put, since we keep the reference. */
@@ -71,7 +72,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
        irq->intid = intid;
        irq->target_vcpu = vcpu;
 
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
        /*
         * There could be a race with another vgic_add_lpi(), so we need to
@@ -99,7 +100,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
        dist->lpi_list_count++;
 
 out_unlock:
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
        /*
         * We "cache" the configuration table entries in our struct vgic_irq's.
@@ -280,8 +281,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
        int ret;
        unsigned long flags;
 
-       ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
-                            &prop, 1);
+       ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
+                                 &prop, 1);
 
        if (ret)
                return ret;
@@ -315,6 +316,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        struct vgic_irq *irq;
+       unsigned long flags;
        u32 *intids;
        int irq_count, i = 0;
 
@@ -330,7 +332,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
        if (!intids)
                return -ENOMEM;
 
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
        list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
                if (i == irq_count)
                        break;
@@ -339,7 +341,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
                        continue;
                intids[i++] = irq->intid;
        }
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
        *intid_ptr = intids;
        return i;
@@ -348,10 +350,11 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
 {
        int ret = 0;
+       unsigned long flags;
 
-       spin_lock(&irq->irq_lock);
+       spin_lock_irqsave(&irq->irq_lock, flags);
        irq->target_vcpu = vcpu;
-       spin_unlock(&irq->irq_lock);
+       spin_unlock_irqrestore(&irq->irq_lock, flags);
 
        if (irq->hw) {
                struct its_vlpi_map map;
@@ -441,8 +444,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
                 * this very same byte in the last iteration. Reuse that.
                 */
                if (byte_offset != last_byte_offset) {
-                       ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset,
-                                            &pendmask, 1);
+                       ret = kvm_read_guest_lock(vcpu->kvm,
+                                                 pendbase + byte_offset,
+                                                 &pendmask, 1);
                        if (ret) {
                                kfree(intids);
                                return ret;
@@ -786,7 +790,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
                return false;
 
        /* Each 1st level entry is represented by a 64-bit value. */
-       if (kvm_read_guest(its->dev->kvm,
+       if (kvm_read_guest_lock(its->dev->kvm,
                           BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
                           &indirect_ptr, sizeof(indirect_ptr)))
                return false;
@@ -1367,8 +1371,8 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
        cbaser = CBASER_ADDRESS(its->cbaser);
 
        while (its->cwriter != its->creadr) {
-               int ret = kvm_read_guest(kvm, cbaser + its->creadr,
-                                        cmd_buf, ITS_CMD_SIZE);
+               int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
+                                             cmd_buf, ITS_CMD_SIZE);
                /*
                 * If kvm_read_guest() fails, this could be due to the guest
                 * programming a bogus value in CBASER or something else going
@@ -1893,7 +1897,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
                int next_offset;
                size_t byte_offset;
 
-               ret = kvm_read_guest(kvm, gpa, entry, esz);
+               ret = kvm_read_guest_lock(kvm, gpa, entry, esz);
                if (ret)
                        return ret;
 
@@ -2263,7 +2267,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
        int ret;
 
        BUG_ON(esz > sizeof(val));
-       ret = kvm_read_guest(kvm, gpa, &val, esz);
+       ret = kvm_read_guest_lock(kvm, gpa, &val, esz);
        if (ret)
                return ret;
        val = le64_to_cpu(val);
index e21e2f49b005256543fa912b8b33df8c134ff726..ffc587bf4742676d14930d21f07005275576a79e 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/irqchip/arm-gic.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/nospec.h>
+
 #include <kvm/iodev.h>
 #include <kvm/arm_vgic.h>
 
@@ -324,6 +326,9 @@ static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu,
 
                if (n > vgic_v3_max_apr_idx(vcpu))
                        return 0;
+
+               n = array_index_nospec(n, 4);
+
                /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
                return vgicv3->vgic_ap1r[n];
        }
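array_index_nospec() clamps the index even under speculative execution, so a mispredicted bounds check cannot be turned into an out-of-bounds read (Spectre variant 1); note that it is applied after the architectural range check has already passed. The generic shape of the pattern:

/* Sketch: Spectre-v1 hardening for an array lookup (<linux/nospec.h>). */
#include <linux/nospec.h>

static int table_read(const int *table, unsigned long idx, unsigned long size)
{
	if (idx >= size)
		return -1;
	idx = array_index_nospec(idx, size);	/* no speculative overshoot */
	return table[idx];
}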
index dbe99d635c80435ffd938999c4246ce6f45307c7..ff9655cfeb2f8678558163790de2aeb6ec5841df 100644 (file)
@@ -289,10 +289,16 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
               irq->vcpu->cpu != -1) /* VCPU thread is running */
                cond_resched_lock(&irq->irq_lock);
 
-       if (irq->hw)
+       if (irq->hw) {
                vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
-       else
+       } else {
+               u32 model = vcpu->kvm->arch.vgic.vgic_model;
+
                irq->active = active;
+               if (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
+                   active && vgic_irq_is_sgi(irq->intid))
+                       irq->active_source = requester_vcpu->vcpu_id;
+       }
 
        if (irq->active)
                vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
index 45aa433f018ffdebfd05cd7d2720ed996b57dc4d..a5f2e44f1c33d42ad2693d63b8142665864e64a6 100644 (file)
@@ -37,13 +37,6 @@ void vgic_v2_init_lrs(void)
                vgic_v2_write_lr(i, 0);
 }
 
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
-{
-       struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
-
-       cpuif->vgic_hcr |= GICH_HCR_NPIE;
-}
-
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
 {
        struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -71,13 +64,18 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
        int lr;
        unsigned long flags;
 
-       cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
+       cpuif->vgic_hcr &= ~GICH_HCR_UIE;
 
        for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
                u32 val = cpuif->vgic_lr[lr];
-               u32 intid = val & GICH_LR_VIRTUALID;
+               u32 cpuid, intid = val & GICH_LR_VIRTUALID;
                struct vgic_irq *irq;
 
+               /* Extract the source vCPU id from the LR */
+               cpuid = val & GICH_LR_PHYSID_CPUID;
+               cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+               cpuid &= 7;
+
                /* Notify fds when the guest EOI'ed a level-triggered SPI */
                if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
                        kvm_notify_acked_irq(vcpu->kvm, 0,
@@ -90,17 +88,16 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
                /* Always preserve the active bit */
                irq->active = !!(val & GICH_LR_ACTIVE_BIT);
 
+               if (irq->active && vgic_irq_is_sgi(intid))
+                       irq->active_source = cpuid;
+
                /* Edge is the only case where we preserve the pending bit */
                if (irq->config == VGIC_CONFIG_EDGE &&
                    (val & GICH_LR_PENDING_BIT)) {
                        irq->pending_latch = true;
 
-                       if (vgic_irq_is_sgi(intid)) {
-                               u32 cpuid = val & GICH_LR_PHYSID_CPUID;
-
-                               cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+                       if (vgic_irq_is_sgi(intid))
                                irq->source |= (1 << cpuid);
-                       }
                }
 
                /*
@@ -152,8 +149,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
        u32 val = irq->intid;
        bool allow_pending = true;
 
-       if (irq->active)
+       if (irq->active) {
                val |= GICH_LR_ACTIVE_BIT;
+               if (vgic_irq_is_sgi(irq->intid))
+                       val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
+               if (vgic_irq_is_multi_sgi(irq)) {
+                       allow_pending = false;
+                       val |= GICH_LR_EOI;
+               }
+       }
 
        if (irq->hw) {
                val |= GICH_LR_HW;
@@ -190,8 +194,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                        BUG_ON(!src);
                        val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
                        irq->source &= ~(1 << (src - 1));
-                       if (irq->source)
+                       if (irq->source) {
                                irq->pending_latch = true;
+                               val |= GICH_LR_EOI;
+                       }
                }
        }
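For GICv2 SGIs the list register carries the source CPU in the GICH_LR_PHYSID_CPUID field (at GICH_LR_PHYSID_CPUID_SHIFT; only 3 bits are meaningful, hence the "& 7"), which is why the fold path extracts cpuid and the populate path ORs active_source back in; GICH_LR_EOI requests a maintenance interrupt so the remaining sources can be injected one by one. A kernel-context sketch of the field round trip, using the same masks:

/* Sketch: GICv2 LR source-CPU field encode/decode
 * (masks from <linux/irqchip/arm-gic.h>).
 */
static inline u32 lr_encode_source(u32 val, u32 cpuid)
{
	return val | ((cpuid & 7) << GICH_LR_PHYSID_CPUID_SHIFT);
}

static inline u32 lr_decode_source(u32 val)
{
	return ((val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) & 7;
}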
 
index 8195f52ae6f0906c31432ca0ac4471d4bdd3c74a..bdcf8e7a6161298d373a7605dc5fe6be43fa872e 100644 (file)
@@ -27,13 +27,6 @@ static bool group1_trap;
 static bool common_trap;
 static bool gicv4_enable;
 
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
-{
-       struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
-
-       cpuif->vgic_hcr |= ICH_HCR_NPIE;
-}
-
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -55,17 +48,23 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
        int lr;
        unsigned long flags;
 
-       cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
+       cpuif->vgic_hcr &= ~ICH_HCR_UIE;
 
        for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
                u64 val = cpuif->vgic_lr[lr];
-               u32 intid;
+               u32 intid, cpuid;
                struct vgic_irq *irq;
+               bool is_v2_sgi = false;
 
-               if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+               cpuid = val & GICH_LR_PHYSID_CPUID;
+               cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+
+               if (model == KVM_DEV_TYPE_ARM_VGIC_V3) {
                        intid = val & ICH_LR_VIRTUAL_ID_MASK;
-               else
+               } else {
                        intid = val & GICH_LR_VIRTUALID;
+                       is_v2_sgi = vgic_irq_is_sgi(intid);
+               }
 
                /* Notify fds when the guest EOI'ed a level-triggered IRQ */
                if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
@@ -81,18 +80,16 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
                /* Always preserve the active bit */
                irq->active = !!(val & ICH_LR_ACTIVE_BIT);
 
+               if (irq->active && is_v2_sgi)
+                       irq->active_source = cpuid;
+
                /* Edge is the only case where we preserve the pending bit */
                if (irq->config == VGIC_CONFIG_EDGE &&
                    (val & ICH_LR_PENDING_BIT)) {
                        irq->pending_latch = true;
 
-                       if (vgic_irq_is_sgi(intid) &&
-                           model == KVM_DEV_TYPE_ARM_VGIC_V2) {
-                               u32 cpuid = val & GICH_LR_PHYSID_CPUID;
-
-                               cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+                       if (is_v2_sgi)
                                irq->source |= (1 << cpuid);
-                       }
                }
 
                /*
@@ -133,10 +130,20 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 {
        u32 model = vcpu->kvm->arch.vgic.vgic_model;
        u64 val = irq->intid;
-       bool allow_pending = true;
+       bool allow_pending = true, is_v2_sgi;
 
-       if (irq->active)
+       is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
+                    model == KVM_DEV_TYPE_ARM_VGIC_V2);
+
+       if (irq->active) {
                val |= ICH_LR_ACTIVE_BIT;
+               if (is_v2_sgi)
+                       val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
+               if (vgic_irq_is_multi_sgi(irq)) {
+                       allow_pending = false;
+                       val |= ICH_LR_EOI;
+               }
+       }
 
        if (irq->hw) {
                val |= ICH_LR_HW;
@@ -174,8 +181,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                        BUG_ON(!src);
                        val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
                        irq->source &= ~(1 << (src - 1));
-                       if (irq->source)
+                       if (irq->source) {
                                irq->pending_latch = true;
+                               val |= ICH_LR_EOI;
+                       }
                }
        }
 
@@ -335,7 +344,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
        bit_nr = irq->intid % BITS_PER_BYTE;
        ptr = pendbase + byte_offset;
 
-       ret = kvm_read_guest(kvm, ptr, &val, 1);
+       ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
        if (ret)
                return ret;
 
@@ -388,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
                ptr = pendbase + byte_offset;
 
                if (byte_offset != last_byte_offset) {
-                       ret = kvm_read_guest(kvm, ptr, &val, 1);
+                       ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
                        if (ret)
                                return ret;
                        last_byte_offset = byte_offset;
index e74baec7636130c6e379c93c7312a0be70f97a59..33c8325c8f35662c03c37319605d28e2f2f9d7a4 100644 (file)
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/list_sort.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
+#include <linux/nospec.h>
+
 #include <asm/kvm_hyp.h>
 
 #include "vgic.h"
@@ -41,9 +43,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
  * kvm->lock (mutex)
  *   its->cmd_lock (mutex)
  *     its->its_lock (mutex)
- *       vgic_cpu->ap_list_lock
- *         kvm->lpi_list_lock
- *           vgic_irq->irq_lock
+ *       vgic_cpu->ap_list_lock                must be taken with IRQs disabled
+ *         kvm->lpi_list_lock          must be taken with IRQs disabled
+ *           vgic_irq->irq_lock                must be taken with IRQs disabled
+ *
+ * As the ap_list_lock might be taken from the timer interrupt handler,
+ * we have to disable IRQs before taking this lock and everything lower
+ * than it.
  *
  * If you need to take multiple locks, always take the upper lock first,
  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
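That ordering note is the reason the hunks around here move to spin_lock_irqsave()/spin_unlock_irqrestore(): once a lock can be taken from the timer interrupt handler, any holder on the same CPU must run with IRQs off, or the handler can interrupt it and spin on the lock forever. The pattern, in brief:

/* Sketch: a lock shared with an interrupt handler needs IRQs disabled.
 * Saving/restoring flags keeps nesting correct if IRQs were already off.
 */
static void touch_shared(struct vgic_irq *irq)
{
	unsigned long flags;

	spin_lock_irqsave(&irq->irq_lock, flags);
	/* ... state also reachable from the timer interrupt ... */
	spin_unlock_irqrestore(&irq->irq_lock, flags);
}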
@@ -70,8 +76,9 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq = NULL;
+       unsigned long flags;
 
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
        list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
                if (irq->intid != intid)
@@ -87,7 +94,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
        irq = NULL;
 
 out_unlock:
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
        return irq;
 }
@@ -101,12 +108,16 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                              u32 intid)
 {
        /* SGIs and PPIs */
-       if (intid <= VGIC_MAX_PRIVATE)
+       if (intid <= VGIC_MAX_PRIVATE) {
+               intid = array_index_nospec(intid, VGIC_MAX_PRIVATE);
                return &vcpu->arch.vgic_cpu.private_irqs[intid];
+       }
 
        /* SPIs */
-       if (intid <= VGIC_MAX_SPI)
+       if (intid <= VGIC_MAX_SPI) {
+               intid = array_index_nospec(intid, VGIC_MAX_SPI);
                return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
+       }
 
        /* LPIs */
        if (intid >= VGIC_MIN_LPI)
@@ -128,19 +139,20 @@ static void vgic_irq_release(struct kref *ref)
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
+       unsigned long flags;
 
        if (irq->intid < VGIC_MIN_LPI)
                return;
 
-       spin_lock(&dist->lpi_list_lock);
+       spin_lock_irqsave(&dist->lpi_list_lock, flags);
        if (!kref_put(&irq->refcount, vgic_irq_release)) {
-               spin_unlock(&dist->lpi_list_lock);
+               spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
                return;
        };
 
        list_del(&irq->lpi_list);
        dist->lpi_list_count--;
-       spin_unlock(&dist->lpi_list_lock);
+       spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
        kfree(irq);
 }
@@ -594,6 +606,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 
        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
+               bool target_vcpu_needs_kick = false;
 
                spin_lock(&irq->irq_lock);
 
@@ -664,11 +677,18 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
                        list_del(&irq->ap_list);
                        irq->vcpu = target_vcpu;
                        list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
+                       target_vcpu_needs_kick = true;
                }
 
                spin_unlock(&irq->irq_lock);
                spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
                spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+
+               if (target_vcpu_needs_kick) {
+                       kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
+                       kvm_vcpu_kick(target_vcpu);
+               }
+
                goto retry;
        }
 
@@ -711,14 +731,6 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
                vgic_v3_set_underflow(vcpu);
 }
 
-static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
-{
-       if (kvm_vgic_global_state.type == VGIC_V2)
-               vgic_v2_set_npie(vcpu);
-       else
-               vgic_v3_set_npie(vcpu);
-}
-
 /* Requires the ap_list_lock to be held. */
 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
                                 bool *multi_sgi)
@@ -732,17 +744,15 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 
        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
+               int w;
+
                spin_lock(&irq->irq_lock);
                /* GICv2 SGIs can count for more than one... */
-               if (vgic_irq_is_sgi(irq->intid) && irq->source) {
-                       int w = hweight8(irq->source);
-
-                       count += w;
-                       *multi_sgi |= (w > 1);
-               } else {
-                       count++;
-               }
+               w = vgic_irq_get_lr_count(irq);
                spin_unlock(&irq->irq_lock);
+
+               count += w;
+               *multi_sgi |= (w > 1);
        }
        return count;
 }
@@ -753,7 +763,6 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count;
-       bool npie = false;
        bool multi_sgi;
        u8 prio = 0xff;
 
@@ -783,10 +792,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
                if (likely(vgic_target_oracle(irq) == vcpu)) {
                        vgic_populate_lr(vcpu, irq, count++);
 
-                       if (irq->source) {
-                               npie = true;
+                       if (irq->source)
                                prio = irq->priority;
-                       }
                }
 
                spin_unlock(&irq->irq_lock);
@@ -799,9 +806,6 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
                }
        }
 
-       if (npie)
-               vgic_set_npie(vcpu);
-
        vcpu->arch.vgic_cpu.used_lrs = count;
 
        /* Nuke remaining LRs */
index 830e815748a05bae8d31de8db1b2fd6166392c08..32c25d42c93f401390f5d00fca80e637c8670151 100644 (file)
@@ -110,6 +110,20 @@ static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
        return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
 }
 
+static inline int vgic_irq_get_lr_count(struct vgic_irq *irq)
+{
+       /* Account for the active state as an interrupt */
+       if (vgic_irq_is_sgi(irq->intid) && irq->source)
+               return hweight8(irq->source) + irq->active;
+
+       return irq_is_pending(irq) || irq->active;
+}
+
+static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq)
+{
+       return vgic_irq_get_lr_count(irq) > 1;
+}
+
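vgic_irq_get_lr_count() is what drives the multi-SGI handling above: a GICv2 SGI pending from several source CPUs needs one list register per set bit in irq->source, plus one more if the interrupt is also active. A worked instance of the accounting, with illustrative values (__builtin_popcount stands in for the kernel's hweight8):

/* Sketch: LR accounting for an SGI pending from CPUs 0, 2 and 3
 * (source mask 0b1101) that is also currently active.
 */
static int demo_lr_count(void)
{
	unsigned char source = 0x0d;	/* CPUs 0, 2, 3 */
	int active = 1;

	return __builtin_popcount(source) + active;	/* == 4 -> multi-SGI */
}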
 /*
  * This struct provides an intermediate representation of the fields contained
  * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC